# Tokenizes the remaining input as HTML/XML-style markup and reports each
# token to +encoder+ via +text_token+, +begin_group+ and +end_group+.
#
# The scanner is a small state machine. The states are:
#   :initial                 - between tags (text, entities, comments, tags)
#   :attribute               - inside an opening tag, expecting an attribute
#   :attribute_equal         - after an attribute name, expecting "="
#   :attribute_value         - after "=", expecting a value
#   :attribute_value_string  - inside a quoted attribute value
#   :in_special_tag          - directly after a complete <script ...> tag
#
# encoder - object receiving the token stream.
# options - Hash; the keys read here are:
#           :state      - state to start in (falls back to @state).
#           :keep_state - when truthy, the final state, the current string
#                         content pattern and the current special tag name
#                         are stored in instance variables so that a later
#                         call can continue where this one stopped.
#
# Returns +encoder+.
#
# Raises through +raise_inspect+ when an unknown state is encountered, and
# a RuntimeError when :in_special_tag is entered with an unsupported tag.
def scan_tokens encoder, options
  state = options[:state] || @state
  plain_string_content = @plain_string_content
  # The special tag name travels together with the state: without it a
  # resumed scan that is in :in_special_tag (or still inside the attribute
  # list of a <script ...> tag) would no longer know it belongs to a script.
  in_tag = @in_tag
  in_attribute = nil

  # When resuming inside a quoted attribute value, re-open the string group
  # that was closed at the end of the previous call.
  encoder.begin_group :string if state == :attribute_value_string

  until eos?

    # Whitespace is a plain :space token everywhere except directly inside a
    # special tag, where it is part of the embedded code.
    if state != :in_special_tag && match = scan(/\s+/m)
      encoder.text_token match, :space

    else

      case state

      when :initial
        if match = scan(/<!--(?:.*?-->|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
          encoder.text_token match, :doctype
        elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
          encoder.text_token match, :preprocessor
        elsif match = scan(/<\?(?:.*?\?>|.*)/m)
          encoder.text_token match, :comment
        elsif match = scan(/<\/[-\w.:]*>?/m)
          in_tag = nil
          encoder.text_token match, :tag
        # The lookahead keeps tag names that merely start with "script"
        # (<scripts>, <script-foo>) from being captured as a script tag.
        elsif match = scan(/<(?:(script)(?![-\w.:])|[-\w.:]+)(>)?/m)
          encoder.text_token match, :tag
          in_tag = self[1]
          if self[2]
            # The tag was closed right away: "<script>" or "<name>".
            state = :in_special_tag if in_tag
          else
            state = :attribute
          end
        elsif match = scan(/[^<>&]+/)
          encoder.text_token match, :plain
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/[<>&]/)
          in_tag = nil
          encoder.text_token match, :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
        end

      when :attribute
        if match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          in_attribute = nil
          if in_tag
            state = :in_special_tag
          else
            state = :initial
          end
        elsif match = scan(/#{ATTR_NAME}/o)
          in_attribute = IN_ATTRIBUTE[match]
          encoder.text_token match, :attribute_name
          state = :attribute_equal
        else
          in_tag = nil
          encoder.text_token getch, :error
        end

      when :attribute_equal
        if match = scan(/=/)
          encoder.text_token match, :operator
          state = :attribute_value
        else
          # Attribute without a value: re-examine the same input as an
          # attribute, without consuming anything.
          state = :attribute
          next
        end

      when :attribute_value
        if match = scan(/#{ATTR_NAME}/o)
          # Unquoted value.
          encoder.text_token match, :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          if in_attribute == :script
            # Quoted value of a script attribute: the content up to the
            # matching quote (or end of input) goes to scan_java_script.
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            if scan(/javascript:[ \t]*/)
              encoder.text_token matched, :comment
            end
            code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
            scan_java_script encoder, code
            match = scan(/["']/)
            encoder.text_token match, :inline_delimiter if match
            encoder.end_group :inline
            state = :attribute
            in_attribute = nil
          else
            encoder.begin_group :string
            state = :attribute_value_string
            plain_string_content = PLAIN_STRING_CONTENT[match]
            encoder.text_token match, :delimiter
          end
        elsif match = scan(/#{TAG_END}/o)
          encoder.text_token match, :tag
          state = :initial
        else
          encoder.text_token getch, :error
        end

      when :attribute_value_string
        if match = scan(plain_string_content)
          encoder.text_token match, :content
        elsif match = scan(/['"]/)
          encoder.text_token match, :delimiter
          encoder.end_group :string
          state = :attribute
        elsif match = scan(/#{ENTITY}/ox)
          encoder.text_token match, :entity
        elsif match = scan(/&/)
          # A lone ampersand that is not an entity is ordinary content.
          encoder.text_token match, :content
        elsif match = scan(/[\n>]/)
          # Unterminated string: close the group and flag the character.
          encoder.end_group :string
          state = :initial
          encoder.text_token match, :error
        end

      when :in_special_tag
        case in_tag
        when 'script'
          encoder.text_token match, :space if match = scan(/[ \t]*\n/)
          if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
            # Script body wrapped in an HTML comment; the comment markers
            # are emitted as :comment tokens around the code.
            code = self[2] || self[4]
            closing = self[3]
            encoder.text_token self[1], :comment
          else
            # Everything up to (not including) the closing tag, or up to the
            # end of the input when there is none.
            code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
            closing = false
          end
          unless code.empty?
            encoder.begin_group :inline
            scan_java_script encoder, code
            encoder.end_group :inline
          end
          encoder.text_token closing, :comment if closing
          state = :initial
        else
          raise 'unknown special tag: %p' % [in_tag]
        end

      else
        raise_inspect 'Unknown state: %p' % [state], encoder

      end

    end

  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
    @in_tag = in_tag
  end

  # Balance the string group if the input ended inside a quoted value.
  encoder.end_group :string if state == :attribute_value_string

  encoder
end