Class CodeRay::Scanners::HTML
In: lib/coderay/scanners/html.rb
Parent: Scanner

HTML Scanner

Alias: xhtml

See also: Scanners::XML

Methods

Constants

# Names of HTML event-handler attributes. scan_tokens scans the quoted value
# of any attribute listed here as inline JavaScript.
EVENT_ATTRIBUTES = %w( onabort onafterprint onbeforeprint onbeforeunload onblur oncanplay oncanplaythrough onchange onclick oncontextmenu oncuechange ondblclick ondrag ondragdrop ondragend ondragenter ondragleave ondragover ondragstart ondrop ondurationchange onemptied onended onerror onfocus onformchange onforminput onhashchange oninput oninvalid onkeydown onkeypress onkeyup onload onloadeddata onloadedmetadata onloadstart onmessage onmousedown onmousemove onmouseout onmouseover onmouseup onmousewheel onmove onoffline ononline onpagehide onpageshow onpause onplay onplaying onpopstate onprogress onratechange onreadystatechange onredo onreset onresize onscroll onseeked onseeking onselect onshow onstalled onstorage onsubmit onsuspend ontimeupdate onundo onunload onvolumechange onwaiting )
# Lookup table that maps every event attribute name to :script.
# NOTE(review): presumably case-insensitive (per the CaseIgnoring class name)
# with nil as the result for unlisted names - confirm against WordList.
IN_ATTRIBUTE = WordList::CaseIgnoring.new(nil). add(EVENT_ATTRIBUTES, :script)

Public Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 61
    #
    # Resets the scanner: runs the parent class's reset via super, then puts
    # the HTML state machine back into :initial and forgets the stored
    # string-content pattern.
61:     def reset
62:       super
63:       @state = :initial
64:       @plain_string_content = nil
65:     end

Protected Instance methods

[Source]

    # File lib/coderay/scanners/html.rb, line 74
    #
    # Tokenizes +code+ as JavaScript and sends the resulting tokens to
    # +encoder+. Does nothing when +code+ is nil or empty.
    #
    # encoder:: passed to the JavaScript scanner as its :tokens target.
    # code::    the JavaScript source text, or nil.
74:     def scan_java_script encoder, code
75:       if code && !code.empty?
            # Create the JavaScript scanner on first use and reuse it afterwards.
76:         @java_script_scanner ||= Scanners::JavaScript.new '', :keep_tokens => true
            # Group handling is disabled here; the callers in scan_tokens open
            # and close the :inline group themselves.
77:         # encoder.begin_group :inline
78:         @java_script_scanner.tokenize code, :tokens => encoder
79:         # encoder.end_group :inline
80:       end
81:     end

[Source]

     # File lib/coderay/scanners/html.rb, line 83
     #
     # Tokenizes the remaining input as HTML using a small state machine and
     # sends every token to +encoder+.
     #
     # States handled below:
     # :initial::                content between tags
     # :attribute::              inside a start tag, expecting an attribute name or the tag end
     # :attribute_equal::        after an attribute name, expecting "="
     # :attribute_value::        after "=", expecting a value
     # :attribute_value_string:: inside a quoted attribute value
     # :in_special_tag::         content of a <script> element
     #
     # encoder:: receives text_token, begin_group and end_group calls.
     # options:: :state overrides the stored @state as the starting state;
     #           :keep_state, when truthy, saves the final state and the
     #           current string-content pattern into instance variables.
     #
     # Returns +encoder+.
 83:     def scan_tokens encoder, options
 84:       state = options[:state] || @state
 85:       plain_string_content = @plain_string_content
 86:       in_tag = in_attribute = nil
 87:       
           # When scanning starts inside a quoted attribute value, re-open the
           # :string group; it is closed again at the end of this method.
 88:       encoder.begin_group :string if state == :attribute_value_string
 89:       
 90:       until eos?
 91:         
             # Whitespace becomes a :space token in every state except
             # :in_special_tag, which handles its own whitespace.
 92:         if state != :in_special_tag && match = scan(/\s+/m)
 93:           encoder.text_token match, :space
 94:           
 95:         else
 96:           
 97:           case state
 98:           
 99:           when :initial
                 # Each construct also matches in unterminated form (the
                 # trailing ".*" alternative), running to the end of input.
100:             if match = scan(/<!--(?:.*?-->|.*)/m)
101:               encoder.text_token match, :comment
102:             elsif match = scan(/<!DOCTYPE(?:.*?>|.*)/m)
103:               encoder.text_token match, :doctype
104:             elsif match = scan(/<\?xml(?:.*?\?>|.*)/m)
105:               encoder.text_token match, :preprocessor
106:             elsif match = scan(/<\?(?:.*?\?>|.*)/m)
107:               encoder.text_token match, :comment
108:             elsif match = scan(/<\/[-\w.:]*>?/m)
                   # A closing tag ends any remembered special tag.
109:               in_tag = nil
110:               encoder.text_token match, :tag
111:             elsif match = scan(/<(?:(script)|[-\w.:]+)(>)?/m)
112:               encoder.text_token match, :tag
                   # Group 1 is set only for a script tag, so in_tag is
                   # 'script' or nil. Group 2 is set when the tag is closed by
                   # ">" right after its name.
113:               in_tag = self[1]
114:               if self[2]
                     # A non-script tag closed immediately stays in :initial.
115:                 state = :in_special_tag if in_tag
116:               else
117:                 state = :attribute
118:               end
119:             elsif match = scan(/[^<>&]+/)
120:               encoder.text_token match, :plain
121:             elsif match = scan(/#{ENTITY}/ox)
122:               encoder.text_token match, :entity
123:             elsif match = scan(/[<>&]/)
                   # A stray "<", ">" or "&" that matched nothing above.
124:               in_tag = nil
125:               encoder.text_token match, :error
126:             else
127:               raise_inspect '[BUG] else-case reached with state %p' % [state], encoder
128:             end
129:             
130:           when :attribute
131:             if match = scan(/#{TAG_END}/o)
132:               encoder.text_token match, :tag
133:               in_attribute = nil
                   # in_tag is only non-nil for a script start tag.
134:               if in_tag
135:                 state = :in_special_tag
136:               else
137:                 state = :initial
138:               end
139:             elsif match = scan(/#{ATTR_NAME}/o)
                   # Remember whether this attribute is an event handler
                   # (:script), so its quoted value is scanned as JavaScript.
140:               in_attribute = IN_ATTRIBUTE[match]
141:               encoder.text_token match, :attribute_name
142:               state = :attribute_equal
143:             else
                   # Unexpected character inside a tag: emit it as an error
                   # and stop treating the tag as special.
144:               in_tag = nil
145:               encoder.text_token getch, :error
146:             end
147:             
148:           when :attribute_equal
149:             if match = scan(/=/)  #/
150:               encoder.text_token match, :operator
151:               state = :attribute_value
152:             else
                   # No "=" follows the name: go back to :attribute and
                   # restart the loop without consuming any input.
153:               state = :attribute
154:               next
155:             end
156:             
157:           when :attribute_value
                 # An unquoted value is matched with the same ATTR_NAME
                 # pattern that is used for attribute names.
158:             if match = scan(/#{ATTR_NAME}/o)
159:               encoder.text_token match, :attribute_value
160:               state = :attribute
161:             elsif match = scan(/["']/)
162:               if in_attribute == :script
                     # Event-handler attribute: scan the whole quoted value
                     # here as JavaScript inside an :inline group.
163:                 encoder.begin_group :inline
164:                 encoder.text_token match, :inline_delimiter
                     # A leading "javascript:" prefix is emitted as a :comment token.
165:                 if scan(/javascript:[ \t]*/)
166:                   encoder.text_token matched, :comment
167:                 end
                     # Consume up to, but not including, the matching quote,
                     # or up to the end of input if there is none.
168:                 code = scan_until(match == '"' ? /(?="|\z)/ : /(?='|\z)/)
169:                 scan_java_script encoder, code
                     # The closing quote is absent when the input ended first.
170:                 match = scan(/["']/)
171:                 encoder.text_token match, :inline_delimiter if match
172:                 encoder.end_group :inline
173:                 state = :attribute
174:                 in_attribute = nil
175:               else
                     # Ordinary quoted value: open a :string group and pick the
                     # content pattern that belongs to the opening quote.
176:                 encoder.begin_group :string
177:                 state = :attribute_value_string
178:                 plain_string_content = PLAIN_STRING_CONTENT[match]
179:                 encoder.text_token match, :delimiter
180:               end
181:             elsif match = scan(/#{TAG_END}/o)
182:               encoder.text_token match, :tag
183:               state = :initial
184:             else
185:               encoder.text_token getch, :error
186:             end
187:             
188:           when :attribute_value_string
189:             if match = scan(plain_string_content)
190:               encoder.text_token match, :content
191:             elsif match = scan(/['"]/)
                   # NOTE(review): presumably the content pattern excludes only
                   # the opening quote kind, so a quote reaching this branch
                   # closes the string - PLAIN_STRING_CONTENT is not visible here.
192:               encoder.text_token match, :delimiter
193:               encoder.end_group :string
194:               state = :attribute
195:             elsif match = scan(/#{ENTITY}/ox)
196:               encoder.text_token match, :entity
197:             elsif match = scan(/&/)
                   # A bare "&" that is not a valid entity counts as content.
198:               encoder.text_token match, :content
199:             elsif match = scan(/[\n>]/)
                   # Abort the string: close the group and emit the character
                   # as an error.
                   # NOTE(review): a newline looks unreachable here, because
                   # the whitespace branch at the top of the loop consumes it
                   # first in this state - confirm.
200:               encoder.end_group :string
201:               state = :initial
202:               encoder.text_token match, :error
203:             end
204:             
205:           when :in_special_tag
206:             case in_tag
207:             when 'script'
                   # Emit the rest of the start tag's line (optional blanks
                   # plus newline) as a :space token.
208:               encoder.text_token match, :space if match = scan(/[ \t]*\n/)
                   # Script body wrapped in an HTML comment: emit "<!--" as a
                   # comment and scan what is inside as JavaScript. Group 4
                   # covers the case where "-->" is missing.
209:               if scan(/(\s*<!--)(?:(.*?)(-->)|(.*))/m)
210:                 code = self[2] || self[4]
211:                 closing = self[3]
212:                 encoder.text_token self[1], :comment
213:               else
                     # Otherwise take everything up to, but not including,
                     # "</script>" (with an optional preceding newline and
                     # whitespace), or up to the end of input.
214:                 code = scan_until(/(?=(?:\n\s*)?<\/script>)|\z/)
215:                 closing = false
216:               end
217:               unless code.empty?
218:                 encoder.begin_group :inline
219:                 scan_java_script encoder, code
220:                 encoder.end_group :inline
221:               end
222:               encoder.text_token closing, :comment if closing
                   # The closing tag itself is handled by the :initial state.
223:               state = :initial
224:             else
225:               raise 'unknown special tag: %p' % [in_tag]
226:             end
227:             
228:           else
229:             raise_inspect 'Unknown state: %p' % [state], encoder
230:             
231:           end
232:           
233:         end
234:         
235:       end
236:       
           # Save the state only on request, so that a later call can continue
           # from where this one stopped.
237:       if options[:keep_state]
238:         @state = state
239:         @plain_string_content = plain_string_content
240:       end
241:       
           # Close a :string group that is still open at the end of input.
242:       encoder.end_group :string if state == :attribute_value_string
243:       
244:       encoder
245:     end

[Source]

    # File lib/coderay/scanners/html.rb, line 69
    #
    # Puts the state machine into :initial and clears the stored
    # string-content pattern. These are the same two assignments that reset
    # makes, but without calling super.
    # NOTE(review): presumably a hook invoked by the Scanner parent class -
    # the caller is not visible here.
69:     def setup
70:       @state = :initial
71:       @plain_string_content = nil
72:     end

[Validate]