# Tokenizes the scanner's input as PHP (optionally embedded in HTML) and
# feeds the resulting tokens to +encoder+.
#
# The scanner is driven by a stack of states:
#
# * +:initial+            - outside PHP; text is handed to <tt>@html_scanner</tt>
# * +:php+                - inside PHP code
# * +:sqstring+           - inside a single-quoted string or <tt><<<'ID'</tt> heredoc
# * +:dqstring+           - inside a double-quoted/backtick string or heredoc
# * +:class_expected+     - directly after the +class+ keyword
# * +:function_expected+  - directly after the +function+ keyword
# * <tt>[state, delimiter]</tt> - a string state suspended by a <tt>{$...}</tt>
#   interpolation, remembering the delimiter to restore afterwards
#
# Every token group opened with +begin_group+ is closed again, even when the
# input is cut off inside a string/interpolation or a PHP end tag shows up
# while such a group is still open.
#
# encoder:: receives +text_token+, +begin_group+ and +end_group+ calls.
# options:: not used by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options

  # Decide whether the input starts in HTML or directly in PHP code.
  if check(RE::PHP_START) ||  # starts with a PHP start tag
     (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) ||  # starts with a tag, PHP start follows
     check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||  # HTML indicator within the first 1000 chars
     check(/.{1,100}#{RE::PHP_START}/om)  # PHP start tag within the first 100 chars
    # HTML with embedded PHP: start in HTML.
    states = [:initial]
  else
    # Plain PHP without a start tag: start in PHP on top of HTML.
    states = [:initial, :php]
  end

  label_expected = true
  case_expected = false

  heredoc_delimiter = nil  # Regexp matching the closing heredoc identifier, if inside a heredoc
  delimiter = nil          # opening quote character of the current string
  modifier = nil           # pending 'b' prefix of a binary string literal

  # Emits the missing +end_group+ calls for every string / interpolation
  # state that is still on the given stack, innermost first.
  close_open_groups = lambda do |open_states|
    open_states.reverse_each do |state|
      if state.is_a?(::Array)
        # Suspended string state: the interpolation group sits inside the string group.
        encoder.end_group :inline
        state = state.first
      end
      encoder.end_group :string if state == :sqstring || state == :dqstring
    end
  end

  until eos?

    case states.last

    when :initial  # HTML
      if match = scan(RE::PHP_START)
        encoder.text_token match, :inline_delimiter
        label_expected = true
        states << :php
      else
        # Everything up to the next PHP start tag (or the rest) is HTML.
        match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
        @html_scanner.tokenize match unless match.empty?
      end

    when :php
      if match = scan(/\s+/)
        encoder.text_token match, :space

      elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
        encoder.text_token match, :comment

      elsif match = scan(RE::IDENTIFIER)
        kind = Words::IDENT_KIND[match]
        if kind == :ident && label_expected && check(/:(?!:)/)
          kind = :label
          label_expected = true
        else
          label_expected = false
          if kind == :ident && match =~ /^[A-Z]/
            kind = :constant
          elsif kind == :keyword
            case match
            when 'class'
              states << :class_expected
            when 'function'
              states << :function_expected
            when 'case', 'default'
              case_expected = true
            end
          elsif match == 'b' && check(/['"]/)
            # Binary string literal: emit the prefix inside the string group.
            modifier = match
            next
          end
        end
        encoder.text_token match, kind

      elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
        label_expected = false
        encoder.text_token match, :float

      elsif match = scan(/0x[0-9a-fA-F]+/)
        label_expected = false
        encoder.text_token match, :hex

      elsif match = scan(/\d+/)
        label_expected = false
        encoder.text_token match, :integer

      elsif match = scan(/['"`]/)
        encoder.begin_group :string
        if modifier
          encoder.text_token modifier, :modifier
          modifier = nil
        end
        delimiter = match
        encoder.text_token match, :delimiter
        states.push(match == "'" ? :sqstring : :dqstring)

      elsif match = scan(RE::VARIABLE)
        label_expected = false
        encoder.text_token match, Words::VARIABLE_KIND[match]

      elsif match = scan(/\{/)
        encoder.text_token match, :operator
        label_expected = true
        states.push :php

      elsif match = scan(/\}/)
        if states.size == 1
          encoder.text_token match, :error
        else
          states.pop
          if states.last.is_a?(::Array)
            # End of a {$...} interpolation: resume the suspended string.
            delimiter = states.last[1]
            states[-1] = states.last[0]
            encoder.text_token match, :delimiter
            encoder.end_group :inline
          else
            encoder.text_token match, :operator
            label_expected = true
          end
        end

      elsif match = scan(/@/)
        label_expected = false
        encoder.text_token match, :exception

      elsif match = scan(RE::PHP_END)
        encoder.text_token match, :inline_delimiter
        # The end tag discards the whole state stack, so close whatever
        # groups are still open and forget any string context.
        close_open_groups.call states
        heredoc_delimiter = nil
        delimiter = nil
        states = [:initial]

      elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
        encoder.begin_group :string

        heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
        encoder.text_token match, :delimiter
        # Only the <<<'ID' form is scanned without interpolation.
        states.push(self[3] ? :sqstring : :dqstring)
        heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/

      elsif match = scan(/#{RE::OPERATOR}/o)
        label_expected = match == ';'
        if case_expected
          label_expected = true if match == ':'
          case_expected = false
        end
        encoder.text_token match, :operator

      else
        encoder.text_token getch, :error

      end

    when :sqstring
      if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(/'/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The newline before the closing identifier still belongs to the content.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      else
        states.pop
      end

    when :dqstring
      if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
        encoder.text_token match, :content
      elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
        encoder.text_token match, :delimiter
        encoder.end_group :string
        delimiter = nil
        label_expected = false
        states.pop
      elsif heredoc_delimiter && match = scan(/\n/)
        if scan heredoc_delimiter
          # The newline before the closing identifier still belongs to the content.
          encoder.text_token "\n", :content
          encoder.text_token matched, :delimiter
          encoder.end_group :string
          heredoc_delimiter = nil
          label_expected = false
          states.pop
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
        encoder.text_token match, :char
      elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
        encoder.text_token match, :char
      elsif match = scan(/\\./m)
        encoder.text_token match, :content
      elsif match = scan(/\\/)
        encoder.text_token match, :error
      elsif match = scan(/#{RE::VARIABLE}/o)
        # Simple interpolation: $var, $var[key] or $var->property.
        if check(/\[#{RE::IDENTIFIER}\]/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/\[/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.text_token scan(/\]/), :operator
          encoder.end_group :inline
        elsif check(/\[/)
          match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
          encoder.text_token match, :error
        elsif check(/->#{RE::IDENTIFIER}/o)
          encoder.begin_group :inline
          encoder.text_token match, :local_variable
          encoder.text_token scan(/->/), :operator
          encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
          encoder.end_group :inline
        elsif check(/->/)
          match << scan(/->/)
          encoder.text_token match, :error
        else
          encoder.text_token match, :local_variable
        end
      elsif match = scan(/\{/)
        if check(/\$/)
          # {$...} interpolation: suspend the string state (remembering its
          # delimiter) and scan PHP code until the matching "}".
          encoder.begin_group :inline
          states[-1] = [states.last, delimiter]
          delimiter = nil
          states.push :php
          encoder.text_token match, :delimiter
        else
          encoder.text_token match, :content
        end
      elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
        encoder.text_token match, :local_variable
      elsif match = scan(/\$/)
        encoder.text_token match, :content
      else
        states.pop
      end

    when :class_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :class
        states.pop
      else
        states.pop
      end

    when :function_expected
      if match = scan(/\s+/)
        encoder.text_token match, :space
      elsif match = scan(/&/)
        encoder.text_token match, :operator
      elsif match = scan(/#{RE::IDENTIFIER}/o)
        encoder.text_token match, :function
        states.pop
      else
        states.pop
      end

    else
      raise_inspect 'Unknown state!', encoder, states
    end

  end

  # Input ended: close groups left open by unterminated strings/interpolations.
  close_open_groups.call states

  encoder
end