Class CodeRay::Scanners::PHP
In: lib/coderay/scanners/php.rb
Parent: Scanner

Scanner for PHP.

Original by Stefan Walk.

Methods

Constants

KINDS_NOT_LOC = HTML::KINDS_NOT_LOC

Protected Instance methods

[Source]

    # File lib/coderay/scanners/php.rb, line 24
24:     def reset_instance
          # Runs the parent class's reset_instance first, then also resets the
          # nested HTML scanner stored in @html_scanner (created in #setup).
25:       super
26:       @html_scanner.reset
27:     end

[Source]

     # File lib/coderay/scanners/php.rb, line 235
235:     def scan_tokens encoder, options
           # Tokenizes the remaining input and reports tokens to +encoder+
           # (text_token / begin_group / end_group). Returns +encoder+.
           #
           # The scanner is driven by a stack of states (+states+); the top of
           # the stack selects which branch of the main +case+ runs:
           #   :initial            HTML outside of PHP tags
           #   :php                PHP code
           #   :sqstring           single-quoted string, or heredoc opened with <<<'ID'
           #   :dqstring           double-quoted / backtick string, or other heredoc
           #   :class_expected     right after the keyword 'class'
           #   :function_expected  right after the keyword 'function'
           #
           # +options+ is accepted but not referenced anywhere in this method.
236:       
           # Pick the start state by looking ahead at the input.
           # NOTE(review): check / match? are presumably the non-consuming
           # StringScanner look-aheads inherited via Scanner -- confirm.
237:       if check(RE::PHP_START) ||  # starts with <?
238:        (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
239:        check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
240:        check(/.{1,100}#{RE::PHP_START}/om)  # PHP start after max 100 chars
241:         # is HTML with embedded PHP, so start with HTML
242:         states = [:initial]
243:       else
244:         # is just PHP, so start with PHP surrounded by HTML
245:         states = [:initial, :php]
246:       end
247:       
           # label_expected: an identifier directly followed by a single ':'
           #   is reported as :label only while this flag is set.
           # case_expected: set by the keywords 'case' / 'default'; consulted
           #   (and cleared) by the next operator token.
248:       label_expected = true
249:       case_expected = false
250:       
           # heredoc_delimiter: nil, or the Regexp matching the closing
           #   identifier of the heredoc currently being scanned.
           # delimiter: the opening quote character of the current quoted string.
           # modifier: a pending 'b' string prefix, emitted when the string opens.
251:       heredoc_delimiter = nil
252:       delimiter = nil
253:       modifier = nil
254:       
255:       until eos?
256:         
257:         case states.last
258:         
259:         when :initial  # HTML
260:           if match = scan(RE::PHP_START)
261:             encoder.text_token match, :inline_delimiter
262:             label_expected = true
263:             states << :php
264:           else
                 # Everything up to the next PHP start tag (or the rest of the
                 # input if there is none) is handed to the nested HTML scanner.
265:             match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
266:             @html_scanner.tokenize match unless match.empty?
267:           end
268:         
269:         when :php
270:           if match = scan(/\s+/)
271:             encoder.text_token match, :space
272:           
               # /* ... */ comments (possibly unterminated, running to the end of
               # the input) and // or # comments, which stop before a PHP end
               # tag or at the end of the line.
273:           elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
274:             encoder.text_token match, :comment
275:           
276:           elsif match = scan(RE::IDENTIFIER)
277:             kind = Words::IDENT_KIND[match]
278:             if kind == :ident && label_expected && check(/:(?!:)/)
                   # Identifier followed by ':' but not '::'.
279:               kind = :label
280:               label_expected = true
281:             else
282:               label_expected = false
283:               if kind == :ident && match =~ /^[A-Z]/
                     # Plain identifiers starting with an uppercase ASCII letter
                     # are reported as constants.
284:                 kind = :constant
285:               elsif kind == :keyword
286:                 case match
287:                 when 'class'
288:                   states << :class_expected
289:                 when 'function'
290:                   states << :function_expected
291:                 when 'case', 'default'
292:                   case_expected = true
293:                 end
294:               elsif match == 'b' && check(/['"]/)  # binary string literal
                     # Do not emit the 'b' now; it is emitted as a :modifier
                     # token inside the string group by the quote branch below.
295:                 modifier = match
296:                 next
297:               end
298:             end
299:             encoder.text_token match, kind
300:           
301:           elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
302:             label_expected = false
303:             encoder.text_token match, :float
304:           
305:           elsif match = scan(/0x[0-9a-fA-F]+/)
306:             label_expected = false
307:             encoder.text_token match, :hex
308:           
309:           elsif match = scan(/\d+/)
310:             label_expected = false
311:             encoder.text_token match, :integer
312:           
               # Opening quote: ' starts :sqstring; " and ` both start :dqstring.
313:           elsif match = scan(/['"`]/)
314:             encoder.begin_group :string
315:             if modifier
316:               encoder.text_token modifier, :modifier
317:               modifier = nil
318:             end
319:             delimiter = match
320:             encoder.text_token match, :delimiter
321:             states.push match == "'" ? :sqstring : :dqstring
322:           
323:           elsif match = scan(RE::VARIABLE)
324:             label_expected = false
325:             encoder.text_token match, Words::VARIABLE_KIND[match]
326:           
               # '{' pushes another :php state so the matching '}' can pop it.
327:           elsif match = scan(/\{/)
328:             encoder.text_token match, :operator
329:             label_expected = true
330:             states.push :php
331:           
332:           elsif match = scan(/\}/)
333:             if states.size == 1
                   # Nothing left to pop: report the brace as an error.
334:               encoder.text_token match, :error
335:             else
336:               states.pop
337:               if states.last.is_a?(::Array)
                     # The state below is a [state, delimiter] pair saved by the
                     # '{$' branch of :dqstring: this '}' closes an interpolation.
                     # Restore the string state and its quote character.
338:                 delimiter = states.last[1]
339:                 states[-1] = states.last[0]
340:                 encoder.text_token match, :delimiter
341:                 encoder.end_group :inline
342:               else
343:                 encoder.text_token match, :operator
344:                 label_expected = true
345:               end
346:             end
347:           
348:           elsif match = scan(/@/)
349:             label_expected = false
350:             encoder.text_token match, :exception
351:           
               # PHP end tag: the whole state stack is discarded, not just popped.
352:           elsif match = scan(RE::PHP_END)
353:             encoder.text_token match, :inline_delimiter
354:             states = [:initial]
355:           
               # Heredoc start: <<<ID (group 1), <<<"ID" (group 2) or <<<'ID'
               # (group 3). The single-quoted form is scanned as :sqstring, the
               # others as :dqstring.
356:           elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
357:             encoder.begin_group :string
358:             # warn 'heredoc in heredoc?' if heredoc_delimiter
359:             heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
360:             encoder.text_token match, :delimiter
361:             states.push self[3] ? :sqstring : :dqstring
                 # Closing pattern: the identifier, followed by an optional ';'
                 # and the end of the line (look-ahead only).
362:             heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
363:           
364:           elsif match = scan(/#{RE::OPERATOR}/o)
                 # A label may follow ';', or the ':' that ends a case/default.
                 # case_expected is cleared by any operator, whichever it is.
365:             label_expected = match == ';'
366:             if case_expected
367:               label_expected = true if match == ':'
368:               case_expected = false
369:             end
370:             encoder.text_token match, :operator
371:           
372:           else
                 # Unrecognized character: consume one character as an error.
373:             encoder.text_token getch, :error
374:           
375:           end
376:         
377:         when :sqstring
               # Plain content: in heredoc mode anything except backslash and
               # newline; otherwise anything except ' and backslash.
378:           if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
379:             encoder.text_token match, :content
380:           elsif !heredoc_delimiter && match = scan(/'/)
381:             encoder.text_token match, :delimiter
382:             encoder.end_group :string
383:             delimiter = nil
384:             label_expected = false
385:             states.pop
386:           elsif heredoc_delimiter && match = scan(/\n/)
                 # The closing identifier is only looked for right after a newline.
387:             if scan heredoc_delimiter
388:               encoder.text_token "\n", :content
389:               encoder.text_token matched, :delimiter
390:               encoder.end_group :string
391:               heredoc_delimiter = nil
392:               label_expected = false
393:               states.pop
394:             else
395:               encoder.text_token match, :content
396:             end
               # Recognized escapes are :char; any other backslash + character
               # is plain content; a backslash with nothing after it is an error.
397:           elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
398:             encoder.text_token match, :char
399:           elsif match = scan(/\\./m)
400:             encoder.text_token match, :content
401:           elsif match = scan(/\\/)
402:             encoder.text_token match, :error
403:           else
                 # Nothing matched: drop this state without consuming input.
                 # NOTE(review): the :string group opened earlier is not closed
                 # on this path -- confirm whether that is intended.
404:             states.pop
405:           end
406:         
407:         when :dqstring
               # Plain content stops at '$', '{', backslash and the terminator
               # (newline in heredoc mode, otherwise the opening " or `).
408:           if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
409:             encoder.text_token match, :content
410:           elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
411:             encoder.text_token match, :delimiter
412:             encoder.end_group :string
413:             delimiter = nil
414:             label_expected = false
415:             states.pop
416:           elsif heredoc_delimiter && match = scan(/\n/)
                 # The closing identifier is only looked for right after a newline.
417:             if scan heredoc_delimiter
418:               encoder.text_token "\n", :content
419:               encoder.text_token matched, :delimiter
420:               encoder.end_group :string
421:               heredoc_delimiter = nil
422:               label_expected = false
423:               states.pop
424:             else
425:               encoder.text_token match, :content
426:             end
               # Hex (\xH, \xHH) and octal (\0 .. \777) escapes.
427:           elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
428:             encoder.text_token match, :char
               # Named escapes; the quote character itself is escapable only
               # outside heredocs.
429:           elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
430:             encoder.text_token match, :char
431:           elsif match = scan(/\\./m)
432:             encoder.text_token match, :content
433:           elsif match = scan(/\\/)
434:             encoder.text_token match, :error
               # Simple interpolation: $var, $var[ident] and $var->ident.
435:           elsif match = scan(/#{RE::VARIABLE}/o)
436:             if check(/\[#{RE::IDENTIFIER}\]/o)
437:               encoder.begin_group :inline
438:               encoder.text_token match, :local_variable
439:               encoder.text_token scan(/\[/), :operator
440:               encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
441:               encoder.text_token scan(/\]/), :operator
442:               encoder.end_group :inline
443:             elsif check(/\[/)
                   # Any other '[' form after the variable is reported as one
                   # :error token together with the variable.
444:               match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
445:               encoder.text_token match, :error
446:             elsif check(/->#{RE::IDENTIFIER}/o)
447:               encoder.begin_group :inline
448:               encoder.text_token match, :local_variable
449:               encoder.text_token scan(/->/), :operator
450:               encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
451:               encoder.end_group :inline
452:             elsif check(/->/)
                   # '->' not followed by an identifier: error token.
453:               match << scan(/->/)
454:               encoder.text_token match, :error
455:             else
456:               encoder.text_token match, :local_variable
457:             end
458:           elsif match = scan(/\{/)
459:             if check(/\$/)
                   # '{$' opens an interpolation: remember the current string
                   # state and quote as a [state, delimiter] pair in place of
                   # the top state, then scan PHP code until the matching '}'
                   # (handled in the '}' branch of :php).
460:               encoder.begin_group :inline
461:               states[-1] = [states.last, delimiter]
462:               delimiter = nil
463:               states.push :php
464:               encoder.text_token match, :delimiter
465:             else
466:               encoder.text_token match, :content
467:             end
468:           elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
469:             encoder.text_token match, :local_variable
470:           elsif match = scan(/\$/)
                 # A '$' that starts no variable is ordinary content.
471:             encoder.text_token match, :content
472:           else
                 # Nothing matched: drop this state without consuming input.
                 # NOTE(review): the :string group opened earlier is not closed
                 # on this path -- confirm whether that is intended.
473:             states.pop
474:           end
475:         
             # After 'class': skip whitespace, report the next identifier as
             # :class. Anything else leaves the state without consuming input.
476:         when :class_expected
477:           if match = scan(/\s+/)
478:             encoder.text_token match, :space
479:           elsif match = scan(/#{RE::IDENTIFIER}/o)
480:             encoder.text_token match, :class
481:             states.pop
482:           else
483:             states.pop
484:           end
485:         
             # After 'function': skip whitespace, allow '&' (reported as an
             # operator), report the next identifier as :function. Anything
             # else leaves the state without consuming input.
486:         when :function_expected
487:           if match = scan(/\s+/)
488:             encoder.text_token match, :space
489:           elsif match = scan(/&/)
490:             encoder.text_token match, :operator
491:           elsif match = scan(/#{RE::IDENTIFIER}/o)
492:             encoder.text_token match, :function
493:             states.pop
494:           else
495:             states.pop
496:           end
497:         
498:         else
               # The top of the stack is not one of the states handled above.
499:           raise_inspect 'Unknown state!', encoder, states
500:         end
501:         
502:       end
503:       
504:       encoder
505:     end

[Source]

    # File lib/coderay/scanners/php.rb, line 20
20:     def setup
          # Creates the nested HTML scanner that scan_tokens uses for the input
          # outside of PHP tags. It receives this scanner's @tokens as its
          # :tokens option, with :keep_tokens and :keep_state both enabled.
          # NOTE(review): presumably these options make the HTML scanner append
          # to the shared token stream and keep its state between tokenize
          # calls -- confirm against the CodeRay Scanner documentation.
21:       @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
22:     end

[Validate]