annotate vendor/gems/coderay-1.0.0/lib/coderay/scanners/groovy.rb @ 1169:492ff72268e3 bug_521

Close obsolete branch bug_521
author Chris Cannam
date Thu, 18 Oct 2012 10:42:48 +0100
parents cbb26bc654de
children
rev   line source
module CodeRay
module Scanners

  load :java

  # Scanner for Groovy.
  #
  # Subclasses the Java scanner and extends its identifier table with a few
  # Groovy keywords and the magic closure variable +it+. On top of that it
  # recognizes single-, double- and triple-quoted strings, slashy regexps
  # and <tt>$ident</tt> / <tt>${ ... }</tt> interpolation.
  #
  # NOTE(review): +IDENT+ is referenced below but not defined in this file;
  # presumably it is inherited from the Java scanner - confirm.
  class Groovy < Java

    register_for :groovy

    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which a value is expected, so a following "/" opens a
    # regexp instead of being the division operator.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:

    # Java's identifier table plus the Groovy-specific additions above.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:

    # These patterns match what may follow a backslash (the backslash itself
    # is matched at the place of use).
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:

    # Maps the opening delimiter of a string/regexp to the pattern that
    # consumes a run of plain content inside it.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:

  protected

    # Scans the whole input and reports every token to +encoder+.
    #
    # encoder - object receiving +text_token+, +begin_group+ and +end_group+
    #           calls.
    # options - accepted for interface compatibility; not read here.
    #
    # Every group that is opened is also closed before returning, even when
    # the input ends inside a string, a regexp or a <tt>${ ... }</tt> block.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      # Saved [state, string_delimiter, paren_depth] triples, one for every
      # "${" block we are currently inside.
      inline_block_stack = []
      inline_block_paren_depth = nil
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      value_expected = true

      until eos?

        # Reset per token: a kind left over from an earlier identifier must
        # not take part in the last_token decision at the end of the loop.
        kind = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            if match.index ?\n
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment
            encoder.text_token match, kind

          elsif bol? && match = scan(/ \#!.* /x)
            kind = :doctype
            encoder.text_token match, kind

          elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            encoder.text_token match, :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # member access: never a keyword
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" that is not the middle part of a ternary
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end
            encoder.text_token match, kind

          elsif match = scan(/;/)
            import_clause = after_def = false
            value_expected = true
            encoder.text_token match, :operator

          elsif match = scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            encoder.text_token match, :operator
            # Count nested braces so we can tell which "}" ends the "${" block.
            if !inline_block_stack.empty?
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            encoder.text_token match, :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            encoder.text_token match, :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lLgG]?/)
              encoder.text_token match, :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # Triple-quoted strings are reported as ordinary :string groups;
            # :multiline_string is only an internal scanner state.
            encoder.begin_group :string
            string_delimiter = match
            encoder.text_token match, :delimiter

          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif value_expected && match = scan(/\//)
            after_def = value_expected = false
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter

          elsif match = scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            encoder.text_token match, :annotation

          elsif match = scan(/\//)
            after_def = false
            value_expected = true
            encoder.text_token match, :operator

          else
            encoder.text_token getch, :error

          end

        when :string, :regexp, :multiline_string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            encoder.text_token match, :delimiter
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            state = :string if state == :multiline_string
            encoder.end_group state
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            # Remember the delimiter rather than the last piece of content:
            # otherwise a string such as "." would make the next identifier
            # look like a member access.
            last_token = match
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are shown as escapes.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            encoder.begin_group :inline
            encoder.text_token '$', :inline_delimiter
            match = match[1..-1]
            encoder.text_token match, IDENT_KIND[match]
            encoder.end_group :inline
            next
          elsif match = scan(/ \$ \{ /x)
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            # Save the string context; it is restored by the matching "}".
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            # The code inside the block must not see string content as its
            # previous token.
            last_token = match
            next

          elsif match = scan(/ \$ /mx)
            encoder.text_token match, :content

          elsif match = scan(/ \\. /mx)
            encoder.text_token match, :content  # TODO: Shouldn't this be :error?

          elsif match = scan(/ \\ | \n /x)
            # Unterminated string/regexp. Close the group under the name it
            # was opened with (:multiline_string was opened as :string).
            encoder.end_group(state == :regexp ? :regexp : :string)
            encoder.text_token match, :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

          end

        else
          raise_inspect 'Unknown state', encoder

        end

        # Comments and the shebang line are transparent for the
        # "previous token" checks in the identifier branch.
        last_token = match unless [:space, :comment, :doctype].include? kind

      end

      # Input ended inside a string or regexp: close its group.
      if [:multiline_string, :string, :regexp].include? state
        encoder.end_group(state == :regexp ? :regexp : :string)
      end

      # Input ended inside one or more "${" blocks: close each inline group
      # together with the string/regexp group that contains it, innermost
      # first.
      until inline_block_stack.empty?
        outer_state = inline_block_stack.pop.first
        encoder.end_group :inline
        encoder.end_group(outer_state == :regexp ? :regexp : :string)
      end

      encoder
    end

  end

end
end