To check out this repository, please `hg clone` the following URL, or open it in EasyMercurial or your preferred Mercurial client.
root / .svn / pristine / 0e / 0ee15adcf36536c48b6695748041d2c5e43cb26a.svn-base @ 912:5e80956cc792
History | View | Annotate | Download (8.99 KB)
module CodeRay
module Scanners

  load :java

  # Scanner for Groovy.
  #
  # Subclasses the Java scanner and reuses its IDENT pattern and IDENT_KIND
  # word list, adding the Groovy-specific keywords, the magic variable +it+,
  # single-, double- and triple-quoted strings, slashy regexps, and
  # <tt>$ident</tt> / <tt>${...}</tt> interpolation.
  class Groovy < Java

    register_for :groovy

    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which a value (and therefore possibly a /regexp/) may follow.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:

    # Java's identifier classification, extended with the Groovy additions.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:

    # What may follow a backslash inside a string / a regexp.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE = / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE = / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:

    # Plain content pattern for each opening delimiter.
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:

  protected

    # Tokenizes the scanner's input and feeds the tokens to +encoder+.
    #
    # The scanner is a small state machine:
    # * <tt>:initial</tt> - ordinary code;
    # * <tt>:string</tt>, <tt>:multiline_string</tt>, <tt>:regexp</tt> - inside
    #   a literal opened by +string_delimiter+.
    #
    # Both <tt>:string</tt> and <tt>:multiline_string</tt> literals are emitted
    # as a <tt>:string</tt> token group; <tt>:regexp</tt> literals as a
    # <tt>:regexp</tt> group. <tt>${ ... }</tt> inside a literal pushes the
    # current literal state onto +inline_block_stack+ and scans the embedded
    # code in <tt>:initial</tt> state until the matching <tt>}</tt>.
    #
    # +options+ is accepted for interface compatibility and is not read here.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      inline_block_stack = []          # saved [state, string_delimiter, depth] of enclosing literals
      inline_block_paren_depth = nil   # brace nesting depth inside the current ${ ... }
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      value_expected = true            # true / :regexp when a leading "/" starts a regexp

      until eos?

        # Reset per token so the last_token filter below never sees the kind
        # of an earlier identifier.
        kind = nil

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            if match.index ?\n
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            kind = :comment
            encoder.text_token match, kind

          elsif bol? && match = scan(/ \#!.* /x)
            kind = :doctype
            encoder.text_token match, kind

          elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            encoder.text_token match, :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # member access: never a keyword
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # map key / named argument, but not the middle of a ternary
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end
            encoder.text_token match, kind

          elsif match = scan(/;/)
            import_clause = after_def = false
            value_expected = true
            encoder.text_token match, :operator

          elsif match = scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            encoder.text_token match, :operator
            if !inline_block_stack.empty?
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            encoder.text_token match, :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                # resume the literal that contained the ${ ... }
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            encoder.text_token match, :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lLgG]?/)
              encoder.text_token match, :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # multiline strings are emitted as an ordinary :string group
            encoder.begin_group :string
            string_delimiter = match
            encoder.text_token match, :delimiter

          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # only quotes can match here; "/" is handled by the branch below
            state = :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif value_expected && match = scan(/\//)
            after_def = value_expected = false
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter

          elsif match = scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            encoder.text_token match, :annotation

          elsif match = scan(/\//)
            # no value expected here, so this "/" is the division operator
            after_def = false
            value_expected = true
            encoder.text_token match, :operator

          else
            encoder.text_token getch, :error

          end

        when :string, :regexp, :multiline_string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            encoder.text_token match, :delimiter
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            # close the group under the kind it was opened with
            encoder.end_group(state == :regexp ? :regexp : :string)
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # single-quoted strings only treat \\ and \' as escapes
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            encoder.begin_group :inline
            encoder.text_token '$', :inline_delimiter
            match = match[1..-1]
            encoder.text_token match, IDENT_KIND[match]
            encoder.end_group :inline
            next
          elsif match = scan(/ \$ \{ /x)
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            # remember the enclosing literal, then scan the embedded code
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif match = scan(/ \$ /mx)
            encoder.text_token match, :content

          elsif match = scan(/ \\. /mx)
            encoder.text_token match, :content  # TODO: Shouldn't this be :error?

          elsif match = scan(/ \\ | \n /x)
            # unterminated literal: close its group (opened as :string or
            # :regexp, never :multiline_string) and flag the offending char
            encoder.end_group(state == :regexp ? :regexp : :string)
            encoder.text_token match, :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

          end

        else
          raise_inspect 'Unknown state', encoder

        end

        # Comments and doctypes must not hide a preceding "." or "?".
        last_token = match unless [:space, :comment, :doctype].include? kind

      end

      # Input ended inside a literal: close its group under the kind it was
      # opened with.
      if [:multiline_string, :string, :regexp].include? state
        encoder.end_group(state == :regexp ? :regexp : :string)
      end

      # Input ended inside one or more ${ ... } blocks: close each inline
      # group together with the literal that contained it.
      until inline_block_stack.empty?
        outer_state, = inline_block_stack.pop
        encoder.end_group :inline
        encoder.end_group(outer_state == :regexp ? :regexp : :string)
      end

      encoder
    end

  end

end
end