To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / .svn / pristine / 0e / 0ee15adcf36536c48b6695748041d2c5e43cb26a.svn-base @ 912:5e80956cc792

History | View | Annotate | Download (8.99 KB)

1 909:cbb26bc654de Chris
module CodeRay
module Scanners

  load :java

  # Scanner for Groovy.
  #
  # Builds on the Java scanner: it reuses Java's identifier table (extended
  # with the Groovy-specific keywords below) and adds handling for Groovy
  # string forms ('...', "...", '''...''', """...""", /.../) including
  # $ident and ${...} interpolation.
  class Groovy < Java

    register_for :groovy

    # TODO: check list of keywords
    GROOVY_KEYWORDS = %w[
      as assert def in
    ]  # :nodoc:
    # Keywords after which a value (and therefore possibly a /regexp/ literal)
    # may follow; used to decide whether a '/' starts a regexp or is division.
    KEYWORDS_EXPECTING_VALUE = WordList.new.add %w[
      case instanceof new return throw typeof while as assert in
    ]  # :nodoc:
    GROOVY_MAGIC_VARIABLES = %w[ it ]  # :nodoc:

    # Java's identifier classification plus the Groovy additions above.
    IDENT_KIND = Java::IDENT_KIND.dup.
      add(GROOVY_KEYWORDS, :keyword).
      add(GROOVY_MAGIC_VARIABLES, :local_variable)  # :nodoc:

    # Escape bodies (the part after the backslash) recognized in strings and regexps.
    ESCAPE = / [bfnrtv$\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x  # :nodoc:
    UNICODE_ESCAPE =  / u[a-fA-F0-9]{4} /x  # :nodoc: no 4-byte unicode chars? U[a-fA-F0-9]{8}
    REGEXP_ESCAPE =  / [bfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} | \d | [bBdDsSwW\/] /x  # :nodoc:

    # Maps an opening delimiter to the pattern matching a run of plain content
    # inside that kind of string (stops at escapes, '$' interpolation where it
    # applies, and the closing delimiter).
    # TODO: interpretation inside ', ", /
    STRING_CONTENT_PATTERN = {
      "'" => /(?>\\[^\\'\n]+|[^\\'\n]+)+/,
      '"' => /[^\\$"\n]+/,
      "'''" => /(?>[^\\']+|'(?!''))+/,
      '"""' => /(?>[^\\$"]+|"(?!""))+/,
      '/' => /[^\\$\/\n]+/,
    }  # :nodoc:

  protected

    # Tokenizes the input and feeds the tokens to +encoder+.
    #
    # The scanner is a small state machine:
    # * :initial          - regular code
    # * :string           - inside '...' or "..."
    # * :multiline_string - inside '''...''' or """...""" (emitted as a :string group)
    # * :regexp           - inside /.../
    #
    # A "${" inside a string pushes the current string context onto
    # inline_block_stack and switches back to :initial until the matching "}".
    #
    # +options+ is accepted for interface compatibility; it is not read here.
    #
    # Returns +encoder+.
    def scan_tokens encoder, options

      state = :initial
      inline_block_stack = []          # saved [state, string_delimiter, paren_depth] per open ${...}
      inline_block_paren_depth = nil   # brace nesting depth inside the current ${...}
      string_delimiter = nil
      import_clause = class_name_follows = last_token = after_def = false
      value_expected = true            # true/:regexp when a '/' would start a regexp literal

      until eos?

        case state

        when :initial

          if match = scan(/ \s+ | \\\n /x)
            encoder.text_token match, :space
            if match.index ?\n
              # A newline ends an import clause / def header and starts a new statement.
              import_clause = after_def = false
              value_expected = true unless value_expected
            end
            next

          elsif match = scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
            value_expected = true
            after_def = false
            encoder.text_token match, :comment

          elsif bol? && match = scan(/ \#!.* /x)
            encoder.text_token match, :doctype

          elsif import_clause && match = scan(/ (?!as) #{IDENT} (?: \. #{IDENT} )* (?: \.\* )? /ox)
            after_def = value_expected = false
            encoder.text_token match, :include

          elsif match = scan(/ #{IDENT} | \[\] /ox)
            kind = IDENT_KIND[match]
            value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
            if last_token == '.'
              # Anything after a dot is a member name, even if it looks like a keyword.
              kind = :ident
            elsif class_name_follows
              kind = :class
              class_name_follows = false
            elsif after_def && check(/\s*[({]/)
              kind = :method
              after_def = false
            elsif kind == :ident && last_token != '?' && check(/:/)
              # "name:" is a map key / named argument unless it is the middle of "a ? b : c".
              kind = :key
            else
              class_name_follows = true if match == 'class' || (import_clause && match == 'as')
              import_clause = match == 'import'
              after_def = true if match == 'def'
            end
            encoder.text_token match, kind

          elsif match = scan(/;/)
            import_clause = after_def = false
            value_expected = true
            encoder.text_token match, :operator

          elsif match = scan(/\{/)
            class_name_follows = after_def = false
            value_expected = true
            encoder.text_token match, :operator
            if !inline_block_stack.empty?
              # Track nested braces so the closing "}" of ${...} can be recognized.
              inline_block_paren_depth += 1
            end

          # TODO: ~'...', ~"..." and ~/.../ style regexps
          elsif match = scan(/ \.\.<? | \*?\.(?!\d)@? | \.& | \?:? | [,?:(\[] | -[->] | \+\+ |
              && | \|\| | \*\*=? | ==?~ | <=?>? | [-+*%^~&|>=!]=? | <<<?=? | >>>?=? /x)
            value_expected = true
            value_expected = :regexp if match == '~'
            after_def = false
            encoder.text_token match, :operator

          elsif match = scan(/ [)\]}] /x)
            value_expected = after_def = false
            if !inline_block_stack.empty? && match == '}'
              inline_block_paren_depth -= 1
              if inline_block_paren_depth == 0  # closing brace of inline block reached
                encoder.text_token match, :inline_delimiter
                encoder.end_group :inline
                # Resume the string that contained the ${...}.
                state, string_delimiter, inline_block_paren_depth = inline_block_stack.pop
                next
              end
            end
            encoder.text_token match, :operator

          elsif check(/[\d.]/)
            after_def = value_expected = false
            if match = scan(/0[xX][0-9A-Fa-f]+/)
              encoder.text_token match, :hex
            elsif match = scan(/(?>0[0-7]+)(?![89.eEfF])/)
              encoder.text_token match, :octal
            elsif match = scan(/\d+[fFdD]|\d*\.\d+(?:[eE][+-]?\d+)?[fFdD]?|\d+[eE][+-]?\d+[fFdD]?/)
              encoder.text_token match, :float
            elsif match = scan(/\d+[lLgG]?/)
              encoder.text_token match, :integer
            end

          elsif match = scan(/'''|"""/)
            after_def = value_expected = false
            state = :multiline_string
            # Multi-line strings are emitted as ordinary :string groups.
            encoder.begin_group :string
            string_delimiter = match
            encoder.text_token match, :delimiter

          # TODO: record.'name' syntax
          elsif match = scan(/["']/)
            after_def = value_expected = false
            # The pattern above only matches quotes, so this always selects :string.
            state = match == '/' ? :regexp : :string
            encoder.begin_group state
            string_delimiter = match
            encoder.text_token match, :delimiter

          elsif value_expected && match = scan(/\//)
            # A slash where a value is expected opens a regexp literal.
            after_def = value_expected = false
            encoder.begin_group :regexp
            state = :regexp
            string_delimiter = '/'
            encoder.text_token match, :delimiter

          elsif match = scan(/ @ #{IDENT} /ox)
            after_def = value_expected = false
            encoder.text_token match, :annotation

          elsif match = scan(/\//)
            # Any other slash is the division operator.
            after_def = false
            value_expected = true
            encoder.text_token match, :operator

          else
            encoder.text_token getch, :error

          end

        when :string, :regexp, :multiline_string
          if match = scan(STRING_CONTENT_PATTERN[string_delimiter])
            encoder.text_token match, :content

          elsif match = scan(state == :multiline_string ? /'''|"""/ : /["'\/]/)
            encoder.text_token match, :delimiter
            if state == :regexp
              # TODO: regexp modifiers? s, m, x, i?
              modifiers = scan(/[ix]+/)
              encoder.text_token modifiers, :modifier if modifiers && !modifiers.empty?
            end
            state = :string if state == :multiline_string
            encoder.end_group state
            string_delimiter = nil
            after_def = value_expected = false
            state = :initial
            next

          elsif (state == :string || state == :multiline_string) &&
              (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
            # In single-quoted strings only \\ and \' are highlighted as escapes.
            if string_delimiter[0] == ?' && !(match == "\\\\" || match == "\\'")
              encoder.text_token match, :content
            else
              encoder.text_token match, :char
            end
          elsif state == :regexp && match = scan(/ \\ (?: #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
            encoder.text_token match, :char

          elsif match = scan(/ \$ #{IDENT} /mox)
            # $name interpolation: a self-contained inline group.
            encoder.begin_group :inline
            encoder.text_token '$', :inline_delimiter
            match = match[1..-1]
            encoder.text_token match, IDENT_KIND[match]
            encoder.end_group :inline
            next
          elsif match = scan(/ \$ \{ /x)
            # ${...} interpolation: remember the string context and scan code until "}".
            encoder.begin_group :inline
            encoder.text_token match, :inline_delimiter
            inline_block_stack << [state, string_delimiter, inline_block_paren_depth]
            inline_block_paren_depth = 1
            state = :initial
            next

          elsif match = scan(/ \$ /mx)
            encoder.text_token match, :content

          elsif match = scan(/ \\. /mx)
            encoder.text_token match, :content  # TODO: Shouldn't this be :error?

          elsif match = scan(/ \\ | \n /x)
            # Unterminated string. The group was opened as :string (also for
            # multi-line strings) or :regexp, so close it with the same kind.
            encoder.end_group state == :regexp ? :regexp : :string
            encoder.text_token match, :error
            after_def = value_expected = false
            state = :initial

          else
            raise_inspect "else case \" reached; %p not handled." % peek(1), encoder

          end

        else
          raise_inspect 'Unknown state', encoder

        end

        # NOTE(review): +kind+ is only assigned in the identifier branch above, so
        # this check sees the kind of the most recent identifier, not of the
        # token just emitted - confirm whether that is intended.
        last_token = match unless [:space, :comment, :doctype].include? kind

      end

      # Input ended inside a string or regexp: close the open group using the
      # kind it was opened with (:multiline_string groups are opened as :string).
      if [:multiline_string, :string, :regexp].include? state
        encoder.end_group state == :regexp ? :regexp : :string
      end

      # Input ended inside one or more ${...} blocks: close each inline group
      # and the string/regexp group that contained it, innermost first.
      until inline_block_stack.empty?
        outer_state, = inline_block_stack.pop
        encoder.end_group :inline
        encoder.end_group outer_state == :regexp ? :regexp : :string
      end

      encoder
    end

  end

end
end