To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / vendor / gems / coderay-0.9.7 / lib / coderay / scanner.rb @ 442:753f1380d6bc

History | View | Annotate | Download (7.97 KB)

1
module CodeRay
2

    
3
  require 'coderay/helpers/plugin'
4

    
5
  # = Scanners
  #
  # This module holds the Scanner class and its subclasses.
  # For example, the Ruby scanner is named CodeRay::Scanners::Ruby
  # and can be found in coderay/scanners/ruby.
  #
  # Scanners also provides methods and constants for the register
  # mechanism and the [] method that returns the Scanner class
  # belonging to the given lang.
  #
  # See PluginHost.
16
  module Scanners
17
    extend PluginHost
18
    plugin_path File.dirname(__FILE__), 'scanners'
19

    
20
    require 'strscan'
21

    
22
    # = Scanner
    #
    # The base class for all Scanners.
    #
    # It is a subclass of Ruby's great +StringScanner+, which
    # makes it easy to access the scanning methods inside.
    #
    # It is also +Enumerable+, so you can use it like an Array of
    # Tokens:
    #
    #   require 'coderay'
    #
    #   c_scanner = CodeRay::Scanners[:c].new "if (*p == '{') nest++;"
    #
    #   for text, kind in c_scanner
    #     puts text if kind == :operator
    #   end
    #
    #   # prints: (*==)++;
    #
    # OK, this is a very simple example :)
    # You can also use +map+, +any?+, +find+ and even +sort_by+,
    # if you want.
45
    class Scanner < StringScanner
46
      
47
      extend Plugin
48
      plugin_host Scanners
49

    
50
      # Raised if a Scanner fails while scanning
51
      ScanError = Class.new(Exception)
52

    
53
      require 'coderay/helpers/word_list'
54

    
55
      # The default options for all scanner classes.
56
      #
57
      # Define @default_options for subclasses.
58
      DEFAULT_OPTIONS = { :stream => false }
59
      
60
      KINDS_NOT_LOC = [:comment, :doctype]
61

    
62
      class << self
63

    
64
        # Returns whether the Scanner class can be used in streaming mode.
        #
        # A scanner class counts as streamable when it includes the
        # Streamable marker module (this is what makes its instances pass
        # the +kind_of? Streamable+ check in #initialize) or when the class
        # object itself has been extended with it.
        #
        # NOTE(review): the previous implementation only tested
        # +is_a? Streamable+ on the class object, which is false for classes
        # that merely include the module. That check is kept as a fallback
        # so every class that answered +true+ before still does.
        def streamable?
          include?(Streamable) || is_a?(Streamable)
        end
68

    
69
        # Normalizes +code+ for scanning and returns the result.
        #
        # * +code+ is converted with +to_s+ first.
        # * On Rubies whose Strings have an encoding, input that is not
        #   tagged as UTF-8, or that is not valid in its encoding, is copied
        #   and re-tagged as Windows-1252. If the copy is not valid in that
        #   encoding either, the original encoding is restored and the copy
        #   is transcoded to UTF-8, replacing invalid or undefined
        #   characters with '?'.
        # * Finally the line endings are normalized with String#to_unix.
        #
        # The String passed in is never modified in place; all encoding
        # changes are applied to a copy.
        def normify code
          code = code.to_s
          if code.respond_to?(:encoding) && (code.encoding.name != 'UTF-8' || !code.valid_encoding?)
            code = code.dup
            original_encoding = code.encoding
            code.force_encoding 'Windows-1252'
            unless code.valid_encoding?
              code.force_encoding original_encoding
              if code.encoding.name == 'UTF-8'
                # Detour through UTF-16BE before converting back to UTF-8;
                # presumably because a UTF-8 -> UTF-8 conversion would not
                # replace the invalid bytes - TODO confirm.
                code.encode! 'UTF-16BE', :invalid => :replace, :undef => :replace, :replace => '?'
              end
              code.encode! 'UTF-8', :invalid => :replace, :undef => :replace, :replace => '?'
            end
          end
          code.to_unix
        end
85
        
86
        # Gets or sets the file extension associated with this scanner class.
        #
        # * With an +extension+, stores it converted to a String and
        #   returns the stored value.
        # * Without one, returns the stored extension; if none has been
        #   stored yet, it defaults to +plugin_id+ converted to a String
        #   (and that default is remembered).
        def file_extension extension = nil
          return @file_extension = extension.to_s if extension
          @file_extension ||= plugin_id.to_s
        end
93

    
94
      end
95

    
96
=begin
## Excluded for speed reasons; protected seems to make methods slow.

  # Save the StringScanner methods from being called.
  # This would not be useful for highlighting.
  strscan_public_methods =
    StringScanner.instance_methods -
    StringScanner.ancestors[1].instance_methods
  protected(*strscan_public_methods)
=end
106

    
107
      # Create a new Scanner.
      #
      # * +code+ is the input String. It is normalized with
      #   Scanner.normify and then handed to the superclass
      #   (StringScanner).
      # * +options+ is a Hash with Symbols as keys.
      #   It is merged with the default options of the class (you can
      #   overwrite default options here.) A token container passed as
      #   options[:tokens] is used instead of a freshly created one.
      # * +block+ is the callback for streamed highlighting.
      #
      # If you set :stream to +true+ in the options, the Scanner uses a
      # TokenStream with the +block+ as callback to handle the tokens.
      #
      # Else, a Tokens object is used.
      #
      # Raises a RuntimeError when Scanner itself (rather than a subclass)
      # is instantiated, and NotStreamableError when :stream is set but the
      # scanner is not Streamable. A mismatch between :stream and the
      # presence of a block only produces a warning.
      def initialize code='', options = {}, &block
        raise "I am only the basic Scanner class. I can't scan "\
          "anything. :( Use my subclasses." if self.class == Scanner

        @options = self.class::DEFAULT_OPTIONS.merge options

        super Scanner.normify(code)

        # Note: read from the options as passed in, not from the merged
        # @options.
        @tokens = options[:tokens]
        if @options[:stream]
          warn "warning in CodeRay::Scanner.new: :stream is set, "\
            "but no block was given" unless block_given?
          raise NotStreamableError, self unless kind_of? Streamable
          @tokens ||= TokenStream.new(&block)
        else
          warn "warning in CodeRay::Scanner.new: Block given, "\
            "but :stream is #{@options[:stream]}" if block_given?
          @tokens ||= Tokens.new
        end
        @tokens.scanner = self

        setup
      end
143

    
144
      # Resets the scanner: calls the superclass's +reset+ and then
      # clears the per-scan state with reset_instance.
      def reset
        super
        reset_instance
      end
148

    
149
      # Replaces the input string.
      #
      # +code+ is normalized with Scanner.normify before it is stored;
      # afterwards the per-scan state is cleared with reset_instance.
      def string= code
        code = Scanner.normify(code)
        if defined?(RUBY_DESCRIPTION) && RUBY_DESCRIPTION['rubinius 1.0.1']
          # Special case for Rubinius 1.0.1: set the state directly instead
          # of delegating to the superclass (presumably a workaround for
          # its StringScanner#string= - TODO confirm).
          reset_state
          @string = code
        else
          super code
        end
        reset_instance
      end
159

    
160
      # More mnemonic accessor name for the input string.
161
      alias code string
162
      alias code= string=
163

    
164
      # Returns the Plugin ID for this scanner, i.e. the +plugin_id+ of
      # the scanner's class.
      def lang
        self.class.plugin_id
      end
168

    
169
      # Scans the code and returns all tokens.
      #
      # * +new_string+, if given, replaces the current input before
      #   scanning.
      # * +options+ is merged over the scanner's own options and passed
      #   on to scan_tokens.
      #
      # In streaming mode the scanner is reset first (unless a new string
      # was just assigned) and the token container itself is returned;
      # otherwise the return value of scan_tokens is returned. Either way
      # the result is remembered for #tokens.
      def tokenize new_string=nil, options = {}
        options = @options.merge(options)
        self.string = new_string if new_string
        @cached_tokens =
          if @options[:stream]  # :stream must have been set already
            reset unless new_string
            scan_tokens @tokens, options
            @tokens
          else
            scan_tokens @tokens, options
          end
      end
182

    
183
      # Returns the tokens of the last scan, running #tokenize first if
      # no cached result is available.
      def tokens
        @cached_tokens ||= tokenize
      end
186
      
187
      # Whether the scanner is in streaming mode, i.e. whether the
      # :stream option is set. Always returns +true+ or +false+.
      def streaming?
        !!@options[:stream]
      end
191

    
192
      # Traverses the tokens, passing each one to +block+.
      #
      # Raises ArgumentError in streaming mode.
      def each &block
        raise ArgumentError, 'Cannot traverse TokenStream.' if @options[:stream]
        tokens.each(&block)
      end
198
      include Enumerable
199

    
200
      # The current line position of the scanner (the first line is 1).
      #
      # Only newlines that lie before the scan position are counted, so a
      # scanner that is positioned directly on a newline is still reported
      # as being on the line that newline terminates.
      #
      # +pos+ is a byte offset, so the consumed part is cut out by bytes
      # where String#byteslice is available; older Rubies fall back to
      # plain indexing.
      #
      # Beware, this is implemented inefficiently. It should be used
      # for debugging only.
      def line
        source = string
        offset = pos
        consumed =
          if source.respond_to?(:byteslice)
            source.byteslice(0, offset)
          else
            source[0...offset]
          end
        consumed.count("\n") + 1
      end
207
      
208
      # Returns the column for position +pos+ (default: the current scan
      # position).
      #
      # The result is the distance between +pos+ and the last newline
      # found at or before +pos+; when there is no such newline, +pos+
      # itself is returned. Positions <= 0 yield 0.
      #
      # When the input's byte size differs from its character count, a
      # binary-encoded copy is cached in @bin_string and searched instead,
      # so that +pos+ is applied as a byte offset.
      def column pos = self.pos
        return 0 if pos <= 0
        text = string
        if text.respond_to?(:bytesize) && (defined?(@bin_string) || text.bytesize != text.size)
          @bin_string ||= text.dup.force_encoding('binary')
          text = @bin_string
        end
        pos - (text.rindex(?\n, pos) || 0)
      end
217
      
218
      # Marshal support: only the scanner's options are dumped.
      def marshal_dump
        @options
      end
221
      
222
      # Marshal support: restores the options written by marshal_dump.
      # Nothing else is restored here.
      def marshal_load options
        @options = options
      end
225

    
226
    protected
227

    
228
      # Can be implemented by subclasses to do some initialization
      # that has to be done once per instance. Called at the end of
      # #initialize; the default implementation does nothing.
      #
      # Use reset for initialization that has to be done once per
      # scan.
      def setup
      end
235

    
236
      # This is the central method, and commonly the only one a
      # subclass implements.
      #
      # Subclasses must implement this method; it must return +tokens+
      # and must only use Tokens#<< for storing scanned tokens!
      #
      # The default implementation raises NotImplementedError.
      def scan_tokens tokens, options
        raise NotImplementedError, "#{self.class}#scan_tokens not implemented."
      end
245

    
246
      # Clears the per-scan state: empties the token container (unless
      # the :keep_tokens option is set), drops the cached scan result and
      # invalidates the cached binary copy of the input, if one exists.
      def reset_instance
        @tokens.clear unless @options[:keep_tokens]
        @cached_tokens = nil
        # Only touch @bin_string when it already exists, so it stays
        # undefined until #column actually needs it.
        @bin_string = nil if defined? @bin_string
      end
251

    
252
      # Scanner error with additional status information.
      #
      # Raises a ScanError whose message contains:
      #
      # * the basename of the caller's location and +msg+,
      # * the number of +tokens+ and the last (up to) ten of them,
      # * the current line, column and position,
      # * the last match, +state+, and the bol?/eos? flags,
      # * up to +ambit+ characters of input before and after the current
      #   position.
      def raise_inspect msg, tokens, state = 'No state given!', ambit = 30
        # Clamp the look-behind window at the start of the input: a
        # negative start index would make String#[] count from the END of
        # the string and show unrelated text (or nil).
        behind_start = [pos - ambit, 0].max
        raise ScanError, <<-EOE % [


***ERROR in %s: %s (after %d tokens)

tokens:
%s

current line: %d  column: %d  pos: %d
matched: %p  state: %p
bol? = %p,  eos? = %p

surrounding code:
%p  ~~  %p


***ERROR***

        EOE
          File.basename(caller[0]),
          msg,
          tokens.size,
          tokens.last(10).map { |t| t.inspect }.join("\n"),
          line, column, pos,
          matched, state, bol?, eos?,
          string[behind_start, pos - behind_start],
          string[pos, ambit],
        ]
      end
283

    
284
    end
285

    
286
  end
287
end
288

    
289
class String
  # Returns the string with DOS ("\r\n") and old Mac ("\r") line endings
  # converted to Unix newlines ("\n").
  #
  # When the string contains no carriage return at all, +self+ is
  # returned (not a copy).
  def to_unix
    return self unless include?("\r")
    gsub(/\r\n?/, "\n")
  end
end