To check out this repository please hg clone the following URL, or open the URL using EasyMercurial or your preferred Mercurial client.

Statistics Download as Zip
| Branch: | Tag: | Revision:

root / .svn / pristine / d9 / d9545294de024a7e3a5f7c74daf7f80255e0f5d7.svn-base @ 1297:0a574315af3e

History | View | Annotate | Download (6.4 KB)

1
module CodeRay
2
  
3
  # GZip library for writing and reading token dumps.
4
  autoload :GZip, 'coderay/helpers/gzip'
5
  
6
  # = Tokens  TODO: Rewrite!
7
  #
8
  # The Tokens class represents a list of tokens returned from
9
  # a Scanner.
10
  #
11
  # A token is not a special object, just a two-element Array
12
  # consisting of
13
  # * the _token_ _text_ (the original source of the token in a String) or
14
  #   a _token_ _action_ (begin_group, end_group, begin_line, end_line)
15
  # * the _token_ _kind_ (a Symbol representing the type of the token)
16
  #
17
  # A token looks like this:
18
  #
19
  #   ['# It looks like this', :comment]
20
  #   ['3.1415926', :float]
21
  #   ['$^', :error]
22
  #
23
  # Some scanners also yield sub-tokens, represented by special
24
  # token actions, namely begin_group and end_group.
25
  #
26
  # The Ruby scanner, for example, splits "a string" into:
27
  #
28
  #  [
29
  #   [:begin_group, :string],
30
  #   ['"', :delimiter],
31
  #   ['a string', :content],
32
  #   ['"', :delimiter],
33
  #   [:end_group, :string]
34
  #  ]
35
  #
36
  # Tokens is the interface between Scanners and Encoders:
37
  # The input is split and saved into a Tokens object. The Encoder
38
  # then builds the output from this object.
39
  #
40
  # Thus, the syntax below becomes clear:
41
  #
42
  #   CodeRay.scan('price = 2.59', :ruby).html
43
  #   # the Tokens object is here -------^
44
  #
45
  # See how small it is? ;)
46
  #
47
  # Tokens gives you the power to handle pre-scanned code very easily:
48
  # You can convert it to a webpage, a YAML file, or dump it into a gzip'ed string
49
  # that you put in your DB.
50
  # 
51
  # It also allows you to generate tokens directly (without using a scanner),
52
  # to load them from a file, and still use any Encoder that CodeRay provides.
53
  class Tokens < Array

    # The Scanner instance that created the tokens.
    attr_accessor :scanner

    # Encode the tokens using encoder.
    #
    # encoder can be
    # * a symbol like :html or :statistic
    # * an Encoder class
    # * an Encoder object
    #
    # Anything that responds to +to_sym+ is looked up in Encoders and
    # instantiated with +options+; any other object is used as given and
    # must respond to +encode_tokens+.
    #
    # options are passed to the encoder.
    def encode encoder, options = {}
      encoder = Encoders[encoder].new options if encoder.respond_to? :to_sym
      encoder.encode_tokens self, options
    end

    # Turn tokens into a string by encoding them with a plain
    # CodeRay::Encoders::Encoder instance.
    def to_s
      encode CodeRay::Encoders::Encoder.new
    end

    # Redirects unknown methods to encoder calls.
    #
    # For example, if you call +tokens.html+, the HTML encoder
    # is used to highlight the tokens.
    #
    # If no encoder plugin is found under that name, the call falls
    # through to the default +method_missing+ (usually a NoMethodError).
    def method_missing meth, options = {}
      encode meth, options
    rescue PluginHost::PluginNotFound
      super
    end

    # Split the tokens into parts of the given +sizes+.
    #
    # The result will be an Array of Tokens objects. The parts have
    # the text size specified by the parameter. In addition, each
    # part closes all opened tokens. This is useful to insert tokens
    # between them.
    #
    # Text that remains after the last size has been used up goes into
    # one additional part. If fewer parts than sizes were produced, the
    # result is padded with empty Tokens objects.
    #
    # Raises ArgumentError if the list contains an unknown token action
    # or an item that is neither a String nor a Symbol in content position.
    #
    # This method is used by @Scanner#tokenize@ when called with an Array
    # of source strings. The Diff encoder uses it for inline highlighting.
    def split_into_parts *sizes
      parts = []
      opened = []       # stack of [action, kind] pairs that are currently open
      content = nil     # first half of the current token; nil while waiting for one
      part = Tokens.new
      part_size = 0     # text size collected in the current part so far
      size = sizes.first  # NOTE: shadows Array#size inside this method
      i = 0
      # The list is flat (content, kind, content, kind, ...), so each token
      # takes two iterations. A +for+ loop is used because +redo+ below must
      # re-run the body with the same +item+ and the already shortened +content+.
      for item in self
        case content
        when nil
          content = item
        when String
          if size && part_size + content.size > size  # token must be cut
            if part_size < size  # some part of the token goes into this part
              content = content.dup  # content may not be safe to change
              part << content.slice!(0, size - part_size) << item
            end
            # close all open groups and lines...
            closing = opened.reverse.flatten.map do |content_or_kind|
              case content_or_kind
              when :begin_group
                :end_group
              when :begin_line
                :end_line
              else
                content_or_kind
              end
            end
            part.concat closing
            # Finish the current part, then skip over zero-sized parts.
            # This must run at least once, hence begin...end until.
            begin
              parts << part
              part = Tokens.new
              size = sizes[i += 1]
            end until size.nil? || size > 0
            # ...and open them again.
            part.concat opened.flatten
            part_size = 0
            # Process the rest of the cut token against the new part.
            redo unless content.empty?
          else
            part << content << item
            part_size += content.size
          end
          content = nil
        when Symbol
          case content
          when :begin_group, :begin_line
            opened << [content, item]
          when :end_group, :end_line
            opened.pop
          else
            raise ArgumentError, 'Unknown token action: %p, kind = %p' % [content, item]
          end
          part << content << item
          content = nil
        else
          raise ArgumentError, 'Token input junk: %p, kind = %p' % [content, item]
        end
      end
      parts << part
      parts << Tokens.new while parts.size < sizes.size
      parts
    end

    # Dumps the object into a String that can be saved
    # in files or databases.
    #
    # The dump is created with Marshal.dump;
    # In addition, it is gzipped using GZip.gzip.
    #
    # The returned String object includes Undumping
    # so it has an #undump method. See Tokens.load.
    #
    # You can configure the level of compression,
    # but the default value 7 should be what you want
    # in most cases as it is a good compromise between
    # speed and compression rate.
    #
    # See GZip module.
    def dump gzip_level = 7
      dump = Marshal.dump self
      dump = GZip.gzip dump, gzip_level
      dump.extend Undumping
    end

    # Return the actual number of tokens.
    #
    # Each token occupies two slots of the underlying Array
    # (text or action, then kind), so this is half the Array size.
    def count
      size / 2
    end

    # Include this module to give an object an #undump
    # method.
    #
    # The string returned by Tokens.dump includes Undumping.
    module Undumping
      # Calls Tokens.load with itself.
      def undump
        Tokens.load self
      end
    end

    # Unzip the dump using GZip.gunzip, then restore the
    # object using Marshal.load.
    #
    # The result is commonly a Tokens object, but
    # this is not guaranteed.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects.
    # Only pass dumps that come from a trusted source.
    def self.load dump
      dump = GZip.gunzip dump
      # Return the object directly; do not keep a reference to it in a
      # class-level instance variable.
      Marshal.load dump
    end

    # Token-building interface: each call appends one token to the
    # list, i.e. two Array elements (text or action, then kind).
    alias text_token push
    def begin_group kind; push :begin_group, kind end
    def end_group kind; push :end_group, kind end
    def begin_line kind; push :begin_line, kind end
    def end_line kind; push :end_line, kind end
    # Append a whole flat list of tokens.
    alias tokens concat

  end
214
  
215
end