# Scans the rest of the input and appends highlighting tokens to +tokens+.
#
# Every token is a two-element array: either [text, kind] for a piece of
# matched text, or an [:open, kind] / [:close, kind] marker pair that brackets
# a group of tokens (document heads, double-quoted strings, block strings and
# quoted keys).
#
# NOTE(review): the patterns look like YAML (document markers, keys, block
# scalars, anchors, aliases, tags); the enclosing scanner class is not visible
# from here, so confirm against it.
#
# A small state machine decides how text is interpreted:
#   :initial  start of a logical line; an unquoted key or a "-" may follow
#   :colon    a key was just read; the next ":" or "-" operator enters :value
#   :value    a value is expected (quoted, block or plain string);
#             the state falls back to :initial at the next newline
#
# The method relies on scanner helpers defined outside this block: eos?, bol?,
# scan, check, matched, getch, pos, self[n], column, line and raise_inspect.
#
# tokens::  collector for the tokens; only << is called on it.
# options:: accepted for interface compatibility; not read by this method.
#
# Returns +tokens+.
def scan_tokens tokens, options

  state = :initial
  key_indent = 0

  until eos?

    kind = nil
    match = nil
    # A remembered key indentation only applies to the line the key was on.
    key_indent = nil if bol?

    if match = scan(/ +[\t ]*/)
      kind = :space

    elsif match = scan(/\n+/)
      kind = :space
      # The pattern guarantees at least one newline was consumed, so the
      # next token starts a fresh logical line.
      state = :initial

    elsif match = scan(/#.*/)
      kind = :comment

    # Subject-less case: each +when+ tries a pattern. Both branches emit their
    # own tokens and +next+, so the generic emission at the bottom is skipped.
    elsif bol? and case
      when match = scan(/---|\.\.\./)
        tokens << [:open, :head]
        tokens << [match, :head]
        tokens << [:close, :head]
        next
      when match = scan(/%.*/)
        tokens << [match, :doctype]
        next
      end

    elsif state == :value and case
      # A double-quoted string, unless it is directly followed by a colon
      # (in which case it is left for the quoted-key branch further down).
      when !check(/(?:"[^"]*")(?=: |:$)/) && scan(/"/)
        tokens << [:open, :string]
        tokens << [matched, :delimiter]
        tokens << [matched, :content] if scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
        tokens << [matched, :delimiter] if scan(/"/)
        tokens << [:close, :string]
        next
      # Block string indicator ("|" or ">", optionally followed by "-" or "+").
      when match = scan(/[|>][-+]?/)
        tokens << [:open, :string]
        tokens << [match, :delimiter]
        # Continuation lines must be indented deeper than the key; without a
        # key on this line the indicator's own position is used instead.
        # NOTE(review): +column+ is defined elsewhere -- verify its semantics.
        string_indent = key_indent || column(pos - match.size - 1)
        tokens << [matched, :content] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
        tokens << [:close, :string]
        next
      # Plain (unquoted) string up to end of line or a trailing comment,
      # plus any more deeply indented continuation lines.
      when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
        tokens << [match, :string]
        string_indent = key_indent || column(pos - match.size - 1)
        tokens << [matched, :string] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
        next
      end

    # When no +when+ matches, the case yields nil and control falls through
    # to the +else+ branch below.
    elsif case
      when match = scan(/[-:](?= |$)/)
        # The pattern only yields ":" or "-". Either one after a key starts a
        # value; a "-" at the start of a line does as well.
        state = :value if state == :colon || (state == :initial && match == '-')
        kind = :operator
      when match = scan(/[,{}\[\]]/)
        kind = :operator
      # Unquoted key: text that is directly followed by a colon.
      when state == :initial && match = scan(/[\w.() ]*\S(?=: |:$)/)
        kind = :key
        key_indent = column(pos - match.size - 1)

        state = :colon
      # Quoted key: emitted as a group of delimiter / content / delimiter.
      when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?=: |:$)/)
        tokens << [:open, :key]
        tokens << [match[0,1], :delimiter]
        tokens << [match[1..-2], :content]
        tokens << [match[-1,1], :delimiter]
        tokens << [:close, :key]
        key_indent = column(pos - match.size - 1)

        state = :colon
        next
      # "!type" optionally followed by ":class".
      when scan(/(![\w\/]+)(:([\w:]+))?/)
        tokens << [self[1], :type]
        if self[2]
          tokens << [':', :operator]
          tokens << [self[3], :class]
        end
        next
      when scan(/&\S+/)
        kind = :variable
      when scan(/\*\w+/)
        kind = :global_variable
      when scan(/<</)
        kind = :class_variable
      when scan(/\d\d:\d\d:\d\d/)
        kind = :oct
      when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
        kind = :oct
      when scan(/:\w+/)
        kind = :symbol
      when scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
        kind = :error
      when scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
        kind = :error
      end

    else
      # Nothing matched: consume one character so the loop always advances.
      getch
      kind = :error

    end

    # Branches that called +scan+ without assigning +match+ fall back to the
    # scanner's most recent match.
    match ||= matched

    if $CODERAY_DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens, state unless match

    tokens << [match, kind]

  end

  tokens
end