# Tokenizer: converts a markdown string into a flat stream of Tokens.
require_relative "./token"
require_relative "./rule"
3
class Tokenizer
  class << self
    # Converts a markdown string into a flat list of Tokens.
    #
    # @param markdown [String] raw markdown input
    # @return [Array<Token>] one token per matched rule or text run,
    #   always terminated by an :eof token
    def tokenize(markdown)
      scan(markdown, [])
    end

    private

    # Consumes +markdown+ from the front, appending one token per pass
    # until the input is exhausted, then appends the :eof sentinel.
    #
    # Iterative rather than recursive: the previous recursive form used
    # one stack frame per token, which overflowed on large documents.
    def scan(markdown, tokens)
      until markdown.length == 0
        next_token = nil

        rules.each do |rule|
          if rule.applies_to?(markdown[0])
            next_token = rule.tokenize(markdown)
            break
          end
        end

        # No rule claimed the leading character: consume a plain-text run.
        next_token = text_token(markdown) if next_token.nil?

        tokens += [next_token]
        markdown = markdown[next_token.length..-1]
      end

      tokens.append(eof_token)
    end

    # Sentinel token marking the end of input.
    def eof_token
      Token.new(type: :eof, value: "")
    end

    # Takes the longest leading run of characters that no rule's regex
    # matches and wraps it in a single :text token.
    def text_token(markdown)
      text = markdown.each_char.take_while { |char|
        !char.match?(text_boundary)
      }.join("")

      Token.new(type: :text, value: text)
    end

    # Combined pattern of every rule regex; a :text run ends where any
    # rule could begin. Memoized — previously recomputed per call.
    def text_boundary
      @text_boundary ||= Regexp.union(rules.map(&:regex))
    end

    # Single-character token rules. Memoized and frozen: the original
    # rebuilt this array (recompiling every Regexp) on each scan step,
    # making tokenization needlessly allocation-heavy.
    def rules
      @rules ||= [
        Rule.new(
          # Was Regexp.new("\_"): Ruby treats the unknown escape \_ as a
          # plain underscore, so "_" is the equivalent, honest spelling.
          regex: Regexp.new("_"),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :underscore, value: "_")
          }
        ),
        Rule.new(
          regex: Regexp.new("\\*"),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :asterisk, value: "*")
          }
        ),
        Rule.new(
          regex: Regexp.new("\\["),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :open_square_bracket, value: "[")
          }
        ),
        Rule.new(
          regex: Regexp.new("\\]"),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :close_square_bracket, value: "]")
          }
        ),
        Rule.new(
          regex: Regexp.new("\\("),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :open_parenthesis, value: "(")
          }
        ),
        Rule.new(
          regex: Regexp.new("\\)"),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :close_parenthesis, value: ")")
          }
        ),
        Rule.new(
          regex: Regexp.new("\n"),
          tokenize_rule: ->(markdown, regex) {
            Token.new(type: :newline, value: "\n")
          }
        ),
      ].freeze
    end
  end
end
93