手作り有限状態機械で字句解析

Last Modified: Wed Apr 7 10:54:14 UTC 2010

正規表現よりも(場合によっては)扱いやすい手作りの有限状態機械を使った字句解析の紹介。(約25分)


a.py

#!/usr/bin/env python

class Parser(object):

  def __init__(self):
    self.parse1 = self.parse_main
    self.items = []
    self.item = ''
    return

  def feed(self, text):
    i = 0
    while i < len(text):
      (self.parse1, i) = self.parse1(text, i)
    return
  
  def parse_main(self, text, i):
    c = text[i]
    if c == ',':
      self.finish_item()
      return (self.parse_main, i+1)
    elif c == '"':
      return (self.parse_quote, i+1)
    elif c == '\\':
      return (self.parse_bs, i+1)
    elif c.isspace():
      return (self.parse_main, i+1)
    else:
      self.item += c
      return (self.parse_main, i+1)

  def parse_bs(self, text, i):
    c = text[i]
    self.item += c
    return (self.parse_main, i+1)

  def parse_quote(self, text, i):
    c = text[i]
    if c == '"':
      return (self.parse_main, i+1)
    elif c == '\\':
      return (self.parse_quote_bs, i+1)
    self.item += c
    return (self.parse_quote, i+1)

  def parse_quote_bs(self, text, i):
    c = text[i]
    self.item += c
    return (self.parse_quote, i+1)

  def finish_item(self):
    self.items.append(self.item)
    self.item = ''
    return

  def finish(self):
    self.finish_item()
    return self.items

text1 = '  a\\"  , "b\\"'
text2 = '\\\\ " ,  c     '
print text1+text2
p = Parser()
p.feed(text1)
p.feed(text2)
print p.finish()

a.pyの状態遷移図


Yusuke Shinyama