最低限の lex/yacc

最低限の lex / yacc

https://kenichi-asai.github.io/lex-yacc/

OCamlのocamllexとocamlyaccを使った、最低限のlexとyaccの使い方の解説

こちらも参考にした

ocamllexとocamlyaccで電卓をつくる (OCaml)

https://gkuga.hatenablog.com/entry/2020/03/30/101730

→ 「最低限のlex/yacc」では連携済みのlexとyaccのコードが「ドンッ」と出てきたため、どこまでがlexでどこまでがyaccの切り分けがわかりづらかった。この文書ではlex単独で動かしてからyaccの連携を行なっていたため、そのへんが分かりやすかった

引き算電卓

整数、引き算、掛け算、丸括弧からのみなる数式を受け付ける電卓

引き算電卓に渡すことのできる数式を引き算言語と呼ぶことにする

引き算言語のトークン

( , )

-, *

整数（ [0-9]+）

(* 以降、行末までをコメントとする

トークンの定義は lexer.mll に書く

引き算言語の文法

<式>:=<式>-<式> | <式>*<式> | -<式> | 整数 | (<式>)

<...> で囲まれた文法項目は非終端記号と呼ばれる

文法の定義は parser.mlyに書く

code:例

(* test *)

-12 * (14 - 8 * 2)

ファイルの構成

lexer.mll 字句の定義

parser.mly 文法の定義

syntax.ml 構文木の定義

eval.ml 入力された式の計算

main.ml メインファイル

Duneプロジェクトを作成

code:sh

$ dune init project dentaku

構文木を定義

code:lib/syntax.ml

type op_t = Minus | Times

type t = Num of int (* 整数 *)

| Op of t * op_t * t (* 二項演算 *)

let rec string_of_expr expr = match expr with

Num n -> string_of_int n

| Op (e1, op, e2) ->

let op_str = match op with

| Minus -> "-"

| Times -> "*"

"(" ^ string_of_expr e1 ^ op_str ^ string_of_expr e2 ^ ")"

let print expr =

let str = string_of_expr expr

in print_string str

字句解析器を定義

code:lib/lexer.mll

{

(* 補助的な変数、関数、型などを定義 *)

}

(* 正規表現の略記 *)

let space = ' '\t' '\n' '\r'

let digit = '0'-'9'

(* 字句解析の規則 *)

rule token = parse

space+ { token lexbuf } (* 空白は読み飛ばす *)

| "(*" ^'\n'* "\n" { token lexbuf } (* コメントは読み飛ばす *)

| digit+ { ("NUMBER", Lexing.lexeme lexbuf) }

| "(" { ("LPAREN", Lexing.lexeme lexbuf) }

| ")" { ("RPAREN", Lexing.lexeme lexbuf) }

| "-" { ("MINUS", Lexing.lexeme lexbuf) }

| "*" { ("TIMES", Lexing.lexeme lexbuf) }

| eof { ("EOF", "") }

| _ { failwith "unknown token" }

ocamllex を呼び出すよう lib/dune に設定を追加

code:lib/dune

(library

(name dentaku))

(ocamllex lexer)

dune build 後、dune utop から呼び出してみる。いい感じに動いてそう。

code:utop

# let buff = Lexing.from_string "50 - (10 * 2)";;

# Dentaku.Lexer.token buff;;

- : string * string = ("NUMBER", "50")

# Dentaku.Lexer.token buff;;

- : string * string = ("MINUS", "-")

Dentaku.Lexer.token buff;;

- : string * string = ("LPAREN", "(")

# Dentaku.Lexer.token buff;;

- : string * string = ("NUMBER", "10")

# Dentaku.Lexer.token buff;;

- : string * string = ("TIMES", "*")

# Dentaku.Lexer.token buff;;

- : string * string = ("NUMBER", "2")

# Dentaku.Lexer.token buff;;

- : string * string = ("RPAREN", ")")

# Dentaku.Lexer.token buff;;

- : string * string = ("EOF", "")

構文解析器を定義して、字句解析器と連携させる

code:diff

diff --git a/dentaku/lib/dune b/dentaku/lib/dune

index 4f855e4..1d847bd 100644

--- a/dentaku/lib/dune

+++ b/dentaku/lib/dune

@@ -2,3 +2,4 @@

(name dentaku))

(ocamllex lexer)

+(ocamlyacc parser)

diff --git a/dentaku/lib/lexer.mll b/dentaku/lib/lexer.mll

index 8c5f615..ae1e306 100644

--- a/dentaku/lib/lexer.mll

+++ b/dentaku/lib/lexer.mll

@@ -10,10 +10,10 @@ let digit = '0'-'9'

rule token = parse

space+ { token lexbuf } (* 空白は読み飛ばす *)

| "(*" ^'\n'* "\n" { token lexbuf } (* コメントは読み飛ばす *)

- | digit+ { ("NUMBER", Lexing.lexeme lexbuf) }

- | "(" { ("LPAREN", Lexing.lexeme lexbuf) }

- | ")" { ("RPAREN", Lexing.lexeme lexbuf) }

- | "-" { ("MINUS", Lexing.lexeme lexbuf) }

- | "*" { ("TIMES", Lexing.lexeme lexbuf) }

- | eof { ("EOF", "") }

+ | digit+ { Parser.NUMBER(int_of_string(Lexing.lexeme lexbuf)) }

+ | "(" { Parser.LPAREN }

+ | ")" { Parser.RPAREN }

+ | "-" { Parser.MINUS }

+ | "*" { Parser.TIMES }

+ | eof { Parser.EOF }

| _ { failwith "unknown token" }

diff --git a/dentaku/lib/parser.mly b/dentaku/lib/parser.mly

new file mode 100644

index 0000000..90a54b5

--- /dev/null

+++ b/dentaku/lib/parser.mly

@@ -0,0 +1,36 @@

+%{

+ (* 補助的な変数、関数、型などの定義 *)

+%}

+%token LPAREN RPAREN

+%token MINUS TIMES

+%token <int> NUMBER

+%token EOF

+// エントリーポイントの定義

+%start start

+// 非終端記号の型をここで宣言する

+%type <Syntax.t> start

+// 演算子の優先順位を指定する

+// 下に行くほど優先順位が高い

+%left MINUS

+%left TIMES

+%nonassoc UNARY

+// 文法規則の定義

+%%

+start:

+| expr EOF { $1 }

+simple_expr:

+| NUMBER { Syntax.Num($1) }

+| LPAREN expr RPAREN { $2 }

+expr:

+| simple_expr { $1 }

+| expr MINUS expr { Syntax.Op($1, Syntax.Minus, $3) }

+| expr TIMES expr { Syntax.Op($1, Syntax.Times, $3) }

+| MINUS expr %prec UNARY { Syntax.Op(Syntax.Num(0), Syntax.Minus, $2) }

utop から呼んでみる。いい感じに動いてそう。

code:utop

# let buff = Lexing.from_string "50 - (10 * 2)";;

# Dentaku.Parser.start Dentaku.Lexer.token(buff);;

- : Dentaku.Syntax.t =

Dentaku.Syntax.Op (Dentaku.Syntax.Num 50, Dentaku.Syntax.Minus,

Dentaku.Syntax.Op (Dentaku.Syntax.Num 10, Dentaku.Syntax.Times,

Dentaku.Syntax.Num 2))

構文木を評価する eval.ml を実装する

code:lib/eval.ml

let rec f expr =

match expr with

Syntax.Num(n) -> n

| Syntax.Op(e1, op, e2) ->

let v1 = f e1 in

let v2 = f e2 in

match op with

Syntax.Minus -> v1 - v2

| Syntax.Times -> v1 * v2

utop で動かしてみる。いい感じに動いてそう。

code:utop

# let buff = Lexing.from_string "50 - (10 * 2)";;

# let expr = Dentaku.Parser.start Dentaku.Lexer.token(buff);;

# Dentaku.Eval.f expr;;

- : int = 30

メイン関数から呼び出してみる。

code:bin/main.ml

let () =

let buff = Lexing.from_channel stdin in

let expr = Dentaku.Parser.start Dentaku.Lexer.token(buff) in

let result = Dentaku.Eval.f expr in

print_int result;

print_newline ()

電卓が動いた〜

code:sh

$ dune build

$ echo "50 - (10 * 3)" | dune exec dentaku