Pascalのparserを作る
code:ts
import {
all,
choice,
lazy,
match,
Parser,
text,
///////////////////////////////////////////////////////////////////////
/** Parser for the empty string; used as the fallback branch of optional grammar parts. */
const nothing = text("");
/** Parser for a (possibly empty) run of whitespace characters. */
const whitespace = match(/\s*/m);
/**
 * Builds a parser for the literal `str`, trimmed with `whitespace` so callers
 * do not have to deal with spacing around punctuation and keywords.
 */
const token = <S extends string>(str: S) => {
  const literal = text(str);
  return literal.trim(whitespace);
};
/**
 * Parses an identifier (a letter or underscore followed by letters, digits or
 * underscores), trimmed with `whitespace`.
 *
 * The pattern is deliberately limited to identifier characters. A catch-all
 * such as `\S+` would also consume adjacent punctuation (`foo,` / `x;` /
 * `name(`), so parsers that expect `comma`, `eos` or `lParen` right after an
 * identifier could never match.
 *
 * NOTE(review): reserved words (`begin`, `end`, `record`, ...) are not excluded
 * here; confirm whether callers rely on ordering to keep them apart.
 */
const identifier = match(/[A-Za-z_][A-Za-z0-9_]*/).trim(whitespace);
// Grouping punctuation.
const lParen = token("(");
const rParen = token(")");
// Separators.
const comma = token(",");
const colon = token(":");
const semicolon = token(";");
/** End of statement: an alias of `semicolon`, named for readability at use sites. */
const eos = semicolon;
/** Parses the parenthesised, comma-separated list of one or more identifiers of a program heading. */
const programFiles = identifier.sepBy(comma, 1).wrap(lParen, rParen);
/**
 * Parses `program <name> [(<files>)] ;` and yields the pair produced by
 * `identifier.and(...)`: the program name and either the file list or the
 * result of `nothing` when no list is written.
 *
 * The terminating semicolon is consumed here because the sections that follow
 * the heading start directly with their own keyword (`label`, `const`, ...)
 * and none of them accepts a leading `;`.
 *
 * `programFiles` is declared first so it can be referenced directly instead of
 * being deferred through a callback.
 */
const programHead = token("program")
  .next(identifier.and(programFiles.or(nothing)))
  .skip(eos);
/**
 * Parses an optionally negative decimal integer without leading zeros and
 * converts the matched text with `Number`.
 *
 * The leading digit must be written as the character class `[1-9]`; without
 * the brackets the pattern only matches the literal text `1-9`, so no number
 * other than `0` would be recognised.
 */
const integer = match(/-?(0|[1-9]\d*)/).map(Number).trim(whitespace);
/** Labels are written as integers, so they reuse the integer parser. */
const label = integer;
/**
 * Optional label section: the keyword `label`, one or more comma-separated
 * labels, then `eos`. Falls back to `nothing` when the section is absent.
 */
const labelDeclarationsPart = token("label")
  .next(label.sepBy(comma, 1))
  .skip(eos)
  .or(nothing);
/**
 * Parses a constant expression and folds it into a JavaScript value while
 * parsing.
 *
 * The grammar is layered by operator precedence, tightest first:
 *   1. operands: parenthesised expression, `constant`, `identifier`
 *   2. prefix `not`
 *   3. `*`  `/`  `div`  `mod`  `and`
 *   4. `+`  `-`  `or`
 *   5. `=`  `<>`  `<`  `<=`  `>`  `>=`  (at most one per expression)
 *
 * Each level parses `operand (operator operand)*` instead of starting with a
 * reference to `constantExpression` itself: an alternative that begins by
 * invoking the parser it belongs to re-enters itself before consuming any
 * input and never terminates.
 *
 * NOTE(review): identifiers are returned as their name (a string) and are not
 * resolved to a value here, so arithmetic on them follows JavaScript coercion.
 * NOTE(review): keyword operators are built with `token`, which does not check
 * for a word boundary (`not` also matches the start of `nothing`).
 */
const constantExpression: Parser<string | number | boolean> = lazy(() => {
  type Value = string | number | boolean;
  // Operands are typed `any` on purpose: folding relies on JavaScript's
  // operator coercion (for example `true & false`), which the checker rejects
  // for the `Value` union.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  type Fold = (lhs: any, rhs: any) => Value;

  /** Parser for one operator symbol; yields the function that folds its two operands. */
  const operator = (symbol: string, fold: Fold): Parser<Fold> =>
    token(symbol).map(() => fold);

  /** Parses `operand (op operand)*` with at most `maxOps` operators, folding left to right. */
  const leftAssociative = (
    operand: Parser<Value>,
    op: Parser<Fold>,
    maxOps = Infinity,
  ): Parser<Value> =>
    operand.chain((first) =>
      op.and(operand).repeat(0, maxOps).map((tail) =>
        // `and` yields a pair, hence the tuple destructuring.
        tail.reduce<Value>((acc, [fold, rhs]) => fold(acc, rhs), first)
      )
    );

  const primary: Parser<Value> = choice(
    constantExpression.wrap(lParen, rParen),
    constant,
    identifier,
  );

  // `not` is a prefix operator, so the keyword is read before its operand.
  const unary: Parser<Value> = lazy(() =>
    token("not").next(unary).map((operand): Value => !operand).or(primary)
  );

  const product = leftAssociative(
    unary,
    choice(
      operator("*", (a, b) => a * b),
      operator("/", (a, b) => a / b),
      operator("div", (a, b) => Math.floor(a / b)),
      operator("mod", (a, b) => a % b),
      operator("and", (a, b) => a & b),
    ),
  );

  const sum = leftAssociative(
    product,
    choice(
      operator("+", (a, b) => a + b),
      operator("-", (a, b) => a - b),
      operator("or", (a, b) => a | b),
    ),
  );

  // Two-character symbols are tried first; otherwise `<` or `>` would match
  // the first character of `<=`, `<>` or `>=` and leave the rest unparsed.
  const relation = choice(
    operator("<>", (a, b) => a !== b),
    operator("<=", (a, b) => a <= b),
    operator(">=", (a, b) => a >= b),
    operator("=", (a, b) => a === b),
    operator("<", (a, b) => a < b),
    operator(">", (a, b) => a > b),
  );

  // Relational operators do not chain, so at most one is accepted.
  return leftAssociative(sum, relation, 1);
});
/** Parses one constant definition `<name> = <constant expression> ;` and yields the name/value pair. */
const constDeclaration = identifier.skip(token("=")).and(constantExpression)
  .skip(eos);
/**
 * Optional constant section: the keyword `const` followed by one or more
 * definitions. Falls back to `nothing` when the section is absent.
 *
 * Every definition already consumes its own terminating `;`, so definitions
 * are simply repeated. Requiring a comma between them, or a second `;` after
 * the last one, would reject the usual `const a = 1; b = 2;` layout.
 */
const constDeclarationsPart = token("const")
  .next(constDeclaration.repeat(1))
  .or(nothing);
/**
 * One element of a quoted literal: either a single character other than the
 * apostrophe, or a doubled apostrophe (`''`).
 *
 * The first alternative has to be the negated class `[^']`. Written as `^'`
 * the pattern is an anchor followed by an apostrophe, which matches the
 * delimiter itself rather than the literal's content.
 */
const char = match(/[^']/).or(text("''"));
/** A character constant: exactly one `char` between apostrophes. */
const character = char.trim(text("'"));
/** A string literal: zero or more `char`s between apostrophes, joined into one string. */
const literal = char.repeat().map((chars) => chars.join("")).trim(text("'"));
/**
 * Parses a real number: optional `-`, an integer part without leading zeros, a
 * mandatory fraction, and an optional exponent (`e` or `E`, optional sign,
 * digits). The matched text is converted with `Number`.
 *
 * Both bracketed classes matter: `[1-9]` is the leading digit (without the
 * brackets it is the literal text `1-9`), and `e[+-]?` is the exponent marker
 * with an optional sign (without the brackets it reads as "one or more `e`
 * followed by an optional `-`").
 */
const real = match(/-?(0|[1-9]\d*)\.\d+(e[+-]?\d+)?/i)
  .map(Number)
  .trim(whitespace);
/** The keyword `true`, yielding the boolean `true`. */
const pascalTrue = token("true").map(() => true);
/** The keyword `false`, yielding the boolean `false`. */
const pascalFalse = token("false").map(() => false);
/**
 * Any literal constant: number, boolean, character or string.
 *
 * `real` is tried before `integer`: the integer pattern matches the digits in
 * front of a decimal point, so with the opposite order an input such as `1.5`
 * would be accepted as the integer `1` and the `real` alternative could never
 * be chosen. `character` stays ahead of `literal` so a one-character quoted
 * text is reported by `character`.
 */
const constant = choice(
  real,
  integer,
  pascalTrue,
  pascalFalse,
  character,
  literal,
);
const minus = token("-");
const plus = token("+");
/**
 * A bound of a subrange: a plain integer, or an explicitly signed integer or
 * identifier. A leading `+` is dropped; a leading `-` negates the operand.
 *
 * NOTE(review): `identifier` yields its text, so negating an identifier
 * operand produces `NaN` rather than a symbolic negation; confirm whether a
 * signed identifier is meant to be representable.
 */
const subrangeConstant = (() => {
  // Operand allowed after an explicit sign.
  const signedOperand = integer.or(identifier);
  const positive = plus.next(signedOperand);
  const negative = minus.next(signedOperand).map((value) => -value);
  return choice(integer, positive, negative);
})();
/**
 * Any type denoter. Alternatives are tried in order: array, record, file,
 * pointer, subrange, and finally a bare type name.
 *
 * Wrapped in `lazy` because the structured types below refer back to
 * `pascalType`.
 */
const pascalType: Parser<unknown> = lazy(() =>
  arrayType
    .or(recordType)
    .or(fileType)
    .or(pointerType)
    .or(subrangeType)
    .or(identifier)
);
/** A subrange `<low> .. <high>`; each bound is a `subrangeConstant` or an identifier. Yields the pair of bounds. */
const subrangeType = (() => {
  const lowerBound = choice(subrangeConstant, identifier);
  const upperBound = choice(subrangeConstant, identifier);
  return lowerBound.skip(token("..")).and(upperBound);
})();
/** The index of an array: a subrange, or the name of a type. */
const indexType = choice(subrangeType, identifier);
/**
 * An array type: `[packed] array [ <index type> ] of <element type>`.
 * Yields the pair of index type and element type.
 *
 * The index type is delimited by the square-bracket tokens `[` and `]`.
 */
const arrayType = match(/(packed\s+)?array/).trim(whitespace).next(
  indexType.wrap(token("["), token("]")).skip(token("of")).and(pascalType),
);
/** A pointer type: `^` followed by the name of the pointed-to type. Yields that name. */
const pointerType = token("^").next(identifier);
/**
 * One variant of a record's `case` part: a selector, then `:`, a parenthesised
 * field list, and `eos`. Lazy because `fieldList` is declared further down.
 *
 * NOTE(review): the selector is the literal text `i_num`, which looks like a
 * placeholder - confirm whether a case label parser was intended.
 */
const recordCase: Parser<unknown> = lazy(() => {
  const parenthesisedFields = lParen.next(fieldList).skip(rParen);
  const variantBody = colon.next(parenthesisedFields).skip(eos);
  return token("i_num").next(variantBody);
});
/**
 * One section of a record's field list: either `<names> : <type>` with one or
 * more comma-separated names, or a `case ... of` header followed by one or
 * more variants.
 *
 * NOTE(review): the header matches the literal word `identifier` between
 * `case` and `of`, not the `identifier` parser - confirm which was intended.
 */
const recordSection = (() => {
  const fixedFields = identifier.sepBy(comma, 1).skip(colon).and(pascalType);
  const variantHeader = token("case").next(token("identifier")).next(token("of"));
  const variantPart = variantHeader.next(recordCase.sepBy(whitespace, 1));
  return fixedFields.or(variantPart);
})();
/** Zero or more record sections separated by semicolons. */
const fieldList = recordSection.sepBy(semicolon);
/**
 * A record type: `[packed] record <field list> [;] end`. Yields the field list.
 *
 * The closing `end` (with an optional semicolon in front of it) is consumed
 * here; without it the keyword would be left in the input and whatever
 * follows the type, such as the `eos` of a type definition, could not match.
 */
const recordType = token("packed").or(nothing).next(token("record"))
  .next(fieldList)
  .skip(semicolon.or(nothing))
  .skip(token("end"));
/** A file type: `[packed] file of <component type>`. Yields the component type. */
const fileType = (() => {
  const optionalPacked = token("packed").or(nothing);
  const header = optionalPacked.next(token("file")).next(token("of"));
  return header.next(pascalType);
})();
/**
 * One type definition: one or more comma-separated names, `=`, a type, and
 * `eos`. Yields the pair of names and type.
 */
const typeDefinition = identifier.sepBy(comma, 1).skip(token("=")).and(
  pascalType,
).skip(eos);
/**
 * Optional type section: the keyword `type` followed by one or more
 * definitions. Falls back to `nothing` when the section is absent.
 *
 * Definitions are repeated back to back because each one already ends with
 * its own `;`, in the same way `variableDefinitionPart` lists its definitions.
 * Separating them with commas would demand `a = t; , b = u;`.
 */
const typeDefinitionPart = token("type")
  .next(typeDefinition.repeat(1))
  .or(nothing);
/** One variable definition `<name> : <type> ;`. Yields the pair of name and type. */
const variableDefinition = (() => {
  const name = identifier.skip(colon);
  const nameAndType = name.and(pascalType);
  return nameAndType.skip(eos);
})();
/**
 * Optional variable section: the keyword `var` followed by one or more
 * definitions separated by `whitespace`. Falls back to `nothing` when absent.
 */
const variableDefinitionPart = (() => {
  const definitions = variableDefinition.sepBy(whitespace, 1);
  return token("var").next(definitions).or(nothing);
})();
/** A statement; currently just an identifier. */
const statement = identifier;
/**
 * Optional program body: `begin`, one or more statements separated by
 * semicolons, `end`, and the final `.`. Falls back to `nothing` when absent.
 */
const body = token("begin")
  .next(statement.sepBy(semicolon, 1))
  .skip(token("end"))
  .skip(token("."))
  .or(nothing);
/** The end of a `case` construct: an optional semicolon followed by `end`. */
const endCase = semicolon
  .or(nothing)
  .next(token("end"));
/** A case label: an integer, or the keyword `others`. */
const caseLabel = integer
  .or(token("others"));
/**
 * Parses an expression and folds operator applications into a JavaScript
 * value while parsing.
 *
 * The grammar is layered by operator precedence, tightest first:
 *   1. operands: parenthesised expression, function call, `variable`, `constant`
 *   2. prefix `not`
 *   3. `*`  `/`  `div`  `mod`  `and`
 *   4. `+`  `-`  `or`
 *   5. `=`  `<>`  `<`  `<=`  `>`  `>=`  (at most one per expression)
 *
 * Each level parses `operand (operator operand)*` instead of starting with a
 * reference to `expression` itself: an alternative that begins by invoking the
 * parser it belongs to re-enters itself before consuming any input and never
 * terminates.
 *
 * NOTE(review): `variable` and the call alternative do not yield a
 * `string | number | boolean`, so the declared result type only holds for
 * purely constant operands; confirm the intended result representation.
 * NOTE(review): keyword operators are built with `token`, which does not check
 * for a word boundary (`not` also matches the start of `nothing`).
 */
const expression: Parser<string | number | boolean> = lazy(() => {
  // Operands are typed `any` on purpose: they may be folded values, variable
  // references or calls, and folding relies on JavaScript's operator coercion.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  type Operand = any;
  type Fold = (lhs: Operand, rhs: Operand) => Operand;

  /** Parser for one operator symbol; yields the function that folds its two operands. */
  const operator = (symbol: string, fold: Fold): Parser<Fold> =>
    token(symbol).map(() => fold);

  /** Parses `operand (op operand)*` with at most `maxOps` operators, folding left to right. */
  const leftAssociative = (
    operand: Parser<Operand>,
    op: Parser<Fold>,
    maxOps = Infinity,
  ): Parser<Operand> =>
    operand.chain((first) =>
      op.and(operand).repeat(0, maxOps).map((tail) =>
        // `and` yields a pair, hence the tuple destructuring.
        tail.reduce((acc, [fold, rhs]) => fold(acc, rhs), first)
      )
    );

  const primary: Parser<Operand> = choice(
    expression.wrap(lParen, rParen),
    // The call form comes before `variable`: a bare variable would match the
    // callee name and leave the argument list unparsed.
    identifier.and(parameterList),
    variable,
    constant,
  );

  // `not` is a prefix operator, so the keyword is read before its operand.
  const unary: Parser<Operand> = lazy(() =>
    token("not").next(unary).map((operand) => !operand).or(primary)
  );

  const product = leftAssociative(
    unary,
    choice(
      operator("*", (a, b) => a * b),
      operator("/", (a, b) => a / b),
      operator("div", (a, b) => Math.floor(a / b)),
      operator("mod", (a, b) => a % b),
      operator("and", (a, b) => a & b),
    ),
  );

  const sum = leftAssociative(
    product,
    choice(
      operator("+", (a, b) => a + b),
      operator("-", (a, b) => a - b),
      operator("or", (a, b) => a | b),
    ),
  );

  // Two-character symbols are tried first; otherwise `<` or `>` would match
  // the first character of `<=`, `<>` or `>=` and leave the rest unparsed.
  const relation = choice(
    operator("<>", (a, b) => a !== b),
    operator("<=", (a, b) => a <= b),
    operator(">=", (a, b) => a >= b),
    operator("=", (a, b) => a === b),
    operator("<", (a, b) => a < b),
    operator(">", (a, b) => a > b),
  );

  // Relational operators do not chain, so at most one is accepted.
  return leftAssociative(sum, relation, 1);
});
/**
 * Parses a variable access: a base identifier followed by any number of
 * accessors, applied left to right.
 *
 *   - `( <expression> )`  yields `[target, expression]`
 *   - `. <identifier>`    yields `[target, identifier]`
 *   - `^`                 yields `target` unchanged
 *
 * The accessors are written as a repeated suffix of the base identifier. If
 * they were alternatives starting with `variable` itself, placed after a bare
 * `identifier` alternative, the identifier would always win and the accessor
 * forms could never be selected (and would re-enter `variable` without
 * consuming input if they were tried first).
 */
const variable: Parser<unknown> = lazy(() => {
  /** Wraps an already parsed target with one accessor. */
  type Access = (target: unknown) => unknown;

  const indexed = expression.wrap(lParen, rParen)
    .map((index): Access => (target) => [target, index]);
  const field = token(".").next(identifier)
    .map((name): Access => (target) => [target, name]);
  const dereference = token("^")
    .map((): Access => (target) => target);

  const accessor = choice(indexed, field, dereference);

  return identifier.chain((base) =>
    accessor.repeat().map((steps) =>
      steps.reduce<unknown>((target, apply) => apply(target), base)
    )
  );
});
/** One actual parameter: an expression, optionally followed by `: <integer>`. */
const parameter = (() => {
  const integerSuffix = colon.next(integer);
  return expression.and(integerSuffix.or(nothing));
})();
/** One or more comma-separated parameters between parentheses. */
const parameterList = lParen.next(parameter.sepBy(comma, 1)).skip(rParen);
/**
 * A procedure call: `name(args)`, `name()`, or a bare `name`.
 * ("variable" here really means "identifier".)
 *
 * The alternatives run from longest to shortest. The bare form is a prefix of
 * the other two, so if it were tried first it would always succeed and the
 * forms with parentheses could never be selected.
 */
const procedureStatement = choice(
  variable.and(parameterList),
  variable.skip(token("(").and(token(")"))),
  variable,
);
/**
 * A block: either the keyword `forward`, or the label, constant, type and
 * variable sections followed by a compound statement.
 */
const block = (() => {
  const definition = all(
    labelDeclarationsPart,
    constDeclarationsPart,
    typeDefinitionPart,
    variableDefinitionPart,
    compoundStatement,
  );
  return token("forward").or(definition);
})();
/**
 * Top-level parser for a whole program: the program heading, the optional
 * label / constant / type / variable sections, the function definitions, and
 * the body, combined with `all` in that order.
 *
 * NOTE(review): `functionDefinitonPart` is not declared in the visible part of
 * this file and its spelling ("Definiton") looks like a typo of "Definition" -
 * confirm the name against its declaration.
 */
const pascal = all(
programHead,
labelDeclarationsPart,
constDeclarationsPart,
typeDefinitionPart,
variableDefinitionPart,
functionDefinitonPart,
body,
);