deelang/src/parser.rs
2024-11-18 22:02:33 -06:00

472 lines
14 KiB
Rust

use std::fmt;
/// A single statement of a deelang program, as produced by the grammar's
/// `stmt` rule.
#[derive(Debug, Clone, PartialEq)]
pub enum Stmt {
    /// `name <- expr`: the target identifier and the assigned expression.
    Assignment(String, Expr),
    /// An `if` / `elif` chain followed by the optional `else` block.
    ///
    /// The guarded blocks are in source order, with the `if` branch first.
    Conditional(Vec<GuardedBlock>, Option<Block>),
    /// One of the `loop` forms; see [`Loop`].
    Loop(Loop),
    /// An expression standing on its own as a statement.
    BareExpr(Expr),
}
/// An expression node of the deelang syntax tree.
#[derive(Debug, Clone, PartialEq)]
pub enum Expr {
    /// A reference to an identifier.
    Id(String),
    /// A literal value.
    Atom(Atom),
    /// A call `name(arg, ...)`: the callee name and its argument expressions.
    Funcall(String, Vec<Expr>),
    /// A function definition `param -> body`; the parameter name is optional.
    Funcdef(Option<String>, Block),
    /// A prefix operator (stored as its source text) applied to one operand.
    UnaryOp(String, Box<Expr>),
    /// An infix operator (stored as its source text) with its left and right
    /// operands, in that order.
    BinaryOp(String, Box<Expr>, Box<Expr>),
}
/// A sequence of statements, used as the body of branches, loops and function definitions.
pub type Block = Vec<Stmt>;
/// A literal value appearing in an expression.
#[derive(Debug, Clone, PartialEq)]
pub enum Atom {
    /// A string literal; holds the text between the quotes exactly as written.
    String(String),
    /// A numeric literal; every number is stored as an `f64`.
    Num(f64),
    /// The literal `true` or `false`.
    Bool(bool),
}
/// A block paired with the expression that guards it, as used by `if`,
/// `elif` and `loop until`.
#[derive(Debug, Clone, PartialEq)]
pub struct GuardedBlock {
    /// The guard expression written after the keyword.
    pub guard: Expr,
    /// The statements controlled by the guard.
    pub block: Block,
}
/// The loop forms accepted by the grammar's `_loop` rule.
#[derive(Debug, Clone, PartialEq)]
pub enum Loop {
    /// `loop <block>`: a loop with a body and nothing else.
    Bare(Block),
    /// `loop until <guard> <block>`.
    Until(GuardedBlock),
    /// `loop over <id> in <expr> <block>`: the loop variable, the expression
    /// being looped over, and the body.
    Over(String, Expr, Block),
}
// Grammar for deelang, generated by the `peg` crate's `parser!` macro.
//
// The grammar does not look at raw indentation. It expects block structure to
// be spelled out with literal `>>>` (indent) and `<<<` (dedent) markers, which
// is the form `preprocess` produces.
peg::parser! {
    grammar deelang_parser() for str {
        // Entry point: optional leading blank/comment lines, then statements.
        pub rule program() -> Vec<Stmt>
            = __* s:stmt()* { s }

        // One statement. Alternatives are tried in order, so assignments and
        // the keyword forms are attempted before a bare expression.
        pub rule stmt() -> Stmt
            = a:assignment() { a } /
              c:conditional() { c } /
              l:_loop() { Stmt::Loop(l) } /
              e:expr() stop() { Stmt::BareExpr(e) }

        // Expressions. Levels are separated by `--` and run from the
        // loosest-binding operators (top) to the atoms (bottom). Every binary
        // operator is written `(@) op @`, i.e. left-associative.
        rule expr() -> Expr = precedence! {
            e1:(@) "=" _ e2:@ { Expr::BinaryOp("=".to_string(), Box::new(e1), Box::new(e2)) }
            --
            e1:(@) r:relop() e2:@ { Expr::BinaryOp(r, Box::new(e1), Box::new(e2)) }
            --
            // Unary minus sits below the arithmetic levels, so `-a + b`
            // parses as `-(a + b)`.
            "-" _ e1:@ { Expr::UnaryOp("-".to_string(), Box::new(e1)) }
            --
            e1:(@) "+" _ e2:@ { Expr::BinaryOp("+".to_string(), Box::new(e1), Box::new(e2)) }
            e1:(@) "-" _ e2:@ { Expr::BinaryOp("-".to_string(), Box::new(e1), Box::new(e2)) }
            --
            e1:(@) "*" _ e2:@ { Expr::BinaryOp("*".to_string(), Box::new(e1), Box::new(e2)) }
            e1:(@) "/" _ e2:@ { Expr::BinaryOp("/".to_string(), Box::new(e1), Box::new(e2)) }
            e1:(@) "%" _ e2:@ { Expr::BinaryOp("%".to_string(), Box::new(e1), Box::new(e2)) }
            --
            "(" _ e:expr() ")" _ { e }
            // String literal. Like every other atom it now swallows trailing
            // blanks, so `"a" + b` and a string followed by spaces parse.
            // NOTE(review): the `\"` alternative can never match, because the
            // first alternative already consumes the backslash and the quote
            // then closes the string. Escaped quotes are therefore not
            // supported; left as-is pending a decision on escape semantics.
            ['"'] s:$((!['"'] [_] / r#"\""#)*) ['"'] _ { Expr::Atom(Atom::String(s.to_string())) }
            f:funcall() { f }
            f:funcdef() { f }
            b:boolean() _ { Expr::Atom(Atom::Bool(b)) }
            i:id() _ { Expr::Id(i) }
            n:num() _ { Expr::Atom(Atom::Num(n)) }
        }

        // The literals `true` and `false`; the matched text always parses.
        rule boolean() -> bool
            = b:$("true" / "false") { b.parse().unwrap() }

        // A letter followed by letters, digits, `?`, `.` or `-`. Because `-`
        // is an identifier character, `a-b` is a single identifier.
        rule id() -> String
            = i:$(letter() (letter() / digit() / ['?'|'.'|'-'])*) _ { i.to_string() }

        // `name <- expr`, terminated by an end of statement.
        rule assignment() -> Stmt
            = i:id() "<-" _ e:expr() stop() { Stmt::Assignment(i, e) }

        // Relational operators. PEG choice is ordered, so the two-character
        // operators must come first: with `"<"` listed before `"<="`, the
        // shorter one always won and `a <= b` could never be parsed.
        rule relop() -> String
            = r:$("<=" / ">=" / "<" / ">") _ { r.to_string() }

        // `12`, `12.`, `12.5` or `.5`; the matched text is parsed as `f64`.
        rule num() -> f64
            = n:$(digit()+ "."? digit()* / "." digit()+) _ { n.parse().unwrap() }

        // `name(arg, arg, ...)`; the argument list may be empty.
        rule funcall() -> Expr
            = i:id() "(" _ e:(expr() ** ("," _)) __* ")" _ { Expr::Funcall(i, e) }

        // `param -> body`, with the parameter name optional.
        rule funcdef() -> Expr
            = i:id()? "->" _ b:(block()) { Expr::Funcdef(i, b) }

        // `if`, any number of `elif`s, and an optional `else`. The `if`
        // branch is always the first guarded block of the result.
        rule conditional() -> Stmt
            = i:_if() __* ei:elif()* e:_else()? __* { Stmt::Conditional([vec![i], ei].concat(), e) }

        rule _if() -> GuardedBlock
            = "if" _ g:expr() b:block() {
                GuardedBlock {
                    guard: g,
                    block: b,
                }
            }

        rule elif() -> GuardedBlock
            = "elif" _ g:expr() b:block() __* {
                GuardedBlock {
                    guard: g,
                    block: b
                }
            }

        rule _else() -> Block
            = "else" _ b:block() { b }

        // `loop until ...`, `loop over ... in ...`, or a bare `loop`. The
        // keyword forms are tried first so that `until` / `over` are not
        // taken for the start of a bare loop's body.
        rule _loop() -> Loop
            = "loop" _ "until" _ e:expr() b:block() __* {
                Loop::Until(GuardedBlock{
                    guard:e,
                    block: b,
                })
            } /
            "loop" _ "over" _ i:id() "in" _ e:expr() b:block() __* {
                Loop::Over(i, e, b)
            } /
            "loop" _ b:block() __* { Loop::Bare(b) }

        // A block is either an indented run of statements or one inline
        // expression, which becomes a single bare-expression statement.
        rule block() -> Block
            = i:indented_block() { i } /
              e:expr() { vec![Stmt::BareExpr(e)] }

        // End of the current line, `>>>`, one or more statements, `<<<`.
        rule indented_block() -> Block
            = stop() indent() __* s:stmt()+ dedent() { s }

        rule letter()
            = ['A'..='Z'] / ['a'..='z']

        rule digit()
            = ['0'..='9']

        // End of a statement: one or more line ends, or the end of input.
        rule stop()
            = __+ / eof()

        rule indent()
            = ">>>"

        rule dedent()
            = "<<<"

        rule _ // Non-meaningful whitespace
            = ['\t'|' ']*

        rule __ // End Of Statement (comment, newline, eof, TODO semicolon)
            = comment()? newline() / comment() &eof()

        // `#` up to, but not including, the end of the line or of the input.
        rule comment()
            = "#" (!newline() [_])* &(newline() / eof())

        rule newline()
            = "\r\n" / "\r" / "\n"

        // Succeeds only when no input is left.
        rule eof()
            = ![_]
    }
}
/// Rewrites indentation-structured source into the explicit-marker form the
/// grammar expects.
///
/// Each input line is emitted trimmed and followed by `'\n'`. A code line
/// indented deeper than the current level is prefixed with `>>>`. A code line
/// indented shallower is preceded by one `<<<` line for every level it closes.
/// Blank lines and lines whose first non-blank character is `#` never change
/// the level. Levels still open at the end of the input are closed with
/// trailing `<<<` lines.
///
/// Indentation width is the number of leading spaces and tabs, each counted
/// as one.
pub fn preprocess(input: &str) -> String {
    // Widths of the currently open levels; the base level 0 is never popped.
    let mut levels: Vec<usize> = vec![0];
    let mut rewritten = String::new();

    for raw in input.lines() {
        let body = raw.trim_start_matches(|ch: char| ch == ' ' || ch == '\t');
        // Spaces and tabs are one byte each, so the byte difference equals
        // the number of leading blank characters.
        let width = raw.len() - body.len();

        // Only a line that starts real code may open or close levels.
        let opens_code = body.chars().next().map_or(false, |ch| ch != '#');
        if opens_code {
            if levels.last().map_or(false, |&top| top < width) {
                levels.push(width);
                rewritten.push_str(">>>");
            } else {
                while levels.last().map_or(false, |&top| top > width) {
                    levels.pop();
                    rewritten.push_str("<<<\n");
                }
            }
        }

        rewritten.push_str(raw.trim());
        rewritten.push('\n');
    }

    // Balance the program: close every level that is still open.
    for _ in 1..levels.len() {
        rewritten.push_str("<<<\n");
    }
    rewritten
}
/// Parses raw, indentation-structured deelang source into its statements.
///
/// The text is first passed through [`preprocess`] and the result is handed
/// to the grammar's `program` rule.
///
/// # Panics
///
/// Panics if the preprocessed text does not match the grammar.
pub fn parse(prgm: &str) -> Vec<Stmt> {
    let marked = preprocess(prgm);
    let outcome = deelang_parser::program(&marked);
    outcome.unwrap()
}
/// Writes a statement as deelang source, newline-terminated, with blocks
/// rendered by `write_block`.
impl fmt::Display for Stmt {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Stmt::Assignment(id, expr) => writeln!(f, "{} <- {}", id, expr),
            Stmt::BareExpr(expr) => writeln!(f, "{}", expr),
            Stmt::Conditional(guarded_blocks, else_block) => {
                // The first guarded block is the `if`, every later one an
                // `elif`. Guard and body are taken from the same branch, so
                // an `elif` can no longer print the `if` branch's body (the
                // previous code passed `if_block.block` for every `elif`).
                for (index, branch) in guarded_blocks.iter().enumerate() {
                    let keyword = if index == 0 { "if" } else { "elif" };
                    write!(f, "{} {}", keyword, branch.guard)?;
                    write_block(f, &branch.block)?;
                }
                if let Some(block) = else_block {
                    write!(f, "else")?;
                    write_block(f, block)?;
                }
                Ok(())
            }
            Stmt::Loop(loop_inner) => match loop_inner {
                Loop::Bare(block) => {
                    write!(f, "loop")?;
                    write_block(f, block)
                }
                Loop::Over(id, pred, block) => {
                    write!(f, "loop over {} in {}", id, pred)?;
                    write_block(f, block)
                }
                Loop::Until(guarded_block) => {
                    write!(f, "loop until {}", guarded_block.guard)?;
                    write_block(f, &guarded_block.block)
                }
            },
        }
    }
}
fn write_block (f: &mut fmt::Formatter<'_>, block: &Block) -> fmt::Result {
if block.len() == 1 {
writeln!(f, " {}", block[0])
} else {
writeln!(f)?;
writeln!(f, ">>>")?;
for stmt in block {
write!(f, "{}", stmt)?;
}
writeln!(f, "<<<")
}
}
impl fmt::Display for Expr {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Expr::Atom(a) => write!(f, "{}", a),
Expr::Id(id) => write!(f, "{}", id),
Expr::BinaryOp(op, e1, e2) => write!(f, "{} {} {}", e1, op, e2),
Expr::UnaryOp(op, e) => write!(f, "{} {}", op, e),
Expr::Funcdef(_arg, _block) => todo!(),
Expr::Funcall(_id, _args) => todo!(),
}
}
}
/// Writes a literal as it would appear in source: strings wrapped in double
/// quotes with their text unchanged, numbers and booleans in their default
/// `Display` form.
impl fmt::Display for Atom {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Atom::String(text) => {
                f.write_str("\"")?;
                f.write_str(text)?;
                f.write_str("\"")
            }
            Atom::Num(value) => write!(f, "{}", value),
            Atom::Bool(flag) => write!(f, "{}", flag),
        }
    }
}
#[cfg(test)]
mod test {
    use super::*;

    // Continuation lines of the raw-string programs below start at column 0 on
    // purpose. The grammar tests feed text straight to the grammar, which does
    // not accept leading whitespace before a statement.

    // Comments may open the file, trail a statement, and end the file without
    // a final newline.
    #[test]
    fn test_comments() {
        let prgm = r"## This is a comment
apple <- 1 ## This is too
## This comment ends the file";
        let expected = vec![Stmt::Assignment(
            "apple".to_string(),
            Expr::Atom(Atom::Num(1.0)),
        )];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // Calls with an empty and a non-empty argument list.
    #[test]
    fn test_funcall() {
        let expected = vec![
            Stmt::BareExpr(Expr::Funcall("pear".to_string(), vec![])),
            Stmt::BareExpr(Expr::Funcall(
                "pear".to_string(),
                vec![Expr::Id("x".to_string()), Expr::Id("y".to_string())],
            )),
        ];
        let prgm = r"pear()
pear(x, y)";
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // Assignment of a literal and of a call result.
    #[test]
    fn test_assignment() {
        let prgm = r"apple <- 1
apple <- pear(x, y)";
        let expected = vec![
            Stmt::Assignment("apple".to_string(), Expr::Atom(Atom::Num(1.0))),
            Stmt::Assignment(
                "apple".to_string(),
                Expr::Funcall(
                    "pear".to_string(),
                    vec![Expr::Id("x".to_string()), Expr::Id("y".to_string())],
                ),
            ),
        ];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // Binary operators, including grouping with parentheses.
    #[test]
    fn test_operators() {
        let prgm = r"three <- 1 + 2
one <- 3 - 2
four <- (3 - 1) * 2";
        let expected = vec![
            Stmt::Assignment(
                "three".to_string(),
                Expr::BinaryOp(
                    "+".to_string(),
                    Box::new(Expr::Atom(Atom::Num(1.0))),
                    Box::new(Expr::Atom(Atom::Num(2.0))),
                ),
            ),
            Stmt::Assignment(
                "one".to_string(),
                Expr::BinaryOp(
                    "-".to_string(),
                    Box::new(Expr::Atom(Atom::Num(3.0))),
                    Box::new(Expr::Atom(Atom::Num(2.0))),
                ),
            ),
            Stmt::Assignment(
                "four".to_string(),
                Expr::BinaryOp(
                    "*".to_string(),
                    Box::new(Expr::BinaryOp(
                        "-".to_string(),
                        Box::new(Expr::Atom(Atom::Num(3.0))),
                        Box::new(Expr::Atom(Atom::Num(1.0))),
                    )),
                    Box::new(Expr::Atom(Atom::Num(2.0))),
                ),
            ),
        ];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // A call used as the left operand of a binary operator.
    #[test]
    fn test_compound_expression() {
        let prgm = "apple <- pear(x, y) + z";
        let expected = vec![Stmt::Assignment(
            "apple".to_string(),
            Expr::BinaryOp(
                "+".to_string(),
                Box::new(Expr::Funcall(
                    "pear".to_string(),
                    vec![Expr::Id("x".to_string()), Expr::Id("y".to_string())],
                )),
                Box::new(Expr::Id("z".to_string())),
            ),
        )];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // Function definitions: inline body, marker-delimited body, and nested
    // single-parameter definitions.
    #[test]
    fn test_funcdef() {
        let prgm = r"foo <- -> bar()
foo <- ->
>>>
bar()
baz()
<<<
foo <- x -> y -> x * y";
        let expected = vec![
            Stmt::Assignment(
                "foo".to_string(),
                Expr::Funcdef(
                    None,
                    vec![Stmt::BareExpr(Expr::Funcall("bar".to_string(), vec![]))],
                ),
            ),
            Stmt::Assignment(
                "foo".to_string(),
                Expr::Funcdef(
                    None,
                    vec![
                        Stmt::BareExpr(Expr::Funcall("bar".to_string(), vec![])),
                        Stmt::BareExpr(Expr::Funcall("baz".to_string(), vec![])),
                    ],
                ),
            ),
            Stmt::Assignment(
                "foo".to_string(),
                Expr::Funcdef(
                    Some("x".to_string()),
                    vec![Stmt::BareExpr(Expr::Funcdef(
                        Some("y".to_string()),
                        vec![Stmt::BareExpr(Expr::BinaryOp(
                            "*".to_string(),
                            Box::new(Expr::Id("x".to_string())),
                            Box::new(Expr::Id("y".to_string())),
                        ))],
                    ))],
                ),
            ),
        ];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // An `if` / `elif` / `else` chain written in the marker form.
    #[test]
    fn test_conditional() {
        let prgm = r"if foo
>>>bar()
<<<
elif baz
>>>foobar()
<<<
else
>>>quux()
<<<";
        let expected = vec![Stmt::Conditional(
            vec![
                GuardedBlock {
                    guard: Expr::Id("foo".to_string()),
                    block: vec![Stmt::BareExpr(Expr::Funcall("bar".to_string(), vec![]))],
                },
                GuardedBlock {
                    guard: Expr::Id("baz".to_string()),
                    block: vec![Stmt::BareExpr(Expr::Funcall("foobar".to_string(), vec![]))],
                },
            ],
            Some(vec![Stmt::BareExpr(Expr::Funcall("quux".to_string(), vec![]))]),
        )];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // `loop until` with an inline, single-expression body.
    #[test]
    fn test_loop() {
        let prgm = r"loop until i > 100 a";
        let expected = vec![Stmt::Loop(Loop::Until(GuardedBlock {
            guard: Expr::BinaryOp(
                ">".to_string(),
                Box::new(Expr::Id("i".to_string())),
                Box::new(Expr::Atom(Atom::Num(100.0))),
            ),
            block: vec![Stmt::BareExpr(Expr::Id("a".to_string()))],
        }))];
        assert_eq!(deelang_parser::program(prgm).unwrap(), expected);
    }

    // Indentation is turned into `>>>` / `<<<` markers. The input lines must
    // be indented for this to happen: two nested levels, back to the first,
    // into the second again, then back to column 0 (closing both levels).
    // The trailing comment line never changes the level.
    #[test]
    fn test_preprocess() {
        let prgm = r"
    .
        .
    .
        .
.
## Hello World";
        let expected = r"
>>>.
>>>.
<<<
.
>>>.
<<<
<<<
.
## Hello World
";
        assert_eq!(preprocess(prgm), expected);
    }
}