1use crate::{
4 ast::{BinOp, Expr, UnaryOp},
5 error::{Error, SourcePosition},
6};
7
/// A lexical token of the expression language, as produced by `lex_expr`.
///
/// `PartialEq` is derived so tokens can be compared directly (for example in
/// tests); `Eq` is not available because `Float` carries an `f64`.
#[derive(Clone, Debug, PartialEq)]
pub enum Token {
    // --- Literals and names ---
    /// An identifier that is not one of the reserved words.
    Ident(String),
    /// A quoted string literal with its escape sequences already resolved.
    Str(String),
    /// An integer literal.
    Int(i64),
    /// A floating-point literal.
    Float(f64),
    /// The `true` / `false` literals.
    Bool(bool),
    /// The `null`, `none` or `nil` literal.
    Null,

    // --- Punctuation ---
    /// `.`
    Dot,
    /// `|`
    Pipe,
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// A single `=`.
    Equals,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `[`
    LBracket,
    /// `]`
    RBracket,

    // --- Comparison operators ---
    /// `==`
    OpEq,
    /// `!=`
    OpNeq,
    /// `<`
    OpLt,
    /// `>`
    OpGt,
    /// `<=`
    OpLte,
    /// `>=`
    OpGte,

    // --- Arithmetic operators ---
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Star,
    /// `/`
    Slash,
    /// `%`
    Percent,

    // --- Reserved words ---
    /// `and`
    KeywordAnd,
    /// `or`
    KeywordOr,
    /// `not`
    KeywordNot,
    /// `in`
    KeywordIn,
}
41
42pub struct ExprParser {
43 tokens: Vec<(Token, SourcePosition)>,
44 pos: usize,
45}
46
47impl ExprParser {
48 pub fn new(tokens: Vec<(Token, SourcePosition)>) -> Self {
49 Self {
50 tokens,
51 pos: 0,
52 }
53 }
54
55 pub fn parse_all(mut self) -> Result<Expr, Error> {
56 let expr = self.parse_or()?;
57 if self.pos < self.tokens.len() {
58 let (_, pos) = &self.tokens[self.pos];
59 return Err(Error::syntax("unexpected trailing tokens", pos.line, pos.column));
60 }
61 Ok(expr)
62 }
63
64 fn peek_token(&self) -> Option<&Token> {
65 self.tokens.get(self.pos).map(|(t, _)| t)
66 }
67
68 fn advance(&mut self) -> Option<&(Token, SourcePosition)> {
69 let token = self.tokens.get(self.pos);
70 self.pos += 1;
71 token
72 }
73
74 fn expect(&mut self) -> Result<Token, Error> {
75 self.advance()
76 .map(|(t, _)| t.clone())
77 .ok_or_else(|| Error::parse("unexpected end of expression"))
78 }
79
80 fn expect_position(&mut self, msg: &str) -> Result<(Token, SourcePosition), Error> {
81 self.advance().cloned().ok_or_else(|| Error::syntax(msg, 0, 0))
82 }
83
84 fn expect_ident(&mut self) -> Result<String, Error> {
85 match self.expect()? {
86 Token::Ident(s) => Ok(s),
87 tok => Err(Error::parse(format!("expected identifier, got {tok:?}"))),
88 }
89 }
90
91 fn parse_or(&mut self) -> Result<Expr, Error> {
92 let mut left = self.parse_and()?;
93 while matches!(self.peek_token(), Some(Token::KeywordOr)) {
94 self.advance();
95 let right = self.parse_and()?;
96 left = Expr::BinOp {
97 left: Box::new(left),
98 op: BinOp::Or,
99 right: Box::new(right),
100 };
101 }
102 self.parse_filter_pipes(left)
103 }
104
105 fn parse_and(&mut self) -> Result<Expr, Error> {
106 let mut left = self.parse_comparison()?;
107 while matches!(self.peek_token(), Some(Token::KeywordAnd)) {
108 self.advance();
109 let right = self.parse_comparison()?;
110 left = Expr::BinOp {
111 left: Box::new(left),
112 op: BinOp::And,
113 right: Box::new(right),
114 };
115 }
116 Ok(left)
117 }
118
119 fn parse_comparison(&mut self) -> Result<Expr, Error> {
120 let left = self.parse_addition()?;
121 match self.peek_token() {
122 Some(Token::OpEq) => {
123 self.advance();
124 let right = self.parse_addition()?;
125 Ok(Expr::BinOp {
126 left: Box::new(left),
127 op: BinOp::Eq,
128 right: Box::new(right),
129 })
130 }
131 Some(Token::OpNeq) => {
132 self.advance();
133 let right = self.parse_addition()?;
134 Ok(Expr::BinOp {
135 left: Box::new(left),
136 op: BinOp::Neq,
137 right: Box::new(right),
138 })
139 }
140 Some(Token::OpLt) => {
141 self.advance();
142 let right = self.parse_addition()?;
143 Ok(Expr::BinOp {
144 left: Box::new(left),
145 op: BinOp::Lt,
146 right: Box::new(right),
147 })
148 }
149 Some(Token::OpGt) => {
150 self.advance();
151 let right = self.parse_addition()?;
152 Ok(Expr::BinOp {
153 left: Box::new(left),
154 op: BinOp::Gt,
155 right: Box::new(right),
156 })
157 }
158 Some(Token::OpLte) => {
159 self.advance();
160 let right = self.parse_addition()?;
161 Ok(Expr::BinOp {
162 left: Box::new(left),
163 op: BinOp::Lte,
164 right: Box::new(right),
165 })
166 }
167 Some(Token::OpGte) => {
168 self.advance();
169 let right = self.parse_addition()?;
170 Ok(Expr::BinOp {
171 left: Box::new(left),
172 op: BinOp::Gte,
173 right: Box::new(right),
174 })
175 }
176 Some(Token::KeywordIn) => {
177 self.advance();
178 let right = self.parse_addition()?;
179 Ok(Expr::BinOp {
180 left: Box::new(left),
181 op: BinOp::In,
182 right: Box::new(right),
183 })
184 }
185 _ => Ok(left),
186 }
187 }
188
189 fn parse_addition(&mut self) -> Result<Expr, Error> {
190 let mut left = self.parse_multiplication()?;
191 loop {
192 match self.peek_token() {
193 Some(Token::Plus) => {
194 self.advance();
195 let right = self.parse_multiplication()?;
196 left = Expr::BinOp {
197 left: Box::new(left),
198 op: BinOp::Add,
199 right: Box::new(right),
200 };
201 }
202 Some(Token::Minus) => {
203 self.advance();
204 let right = self.parse_multiplication()?;
205 left = Expr::BinOp {
206 left: Box::new(left),
207 op: BinOp::Sub,
208 right: Box::new(right),
209 };
210 }
211 _ => break,
212 }
213 }
214 Ok(left)
215 }
216
217 fn parse_multiplication(&mut self) -> Result<Expr, Error> {
218 let mut left = self.parse_unary()?;
219 loop {
220 match self.peek_token() {
221 Some(Token::Star) => {
222 self.advance();
223 let right = self.parse_unary()?;
224 left = Expr::BinOp {
225 left: Box::new(left),
226 op: BinOp::Mul,
227 right: Box::new(right),
228 };
229 }
230 Some(Token::Slash) => {
231 self.advance();
232 let right = self.parse_unary()?;
233 left = Expr::BinOp {
234 left: Box::new(left),
235 op: BinOp::Div,
236 right: Box::new(right),
237 };
238 }
239 Some(Token::Percent) => {
240 self.advance();
241 let right = self.parse_unary()?;
242 left = Expr::BinOp {
243 left: Box::new(left),
244 op: BinOp::Mod,
245 right: Box::new(right),
246 };
247 }
248 _ => break,
249 }
250 }
251 Ok(left)
252 }
253
254 fn parse_unary(&mut self) -> Result<Expr, Error> {
255 match self.peek_token() {
256 Some(Token::Minus) => {
257 self.advance();
258 let expr = self.parse_primary()?;
259 Ok(Expr::UnaryOp {
260 op: UnaryOp::Neg,
261 expr: Box::new(expr),
262 })
263 }
264 Some(Token::KeywordNot) => {
265 self.advance();
266 let expr = self.parse_unary()?;
267 Ok(Expr::UnaryOp {
268 op: UnaryOp::Not,
269 expr: Box::new(expr),
270 })
271 }
272 _ => self.parse_primary(),
273 }
274 }
275
276 fn parse_primary(&mut self) -> Result<Expr, Error> {
277 let token = self.expect_position("expected expression")?;
278 let mut expr = match token.0 {
279 Token::Ident(name) => Expr::Var(name),
280 Token::Str(s) => Expr::Str(s),
281 Token::Int(n) => Expr::I64(n),
282 Token::Float(n) => Expr::F64(n),
283 Token::Bool(b) => Expr::Bool(b),
284 Token::Null => Expr::Null,
285 Token::LParen => {
286 let inner = self.parse_or()?;
287 match self.expect_position("expected )")?.0 {
288 Token::RParen => {}
289 tok => return Err(Error::syntax(format!("expected `)`, got {tok:?}"), 0, 0)),
290 }
291 inner
292 }
293 tok => return Err(Error::syntax(format!("unexpected token {tok:?}"), 0, 0)),
294 };
295
296 loop {
297 match self.peek_token() {
298 Some(Token::Dot) => {
299 self.advance();
300 let name = self.expect_ident()?;
301 expr = Expr::Dot(Box::new(expr), name);
302 }
303 Some(Token::LBracket) => {
304 self.advance();
305 let index = self.parse_or()?;
306 match &index {
307 Expr::I64(n) if *n < 0 => {
308 let (_, pos) = &self.tokens[0];
309 return Err(Error::syntax("negative index is not allowed", pos.line, pos.column));
310 }
311 Expr::F64(n) if *n < 0.0 => {
312 let (_, pos) = &self.tokens[0];
313 return Err(Error::syntax("negative index is not allowed", pos.line, pos.column));
314 }
315 Expr::UnaryOp {
316 op: UnaryOp::Neg,
317 expr: e,
318 } if matches!(e.as_ref(), Expr::I64(_) | Expr::F64(_)) => {
319 let (_, pos) = &self.tokens[0];
320 return Err(Error::syntax("negative index is not allowed", pos.line, pos.column));
321 }
322 _ => {}
323 }
324 match self.expect_position("expected ]")?.0 {
325 Token::RBracket => {}
326 tok => return Err(Error::syntax(format!("expected `]`, got {tok:?}"), 0, 0)),
327 }
328 expr = Expr::Index(Box::new(expr), Box::new(index));
329 }
330 Some(Token::LParen) => {
331 self.advance();
332 let mut args = Vec::new();
333 if !matches!(self.peek_token(), Some(Token::RParen)) {
334 args.push(self.parse_or()?);
335 while matches!(self.peek_token(), Some(Token::Comma)) {
336 self.advance();
337 args.push(self.parse_or()?);
338 }
339 }
340 match self.expect_position("expected )")?.0 {
341 Token::RParen => {}
342 tok => return Err(Error::syntax(format!("expected `)`, got {tok:?}"), 0, 0)),
343 }
344 match expr {
346 Expr::Var(name) => {
347 expr = Expr::Call(name, args);
348 }
349 _ => return Err(Error::syntax("function calls only supported on simple names", 0, 0)),
350 }
351 }
352 _ => break,
353 }
354 }
355
356 Ok(expr)
357 }
358
359 fn parse_filter_pipes(&mut self, mut expr: Expr) -> Result<Expr, Error> {
360 while matches!(self.peek_token(), Some(Token::Pipe)) {
361 self.advance();
362 let name = self.expect_ident()?;
363 let mut args = Vec::new();
364 if matches!(self.peek_token(), Some(Token::LParen)) {
365 self.advance();
366 if !matches!(self.peek_token(), Some(Token::RParen)) {
367 loop {
368 match self.peek_token() {
369 Some(Token::Ident(_))
370 | Some(Token::Str(_))
371 | Some(Token::Int(_))
372 | Some(Token::Float(_))
373 | Some(Token::Bool(_))
374 | Some(Token::Null)
375 | Some(Token::Minus)
376 | Some(Token::KeywordNot)
377 | Some(Token::LParen) => {
378 args.push(self.parse_or()?);
379 }
380 _ => break,
381 }
382 if matches!(self.peek_token(), Some(Token::Comma)) {
383 self.advance();
384 } else {
385 break;
386 }
387 }
388 }
389 match self.expect_position("expected )")?.0 {
390 Token::RParen => {}
391 tok => return Err(Error::syntax(format!("expected `)`, got {tok:?}"), 0, 0)),
392 }
393
394 }
397 expr = Expr::Filter {
398 expr: Box::new(expr),
399 name,
400 args,
401 };
402 }
403 Ok(expr)
404 }
405}
406
407pub fn lex_expr(input: &str) -> Result<Vec<(Token, SourcePosition)>, String> {
408 let mut tokens = Vec::new();
409 let chars: Vec<char> = input.chars().collect();
410 let mut i = 0;
411 let mut line = 1;
412 let mut col = 1;
413
414 while i < chars.len() {
415 let c = chars[i];
416 let start_col = col;
417
418 if c.is_whitespace() {
419 if c == '\n' {
420 line += 1;
421 col = 1;
422 } else {
423 col += 1;
424 }
425 i += 1;
426 continue;
427 }
428
429 match c {
430 '.' => {
431 tokens.push((Token::Dot, SourcePosition::new(line, col)));
432 i += 1;
433 col += 1;
434 }
435 '|' => {
436 tokens.push((Token::Pipe, SourcePosition::new(line, col)));
437 i += 1;
438 col += 1;
439 }
440 ',' => {
441 tokens.push((Token::Comma, SourcePosition::new(line, col)));
442 i += 1;
443 col += 1;
444 }
445 ':' => {
446 tokens.push((Token::Colon, SourcePosition::new(line, col)));
447 i += 1;
448 col += 1;
449 }
450 '=' => {
451 if i + 1 < chars.len() && chars[i + 1] == '=' {
452 tokens.push((Token::OpEq, SourcePosition::new(line, col)));
453 i += 2;
454 col += 2;
455 } else {
456 tokens.push((Token::Equals, SourcePosition::new(line, col)));
457 i += 1;
458 col += 1;
459 }
460 }
461 '(' => {
462 tokens.push((Token::LParen, SourcePosition::new(line, col)));
463 i += 1;
464 col += 1;
465 }
466 ')' => {
467 tokens.push((Token::RParen, SourcePosition::new(line, col)));
468 i += 1;
469 col += 1;
470 }
471 '[' => {
472 tokens.push((Token::LBracket, SourcePosition::new(line, col)));
473 i += 1;
474 col += 1;
475 }
476 ']' => {
477 tokens.push((Token::RBracket, SourcePosition::new(line, col)));
478 i += 1;
479 col += 1;
480 }
481 '+' => {
482 tokens.push((Token::Plus, SourcePosition::new(line, col)));
483 i += 1;
484 col += 1;
485 }
486 '-' => {
487 tokens.push((Token::Minus, SourcePosition::new(line, col)));
488 i += 1;
489 col += 1;
490 }
491 '*' => {
492 tokens.push((Token::Star, SourcePosition::new(line, col)));
493 i += 1;
494 col += 1;
495 }
496 '/' => {
497 tokens.push((Token::Slash, SourcePosition::new(line, col)));
498 i += 1;
499 col += 1;
500 }
501 '%' => {
502 tokens.push((Token::Percent, SourcePosition::new(line, col)));
503 i += 1;
504 col += 1;
505 }
506 '!' => {
507 if i + 1 < chars.len() && chars[i + 1] == '=' {
508 tokens.push((Token::OpNeq, SourcePosition::new(line, col)));
509 i += 2;
510 col += 2;
511 } else {
512 return Err(format!("unexpected character `!` at {line}:{col}"));
513 }
514 }
515 '<' => {
516 if i + 1 < chars.len() && chars[i + 1] == '=' {
517 tokens.push((Token::OpLte, SourcePosition::new(line, col)));
518 i += 2;
519 col += 2;
520 } else {
521 tokens.push((Token::OpLt, SourcePosition::new(line, col)));
522 i += 1;
523 col += 1;
524 }
525 }
526 '>' => {
527 if i + 1 < chars.len() && chars[i + 1] == '=' {
528 tokens.push((Token::OpGte, SourcePosition::new(line, col)));
529 i += 2;
530 col += 2;
531 } else {
532 tokens.push((Token::OpGt, SourcePosition::new(line, col)));
533 i += 1;
534 col += 1;
535 }
536 }
537 '\'' | '"' => {
538 let quote = c;
539 let mut s = String::new();
540 i += 1;
541 col += 1;
542 while i < chars.len() {
543 if chars[i] == '\\' && i + 1 < chars.len() {
544 let next = chars[i + 1];
545 match next {
546 'n' => s.push('\n'),
547 't' => s.push('\t'),
548 'r' => s.push('\r'),
549 '\\' => s.push('\\'),
550 '\'' => s.push('\''),
551 '"' => s.push('"'),
552 c => {
553 s.push('\\');
554 s.push(c);
555 }
556 }
557 i += 2;
558 col += 2;
559 } else if chars[i] == quote {
560 i += 1;
561 col += 1;
562 break;
563 } else {
564 if chars[i] == '\n' {
565 line += 1;
566 col = 1;
567 } else {
568 col += 1;
569 }
570 s.push(chars[i]);
571 i += 1;
572 }
573 }
574 tokens.push((Token::Str(s), SourcePosition::new(line, start_col)));
575 }
576 _ if c.is_ascii_digit() || (c == '-' && i + 1 < chars.len() && chars[i + 1].is_ascii_digit()) => {
577 let mut num = String::new();
578 if c == '-' {
579 num.push('-');
580 i += 1;
581 col += 1;
582 }
583 while i < chars.len() && chars[i].is_ascii_digit() {
584 num.push(chars[i]);
585 i += 1;
586 col += 1;
587 }
588 let mut is_float = false;
589 if i < chars.len() && chars[i] == '.' {
590 is_float = true;
591 num.push('.');
592 i += 1;
593 col += 1;
594 while i < chars.len() && chars[i].is_ascii_digit() {
595 num.push(chars[i]);
596 i += 1;
597 col += 1;
598 }
599 }
600 if is_float {
601 let n: f64 = num.parse().map_err(|e| format!("bad float: {e}"))?;
602 tokens.push((Token::Float(n), SourcePosition::new(line, start_col)));
603 } else {
604 let n: i64 = num.parse().map_err(|e| format!("bad int: {e}"))?;
605 tokens.push((Token::Int(n), SourcePosition::new(line, start_col)));
606 }
607 }
608 _ if c.is_ascii_alphabetic() || c == '_' => {
609 let mut ident = String::new();
610 while i < chars.len() && (chars[i].is_ascii_alphanumeric() || chars[i] == '_') {
611 ident.push(chars[i]);
612 i += 1;
613 col += 1;
614 }
615 let token = match ident.as_str() {
616 "true" => Token::Bool(true),
617 "false" => Token::Bool(false),
618 "null" | "none" | "nil" => Token::Null,
619 "and" => Token::KeywordAnd,
620 "or" => Token::KeywordOr,
621 "not" => Token::KeywordNot,
622 "in" => Token::KeywordIn,
623 _ => Token::Ident(ident),
624 };
625 tokens.push((token, SourcePosition::new(line, start_col)));
626 }
627 _ => {
628 return Err(format!("unexpected character `{c}` at {line}:{col}"));
629 }
630 }
631 }
632
633 Ok(tokens)
634}