nginx_lint_parser/
parser.rs1use crate::syntax_kind::SyntaxKind;
7use rowan::GreenNode;
8use rowan::GreenNodeBuilder;
9
/// A problem detected while parsing, together with the position at which it was recorded.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SyntaxError {
    /// Human-readable description of the problem.
    pub message: String,
    /// Total byte length of the token text the parser had consumed when the error was recorded.
    pub offset: usize,
}
16
17pub fn parse(tokens: Vec<(SyntaxKind, &str)>) -> (GreenNode, Vec<SyntaxError>) {
21 let mut parser = Parser::new(tokens);
22 parser.parse_root();
23 (parser.builder.finish(), parser.errors)
24}
25
/// Parser state: a cursor over the token list, the green-tree builder, and the errors found.
struct Parser<'a> {
    /// Input tokens as `(kind, text)` pairs.
    tokens: Vec<(SyntaxKind, &'a str)>,
    /// Index into `tokens` of the next token to consume.
    pos: usize,
    /// Builder that receives every consumed token and every started/finished node.
    builder: GreenNodeBuilder<'static>,
    /// Errors recorded so far, in the order they were found.
    errors: Vec<SyntaxError>,
    /// Sum of the byte lengths of all token texts consumed so far; used as the error position.
    offset: usize,
}
36
37impl<'a> Parser<'a> {
38 fn new(tokens: Vec<(SyntaxKind, &'a str)>) -> Self {
39 Self {
40 tokens,
41 pos: 0,
42 builder: GreenNodeBuilder::new(),
43 errors: Vec::new(),
44 offset: 0,
45 }
46 }
47
48 fn current(&self) -> Option<SyntaxKind> {
52 self.tokens.get(self.pos).map(|(k, _)| *k)
53 }
54
55 fn current_text(&self) -> &'a str {
57 self.tokens.get(self.pos).map(|(_, t)| *t).unwrap_or("")
58 }
59
60 fn at(&self, kind: SyntaxKind) -> bool {
62 self.current() == Some(kind)
63 }
64
65 fn at_end(&self) -> bool {
67 self.pos >= self.tokens.len()
68 }
69
70 fn bump(&mut self) {
72 if let Some(&(kind, text)) = self.tokens.get(self.pos) {
73 self.builder.token(kind.into(), text);
74 self.offset += text.len();
75 self.pos += 1;
76 }
77 }
78
79 fn eat_trivia(&mut self) {
81 while let Some(kind) = self.current() {
82 if kind == SyntaxKind::WHITESPACE || kind == SyntaxKind::NEWLINE {
83 self.bump();
84 } else {
85 break;
86 }
87 }
88 }
89
90 fn peek_non_trivia(&self) -> Option<SyntaxKind> {
92 let mut i = self.pos;
93 while i < self.tokens.len() {
94 let kind = self.tokens[i].0;
95 if kind != SyntaxKind::WHITESPACE && kind != SyntaxKind::NEWLINE {
96 return Some(kind);
97 }
98 i += 1;
99 }
100 None
101 }
102
103 fn error(&mut self, message: impl Into<String>) {
104 self.errors.push(SyntaxError {
105 message: message.into(),
106 offset: self.offset,
107 });
108 }
109
110 fn parse_root(&mut self) {
114 self.builder.start_node(SyntaxKind::ROOT.into());
115 self.parse_items(false);
116 self.builder.finish_node();
117 }
118
119 fn parse_items(&mut self, in_block: bool) {
121 loop {
122 match self.current() {
123 None => break,
124 Some(SyntaxKind::R_BRACE) if in_block => break,
125 Some(SyntaxKind::R_BRACE) => {
126 self.error("unexpected '}'");
128 self.builder.start_node(SyntaxKind::ERROR.into());
129 self.bump();
130 self.builder.finish_node();
131 }
132 Some(SyntaxKind::WHITESPACE) => {
133 if self.is_blank_line_start() {
136 self.parse_blank_line();
137 } else {
138 self.bump(); }
140 }
141 Some(SyntaxKind::NEWLINE) => {
142 self.bump();
145 }
146 Some(SyntaxKind::COMMENT) => {
147 self.bump();
148 }
149 Some(kind) if is_directive_start(kind) => {
150 self.parse_directive();
151 }
152 Some(SyntaxKind::ERROR) => {
153 self.error("unexpected token");
154 self.bump();
155 }
156 Some(_) => {
157 self.error(format!("unexpected token: {:?}", self.current().unwrap()));
159 self.builder.start_node(SyntaxKind::ERROR.into());
160 self.bump();
161 self.builder.finish_node();
162 }
163 }
164 }
165 }
166
167 fn is_blank_line_start(&self) -> bool {
171 if !self.at(SyntaxKind::WHITESPACE) {
172 return false;
173 }
174 let next = self.tokens.get(self.pos + 1).map(|(k, _)| *k);
176 if next != Some(SyntaxKind::NEWLINE) {
177 return false;
178 }
179 if self.pos == 0 {
181 return true;
182 }
183 let prev = self.tokens[self.pos - 1].0;
184 prev == SyntaxKind::NEWLINE
185 }
186
187 fn parse_blank_line(&mut self) {
189 self.builder.start_node(SyntaxKind::BLANK_LINE.into());
190 self.bump(); self.bump(); self.builder.finish_node();
193 }
194
195 fn parse_directive(&mut self) {
200 self.builder.start_node(SyntaxKind::DIRECTIVE.into());
201
202 let name = self.current_text().to_string();
204 self.bump(); self.parse_arguments();
208
209 let is_lua_block = name.ends_with("_by_lua_block");
211
212 match self.peek_non_trivia() {
214 Some(SyntaxKind::SEMICOLON) => {
215 self.eat_trivia();
216 self.bump(); self.eat_trailing_comment();
219 }
220 Some(SyntaxKind::L_BRACE) => {
221 self.eat_trivia();
222 if is_lua_block {
223 self.parse_raw_block();
224 } else {
225 self.parse_block();
226 }
227 }
228 _ => {
229 self.error("expected ';' or '{'");
231 }
232 }
233
234 self.builder.finish_node(); }
236
237 fn parse_arguments(&mut self) {
243 loop {
244 let mut lookahead = self.pos;
247 while lookahead < self.tokens.len() {
248 let kind = self.tokens[lookahead].0;
249 if kind == SyntaxKind::WHITESPACE || kind == SyntaxKind::NEWLINE {
250 lookahead += 1;
251 } else {
252 break;
253 }
254 }
255 if lookahead >= self.tokens.len() {
256 break;
257 }
258 let next_kind = self.tokens[lookahead].0;
259
260 if is_argument_kind(next_kind) {
261 self.eat_trivia();
263 self.bump(); } else {
265 break;
266 }
267 }
268 }
269
270 fn eat_trailing_comment(&mut self) {
276 if self.at(SyntaxKind::WHITESPACE) {
277 let next = self.tokens.get(self.pos + 1).map(|(k, _)| *k);
278 if next == Some(SyntaxKind::COMMENT) {
279 self.bump(); self.bump(); }
282 }
283 }
284
285 fn parse_block(&mut self) {
287 self.builder.start_node(SyntaxKind::BLOCK.into());
288 self.bump(); self.parse_items(true);
291
292 if self.at(SyntaxKind::R_BRACE) {
293 self.bump(); } else {
295 self.error("expected '}'");
296 }
297 self.builder.finish_node();
298 }
299
300 fn parse_raw_block(&mut self) {
304 self.builder.start_node(SyntaxKind::BLOCK.into());
305 self.bump(); let mut depth: u32 = 1;
308 while !self.at_end() && depth > 0 {
309 match self.current() {
310 Some(SyntaxKind::L_BRACE) => {
311 depth += 1;
312 self.bump();
313 }
314 Some(SyntaxKind::R_BRACE) => {
315 depth -= 1;
316 if depth == 0 {
317 self.bump(); } else {
319 self.bump(); }
321 }
322 Some(_) => {
323 self.bump();
324 }
325 None => break,
326 }
327 }
328
329 if depth > 0 {
330 self.error("expected '}' for lua block");
331 }
332
333 self.builder.finish_node();
334 }
335}
336
337fn is_argument_kind(kind: SyntaxKind) -> bool {
339 matches!(
340 kind,
341 SyntaxKind::ARGUMENT
342 | SyntaxKind::IDENT
343 | SyntaxKind::VARIABLE
344 | SyntaxKind::DOUBLE_QUOTED_STRING
345 | SyntaxKind::SINGLE_QUOTED_STRING
346 )
347}
348
349fn is_directive_start(kind: SyntaxKind) -> bool {
354 matches!(
355 kind,
356 SyntaxKind::IDENT
357 | SyntaxKind::ARGUMENT
358 | SyntaxKind::VARIABLE
359 | SyntaxKind::DOUBLE_QUOTED_STRING
360 | SyntaxKind::SINGLE_QUOTED_STRING
361 )
362}
363
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer_rowan::tokenize;
    use crate::syntax_kind::SyntaxNode;

    /// Tokenizes and parses `source`, returning the root node and the recorded errors.
    fn parse_source(source: &str) -> (SyntaxNode, Vec<SyntaxError>) {
        let (green, errors) = parse(tokenize(source));
        (SyntaxNode::new_root(green), errors)
    }

    /// Asserts that the text of the parsed tree is exactly `source`.
    fn assert_lossless(source: &str) {
        let (root, _errors) = parse_source(source);
        let rendered = root.text().to_string();
        assert_eq!(rendered, source, "lossless round-trip failed");
    }

    /// Asserts that `source` parses without errors and returns the root node.
    fn assert_no_errors(source: &str) -> SyntaxNode {
        let (root, errors) = parse_source(source);
        assert!(errors.is_empty(), "unexpected errors: {:?}", errors);
        root
    }

    /// Asserts that `source` parses without errors and round-trips; returns the root node.
    fn assert_clean(source: &str) -> SyntaxNode {
        let root = assert_no_errors(source);
        assert_lossless(source);
        root
    }

    /// Parses `source`, asserts that it round-trips, and returns the recorded errors.
    fn errors_after_round_trip(source: &str) -> Vec<SyntaxError> {
        let (_root, errors) = parse_source(source);
        assert_lossless(source);
        errors
    }

    /// Returns the first `DIRECTIVE` child of `root`, panicking if there is none.
    fn first_directive(root: &SyntaxNode) -> SyntaxNode {
        for child in root.children() {
            if child.kind() == SyntaxKind::DIRECTIVE {
                return child;
            }
        }
        panic!("no DIRECTIVE node found")
    }

    /// Kinds of the direct children (nodes and tokens) of `node`, in order.
    fn child_kinds(node: &SyntaxNode) -> Vec<SyntaxKind> {
        let mut kinds = Vec::new();
        for element in node.children_with_tokens() {
            kinds.push(element.kind());
        }
        kinds
    }

    /// Checks that `source` is clean and lossless, then returns the child kinds of its first
    /// directive.
    fn first_directive_kinds(source: &str) -> Vec<SyntaxKind> {
        let root = assert_clean(source);
        child_kinds(&first_directive(&root))
    }

    #[test]
    fn simple_directive() {
        let kinds = first_directive_kinds("listen 80;");
        let expected = [
            SyntaxKind::IDENT,
            SyntaxKind::WHITESPACE,
            SyntaxKind::ARGUMENT,
            SyntaxKind::SEMICOLON,
        ];
        assert_eq!(kinds, expected);
    }

    #[test]
    fn directive_no_args() {
        let kinds = first_directive_kinds("accept_mutex on;");
        let expected = [
            SyntaxKind::IDENT,
            SyntaxKind::WHITESPACE,
            SyntaxKind::IDENT,
            SyntaxKind::SEMICOLON,
        ];
        assert_eq!(kinds, expected);
    }

    #[test]
    fn block_directive() {
        let kinds = first_directive_kinds("server { listen 80; }");
        assert!(kinds.contains(&SyntaxKind::IDENT));
        assert!(kinds.contains(&SyntaxKind::BLOCK));
    }

    #[test]
    fn nested_blocks() {
        assert_clean("http { server { listen 80; } }");
    }

    #[test]
    fn multiline_config() {
        assert_clean("http {\n server {\n listen 80;\n }\n}");
    }

    #[test]
    fn comment_standalone() {
        assert_clean("# this is a comment\nlisten 80;");
    }

    #[test]
    fn comment_after_directive() {
        // The trailing comment is expected to be a child of the directive itself.
        let kinds = first_directive_kinds("listen 80; # port");
        assert!(kinds.contains(&SyntaxKind::COMMENT));
    }

    #[test]
    fn double_quoted_string_arg() {
        let kinds = first_directive_kinds(r#"return 200 "hello world";"#);
        assert!(kinds.contains(&SyntaxKind::DOUBLE_QUOTED_STRING));
    }

    #[test]
    fn single_quoted_string_arg() {
        let kinds = first_directive_kinds("return 200 'hello world';");
        assert!(kinds.contains(&SyntaxKind::SINGLE_QUOTED_STRING));
    }

    #[test]
    fn variable_arg() {
        let kinds = first_directive_kinds("set $var value;");
        assert!(kinds.contains(&SyntaxKind::VARIABLE));
    }

    #[test]
    fn lua_block() {
        let kinds = first_directive_kinds("content_by_lua_block {\n ngx.say(\"hello\")\n}");
        assert!(kinds.contains(&SyntaxKind::BLOCK));
    }

    #[test]
    fn lua_block_nested_braces() {
        assert_clean(
            "content_by_lua_block {\n if true then\n local t = {1, 2}\n end\n}",
        );
    }

    #[test]
    fn missing_semicolon() {
        let errors = errors_after_round_trip("listen 80");
        assert!(!errors.is_empty(), "should report missing semicolon");
    }

    #[test]
    fn missing_closing_brace() {
        let errors = errors_after_round_trip("server { listen 80;");
        assert!(!errors.is_empty(), "should report missing '}}'");
    }

    #[test]
    fn unexpected_closing_brace() {
        let errors = errors_after_round_trip("} listen 80;");
        assert!(!errors.is_empty(), "should report unexpected '}}'");
    }

    #[test]
    fn lossless_empty() {
        assert_lossless("");
    }

    #[test]
    fn lossless_whitespace_only() {
        assert_lossless(" \n \n");
    }

    #[test]
    fn lossless_complex_config() {
        let source = r#"http {
 # Main server
 server {
 listen 80;
 server_name example.com;
 location / {
 proxy_pass http://backend;
 }
 }
}
"#;
        assert_lossless(source);
        assert_no_errors(source);
    }

    #[test]
    fn lossless_blank_lines() {
        let source = "listen 80;\n\nlisten 443;\n";
        assert_lossless(source);
        assert_no_errors(source);
    }

    #[test]
    fn lossless_utf8() {
        let source = "# これは日本語コメント\nlisten 80;\n";
        assert_lossless(source);
        assert_no_errors(source);
    }

    #[test]
    fn location_with_regex() {
        assert_clean("location ~ ^/api/(.*) {\n proxy_pass http://backend;\n}");
    }

    #[test]
    fn multiple_directives() {
        assert_clean("worker_processes auto;\nevents {\n worker_connections 1024;\n}\n");
    }
}