// rustre_parser/lib.rs

//! `rustre_parser` is a Rust implementation of a Lustre parser with the ultimate goal of being
//! entirely compliant with its official [specification][spec] and [OCaml implementation][impl].
//!
//! [spec]: https://www-verimag.imag.fr/DIST-TOOLS/SYNCHRONE/lustre-v6/doc/lv6-ref-man.pdf
//! [impl]: https://gricad-gitlab.univ-grenoble-alpes.fr/verimag/synchrone/lustre-v6/

pub mod ast;
mod ast_tests;
pub mod lexer;
pub mod parser;

use crate::lexer::{Lexer, Token};
use rowan_nom::RowanNomError;
use std::ops::Range;

/// Marker, non-constructible type that implements [`rowan::Language`] for Lustre
///
/// The enum has no variants, so no value of this type can ever be created: it only exists at the
/// type level, as the language parameter of the rowan aliases declared in this crate
/// ([`SyntaxNode`], [`SyntaxToken`] and [`SyntaxElement`]) and of [`ParserError`]'s
/// `RowanNomError` implementation.
#[derive(Hash, Ord, PartialOrd, PartialEq, Eq, Debug, Copy, Clone)]
pub enum LustreLang {}

/// Error reported by the Lustre parser
///
/// Errors may be chained: when context is attached to an error, the original error is kept in
/// [`cause`](ParserError::cause) and the contextual message becomes the outer
/// [`msg`](ParserError::msg).
///
/// `Debug` is derived so that errors can be printed with `{:?}` and used with `unwrap`, `expect`
/// and the assertion macros.
#[derive(Debug, Clone)]
pub struct ParserError {
    /// Location in the source that the error points at; an empty range (`p..p`) designates a
    /// single position rather than a stretch of text
    pub span: Range<usize>,
    /// Human-readable description of what went wrong
    pub msg: String,
    /// Underlying error that this one wraps, if any
    pub cause: Option<Box<ParserError>>,
}

/// Lets `rowan_nom` build [`ParserError`]s: every constructor below yields an error without a
/// cause, and only [`with_context`](RowanNomError::with_context) produces a chained error.
impl RowanNomError<LustreLang> for ParserError {
    /// Builds an error that only carries `message`; its span is the empty range `0..0`.
    fn from_message(message: &str) -> Self {
        Self {
            cause: None,
            msg: String::from(message),
            span: 0..0,
        }
    }

    /// Builds an error with `message`, located at the single position `position`.
    fn from_expected(position: usize, message: &str) -> Self {
        Self {
            cause: None,
            msg: String::from(message),
            span: position..position,
        }
    }

    /// Builds the error reported when a token covering `range` is found where the end of the
    /// input was expected.
    fn from_expected_eof(range: Range<usize>) -> Self {
        Self {
            cause: None,
            msg: String::from("expected eof, found token"),
            span: range,
        }
    }

    /// Builds the error reported when the input ends at `position` while more was expected.
    fn from_unexpected_eof(position: usize) -> Self {
        Self {
            cause: None,
            msg: String::from("unexpected eof"),
            span: position..position,
        }
    }

    /// Builds the error reported when `found` is encountered at `span` instead of `expected`;
    /// both tokens are rendered in the message with their `Debug` representation.
    fn from_unexpected_token(span: Range<usize>, expected: Token, found: Token) -> Self {
        let msg = format!("expected {:?}, found {:?}", expected, found);
        Self {
            cause: None,
            msg,
            span,
        }
    }

    /// Wraps `self` in a new error whose message is `ctx`. The wrapper covers the same span as
    /// the wrapped error, which becomes its `cause`.
    fn with_context(self, ctx: &'static str) -> Self {
        // The span has to be copied out before `self` is moved into the box.
        let span = self.span.clone();
        Self {
            cause: Some(Box::new(self)),
            msg: String::from(ctx),
            span,
        }
    }
}

/// [rowan's `SyntaxNode`][rowan::SyntaxNode] parameterized with the Lustre language marker,
/// [`LustreLang`]
///
/// This is the type stored in [`ast::Root`]'s `syntax` field by [`parse`].
pub type SyntaxNode = rowan::SyntaxNode<LustreLang>;

/// [rowan's `SyntaxToken`][rowan::SyntaxToken] parameterized with the Lustre language marker,
/// [`LustreLang`]
pub type SyntaxToken = rowan::SyntaxToken<LustreLang>;

/// [rowan's `SyntaxElement`][rowan::SyntaxElement] parameterized with the Lustre language marker,
/// [`LustreLang`]
pub type SyntaxElement = rowan::SyntaxElement<LustreLang>;

/// Lex a Lustre file into localized [`Token`]s
///
/// Shorthand for [`Lexer::from_source`]: `source` is handed to the lexer as-is and the resulting
/// [`Lexer`] is returned without being advanced. As the example below shows, the lexer is an
/// iterator whose items pair each token with its range in `source`.
///
/// # Example
///
/// ```
/// # use rustre_parser::{lex, lexer::Token};
/// // This iterator contains the tokens and their respective range
/// let spanned_tokens = lex("type register = bool^32;");
///
/// // We only keep the tokens and filter out trivia (spaces, comments)
/// let tokens = spanned_tokens.map(|(token, _range)| token).filter(|t| t.is_non_trivia());
///
/// assert_eq!(tokens.collect::<Vec<_>>(), [
///     Token::Type,
///     Token::Ident,
///     Token::Equal,
///     Token::Bool,
///     Token::Hat,
///     Token::IConst,
///     Token::Semicolon,
/// ]);
/// ```
pub fn lex(source: &str) -> Lexer {
    Lexer::from_source(source)
}

/// Parse the source code of a Lustre file, producing its syntax tree along with every error met
/// on the way
///
/// The elements of the syntax tree map to the structs of the [`ast`] module.
///
/// # Errors
///
/// The parser is designed so that an error does not abort the whole parse: errors are collected
/// in a list while the parser carries on, recovering as well as it can. This is why they are
/// returned next to the tree, in a tuple, instead of through the usual [`Result`].
///
/// Should the top-level parser fail anyway, an empty root node is returned together with a single
/// error located at the very end of `source`.
///
/// # Parsing individual grammar elements
///
/// This function parses a whole Lustre program, i.e. a whole file. To parse one specific syntax
/// element (a single node, for instance), look up the matching parser in the [`parser`] module
/// and build one of [`ast`]'s structs from what it returns.
///
/// While parsing a whole program is not supposed to fail, as explained above, the parsers of
/// individual syntax elements may fail, typically when the first token or the first few tokens
/// are unexpected.
pub fn parse(source: &str) -> (ast::Root, Vec<ParserError>) {
    // Pair every token with the slice of `source` it was lexed from.
    let tokens: Vec<_> = Lexer::from_source(source)
        .map(|(token, range)| (token, &source[range]))
        .collect();

    if let Ok((_, (syntax, errors))) = parser::parse_program(rowan_nom::Input::from(&tokens[..])) {
        return (ast::Root { syntax }, errors);
    }

    // Thanks to its design, the parser should never error at the top-level. We handle it still,
    // just in case: hand back an empty root and a single error at the end of the input.
    let empty_root = rowan::GreenNode::new(Token::Root.into(), []);
    let end = source.len();
    let internal_error = ParserError {
        msg: "unexpected internal parser error; this should never happen".to_string(),
        span: end..end,
        cause: None,
    };

    (
        ast::Root {
            syntax: SyntaxNode::new_root(empty_root),
        },
        vec![internal_error],
    )
}

// Only compiled for test builds that enable the `tests-lustre-upstream` feature.
// NOTE(review): the macro presumably generates a `parser_tests` module from the upstream Lustre
// test suite; confirm against `rustre_parser_tests_codegen`.
#[cfg(all(test, feature = "tests-lustre-upstream"))]
rustre_parser_tests_codegen::include_lustre_tests!(mod parser_tests);