opentitanlib/util/vmem/
parser.rs

1// Copyright lowRISC contributors (OpenTitan project).
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
5//! Parsing of Verilog vmem files into the [`Vmem`] representation.
6//!
7//! See the [srec_vmem] documentation for a description of the file format.
8//!
9//! To summarise:
10//! * Files specify hexadecimal data for sequential addresses.
11//! * Start addresses for a run can be specified in hex with '@____'.
12//! * Address and data values are separated by whitespace or comments.
13//! * C-style '//' and '/* */' comments are supported.
14//!
15//! [srec_vmem]: https://srecord.sourceforge.net/man/man5/srec_vmem.5.html
16
17use std::num::ParseIntError;
18
19use thiserror::Error;
20
21use super::{Section, Vmem};
22
/// Result type used throughout the vmem parser, with [`ParseError`] as the error type.
pub type ParseResult<T> = Result<T, ParseError>;
24
/// Errors that can occur when parsing vmem files.
///
/// All fallible functions in this module report failures through this type
/// (see [`ParseResult`]).
#[derive(Clone, Debug, Error, PartialEq, Eq)]
pub enum ParseError {
    /// Failure to parse an integer from hexadecimal.
    ///
    /// Produced when the hexadecimal digits of an address or a data value cannot be
    /// converted to a `u32`, for example because the number is too large. The underlying
    /// [`ParseIntError`] is carried inside the variant.
    #[error("failed to parse as hexadecimal integer")]
    ParseInt(#[from] ParseIntError),

    /// An opened comment was not closed.
    ///
    /// Only block comments (`/* ... */`) can trigger this; line comments (`// ...`) end
    /// at the next newline or at the end of the input.
    #[error("unclosed comment")]
    UnclosedComment,

    /// An address was started with an '@' character, but no address value followed.
    #[error("address is missing a value")]
    AddrMissingValue,

    /// Catch-all for any characters that don't belong in vmem files.
    ///
    /// Carries the first character of the remaining input at the point where no token
    /// could be recognised.
    #[error("unknown character '{0}'")]
    UnknownChar(char),
}
/// Representation of the possible tokens found in vmem files.
///
/// Tokens carry their decoded payload (if any) but not their position or length; the
/// length of the text a token was read from is tracked separately by [`Span`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Token {
    /// End of file.
    Eof,
    /// Address directive, e.g. `@123abc`.
    ///
    /// Holds the address exactly as written in the file (a word index); the conversion to
    /// a byte address happens in [`VmemParser::parse`].
    Addr(u32),
    /// Data value, e.g. `abc123`.
    Value(u32),
    /// Comments, e.g. `/* comment */` or `// comment`.
    Comment,
    /// Whitespace, including newlines.
    Whitespace,
}
58
/// Some span of the input text representing a token.
///
/// A span always starts at the beginning of the string it was parsed from, so only its
/// length needs to be recorded.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Span {
    /// The token that was recognised.
    token: Token,
    /// Length of the token's text in bytes; the parser skips this many bytes of the input
    /// before reading the next token.
    len: usize,
}
65
/// Parser for vmem files.
///
/// This is a unit struct with no state; it only serves as a namespace for the parsing
/// functions, with [`VmemParser::parse`] as the public entry point.
pub struct VmemParser;
68
69impl VmemParser {
70    /// Parse a complete vmem file from a string.
71    pub fn parse(mut s: &str) -> ParseResult<Vmem> {
72        // Build up the vmem file as sections.
73        let mut vmem = Vmem::default();
74        vmem.sections.push(Section::default());
75
76        loop {
77            // Parse a token from the input string, and move along by its span.
78            let Span { len, token } = Self::token(s)?;
79            s = &s[len..];
80
81            match token {
82                Token::Eof => break,
83                Token::Addr(addr) => {
84                    // Add a new section to the `Vmem` at this address.
85                    // Here we translate between a "word index" to a byte address.
86                    vmem.sections.push(Section {
87                        addr: addr * 4,
88                        data: Vec::new(),
89                    });
90                }
91                Token::Value(value) => {
92                    // Add the value to the current (last added) section's data.
93                    let section = vmem.sections.last_mut().unwrap();
94                    section.data.push(value)
95                }
96                // Whitespace and comments are ignored.
97                Token::Whitespace => continue,
98                Token::Comment => continue,
99            }
100        }
101
102        Ok(vmem)
103    }
104
105    /// Parse a single token from the beginning of a string.
106    fn token(s: &str) -> ParseResult<Span> {
107        let parsers = [
108            Self::parse_eof,
109            Self::parse_addr,
110            Self::parse_value,
111            Self::parse_comment,
112            Self::parse_whitespace,
113        ];
114
115        // Run each parser in order, stopping when one gets a matching parse.
116        let span = parsers.iter().find_map(|p| p(s).transpose());
117
118        // If no parsers succeeded, return an error.
119        match span {
120            Some(span) => span,
121            None => Err(ParseError::UnknownChar(s.chars().next().unwrap())),
122        }
123    }
124
125    /// Try to parse an EOF from the beginning of a string.
126    fn parse_eof(s: &str) -> ParseResult<Option<Span>> {
127        // Empty strings give a 0-length `Token::Eof` span.
128        match s.is_empty() {
129            true => Ok(Some(Span {
130                len: 0,
131                token: Token::Eof,
132            })),
133            false => Ok(None),
134        }
135    }
136
137    /// Try to parse an address from the beginning of a string.
138    fn parse_addr(s: &str) -> ParseResult<Option<Span>> {
139        // Check for the beginning '@' symbol.
140        let Some(addr) = s.strip_prefix('@') else {
141            return Ok(None);
142        };
143
144        // Find the length of the actual address string.
145        let addr_len = match addr.find(|c: char| !c.is_ascii_hexdigit()) {
146            Some(0) => return Err(ParseError::AddrMissingValue),
147            Some(len) => len,
148            None => addr.len(),
149        };
150        // Ensure the '@' is included in the span's length!
151        let len = '@'.len_utf8() + addr_len;
152
153        // Parse from hexadecimal.
154        let val = u32::from_str_radix(&addr[..addr_len], 16)?;
155        let token = Token::Addr(val);
156        let span = Span { token, len };
157
158        Ok(Some(span))
159    }
160
161    /// Try parse a value from the beginning of a string.
162    fn parse_value(s: &str) -> ParseResult<Option<Span>> {
163        // Check for hexadecimal characters in the input.
164        let len = match s.find(|c: char| !c.is_ascii_hexdigit()) {
165            Some(0) => return Ok(None),
166            Some(len) => len,
167            None => s.len(),
168        };
169
170        let val = u32::from_str_radix(&s[..len], 16)?;
171        let token = Token::Value(val);
172        let span = Span { token, len };
173
174        Ok(Some(span))
175    }
176
177    /// Try parse a comment from the beginning of a string.
178    fn parse_comment(s: &str) -> ParseResult<Option<Span>> {
179        // Look for commend identifiers and their closers.
180        let len = match s {
181            s if s.starts_with("//") => s.find('\n').unwrap_or(s.len()),
182            s if s.starts_with("/*") => {
183                // `find` gives us the _start_ of the `*/`, so include its length as well.
184                s.find("*/").ok_or(ParseError::UnclosedComment)? + "*/".len()
185            }
186            _ => return Ok(None),
187        };
188
189        let token = Token::Comment;
190        let span = Span { token, len };
191
192        Ok(Some(span))
193    }
194
195    /// Try to parse whitespace from the beginning of a string.
196    fn parse_whitespace(s: &str) -> ParseResult<Option<Span>> {
197        // Check for whitespace at the beginning of the input.
198        let len = match s.find(|c: char| !c.is_whitespace()) {
199            Some(0) => return Ok(None),
200            Some(len) => len,
201            None => s.len(),
202        };
203
204        let token = Token::Whitespace;
205        let span = Span { len, token };
206
207        Ok(Some(span))
208    }
209}
210
211#[cfg(test)]
212mod test {
213    use super::*;
214
215    #[test]
216    fn parse() {
217        let input = r#"
218            AB
219            // comment
220            CD EF
221            @42
222            12 /* comment */ 34
223        "#;
224        let expected = Vmem {
225            sections: vec![
226                Section {
227                    addr: 0x00,
228                    data: vec![0xAB, 0xCD, 0xEF],
229                },
230                Section {
231                    addr: 0x108,
232                    data: vec![0x12, 0x34],
233                },
234            ],
235        };
236
237        assert_eq!(VmemParser::parse(input).unwrap(), expected);
238    }
239
240    #[test]
241    fn token() {
242        // Check we can pick out the correct token from a string:
243        let expected = [
244            ("", Token::Eof, 0),
245            ("@ff", Token::Addr(0xff), 3),
246            ("ff", Token::Value(0xff), 2),
247            ("// X", Token::Comment, 4),
248            ("/* X */", Token::Comment, 7),
249            (" 	", Token::Whitespace, 2),
250        ];
251
252        for (s, token, len) in expected {
253            let span = Span { token, len };
254            assert_eq!(VmemParser::token(s), Ok(span));
255        }
256
257        // Unknown non-token:
258        assert_eq!(VmemParser::token("X"), Err(ParseError::UnknownChar('X')));
259    }
260
261    #[test]
262    fn eof() {
263        // Not EOF:
264        assert_eq!(VmemParser::parse_eof(" ").unwrap(), None);
265
266        // EOF:
267        let expected = Some(Span {
268            len: 0,
269            token: Token::Eof,
270        });
271        assert_eq!(VmemParser::parse_eof("").unwrap(), expected);
272    }
273
274    #[test]
275    fn addr() {
276        // No address:
277        assert_eq!(VmemParser::parse_addr("/* X */").unwrap(), None);
278
279        let expected = Some(Span {
280            len: 9,
281            token: Token::Addr(0x0123abcd),
282        });
283        // Partially an address:
284        assert_eq!(VmemParser::parse_addr("@0123ABCD FF").unwrap(), expected);
285        // Entirely an address:
286        assert_eq!(VmemParser::parse_addr("@0123ABCD").unwrap(), expected);
287        // Lower-case hex characters:
288        assert_eq!(VmemParser::parse_addr("@0123abcd").unwrap(), expected);
289
290        // u32 overflow:
291        assert!(VmemParser::parse_addr("@123456789").is_err());
292        // Missing address after '@':
293        assert!(VmemParser::parse_addr("@").is_err());
294        assert!(VmemParser::parse_addr("@ FF").is_err());
295    }
296
297    #[test]
298    fn value() {
299        // No value:
300        assert_eq!(VmemParser::parse_value("/* X */").unwrap(), None);
301
302        let expected = Some(Span {
303            len: 8,
304            token: Token::Value(0x0123abcd),
305        });
306        // Partially a value:
307        assert_eq!(VmemParser::parse_value("0123ABCD FF").unwrap(), expected);
308        // Entirely a value:
309        assert_eq!(VmemParser::parse_value("0123ABCD").unwrap(), expected);
310        // Lower-case hex characters:
311        assert_eq!(VmemParser::parse_value("0123abcd").unwrap(), expected);
312
313        // u32 overflow:
314        assert!(VmemParser::parse_value("123456789").is_err());
315    }
316
317    #[test]
318    fn comment() {
319        // No whitespace:
320        assert_eq!(VmemParser::parse_comment("FF").unwrap(), None);
321
322        let expected = Some(Span {
323            len: 7,
324            token: Token::Comment,
325        });
326
327        // Partial block comment:
328        assert_eq!(VmemParser::parse_comment("/* X */ FF").unwrap(), expected);
329        // Entirely a block comment:
330        assert_eq!(VmemParser::parse_comment("/* X */").unwrap(), expected);
331        // Unclosed block comment:
332        assert!(VmemParser::parse_comment("/* X").is_err());
333
334        // Line comment ending in newline:
335        assert_eq!(
336            VmemParser::parse_comment(concat!("// XXXX", '\n', "FF")).unwrap(),
337            expected
338        );
339        // Line comment ending at EOF:
340        assert_eq!(VmemParser::parse_comment("// XXXX").unwrap(), expected);
341    }
342
343    #[test]
344    fn whitespace() {
345        // No whitespace:
346        assert_eq!(VmemParser::parse_whitespace("FF").unwrap(), None);
347
348        let expected = Some(Span {
349            len: 2,
350            token: Token::Whitespace,
351        });
352        // Partial whitespace:
353        assert_eq!(VmemParser::parse_whitespace(" 	FF").unwrap(), expected);
354        // Entirely whitespace:
355        assert_eq!(VmemParser::parse_whitespace(" 	").unwrap(), expected);
356    }
357}