opentitanlib/util/vmem/
parser.rs

1// Copyright lowRISC contributors (OpenTitan project).
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
5//! Parsing of Verilog VMEM files into the [`Vmem`] representation.
6//!
7//! See the [srec_vmem] documentation for a description of the file format.
8//!
9//! To summarise:
10//! * Files specify hexadecimal data for sequential addresses.
11//! * Start addresses for a run can be specified in hex with '@____'.
12//! * Address and data values are separated by whitespace or comments.
13//! * C-style '//' and '/* */' comments are supported.
14//!
15//! [srec_vmem]: https://srecord.sourceforge.net/man/man5/srec_vmem.5.html
16
17use std::num::ParseIntError;
18
19use thiserror::Error;
20
21use super::{Section, Vmem, Word};
22
/// Result alias used throughout the VMEM parser, with [`ParseError`] as the error type.
pub type ParseResult<T> = Result<T, ParseError>;
24
25/// Errors that can occur when parsing VMEM files.
26#[derive(Clone, Debug, Error, PartialEq, Eq)]
27pub enum ParseError {
28    /// Failure to parse an integer from hexadecimal.
29    #[error("failed to parse as hexadecimal integer")]
30    DecodeHexAddr(#[from] ParseIntError),
31
32    #[error("failed to parse as hexadecimal integer")]
33    DecodeHexValue(String),
34
35    /// An opened comment was not closed.
36    #[error("unclosed comment")]
37    UnclosedComment,
38
39    /// An address was started with an '@' character, but no address value followed.
40    #[error("address is missing a value")]
41    AddrMissingValue,
42
43    /// Catch-all for any characters that don't belong in VMEM files.
44    #[error("unknown character '{0}'")]
45    UnknownChar(char),
46}
47
48impl From<hex::FromHexError> for ParseError {
49    // hex::FromHexError does not support PartialEq/Eq, so convert the error to a String.
50    fn from(err: hex::FromHexError) -> Self {
51        Self::DecodeHexValue(err.to_string())
52    }
53}
54
/// Representation of the possible tokens found in VMEM files.
///
/// Each token is returned by the tokenizer wrapped in a [`Span`].
#[derive(Clone, Debug, PartialEq, Eq)]
enum Token {
    /// End of file.
    Eof,
    /// Address directive, e.g. `@123abc`, holding the parsed hexadecimal value.
    Addr(u32),
    /// Data value, e.g. `abc123`, holding the decoded bytes.
    Value(Vec<u8>),
    /// Comments, e.g. `/* comment */` or `// comment`.
    Comment,
    /// Whitespace, including newlines.
    Whitespace,
}
69
/// Some span of the input text representing a token.
#[derive(Clone, Debug, PartialEq, Eq)]
struct Span {
    /// The token recognised at the start of the input.
    token: Token,
    /// Length in bytes of the input text covered by the token.
    len: usize,
}
76
/// Parser for VMEM files.
///
/// This type holds no state; parsing is done through [`VmemParser::parse`].
pub struct VmemParser;
79
80impl VmemParser {
81    /// Parse a complete VMEM file from a string.
82    pub fn parse(mut s: &str, addr_stride: Option<usize>) -> ParseResult<Vmem> {
83        // Build up the vmem file as sections.
84        let mut vmem = Vmem::default();
85        vmem.sections.push(Section::default());
86
87        loop {
88            // Parse a token from the input string, and move along by its span.
89            let Span { len, token } = Self::token(s)?;
90            s = &s[len..];
91
92            match token {
93                Token::Eof => break,
94                Token::Addr(addr) => {
95                    // Add a new section to the `Vmem` at this address.
96                    // Here we translate between a "word index" to a byte address.
97                    if addr != 0 || vmem.sections.last().unwrap().addr != 0 {
98                        vmem.sections.push(Section {
99                            addr: addr * addr_stride.unwrap_or(1) as u32,
100                            data: Vec::new(),
101                        });
102                    }
103                }
104                Token::Value(value) => {
105                    // Add the value to the current (last added) section's data.
106                    let section = vmem.sections.last_mut().unwrap();
107                    section.data.push(Word::new(value))
108                }
109                // Whitespace and comments are ignored.
110                Token::Whitespace => continue,
111                Token::Comment => continue,
112            }
113        }
114
115        Ok(vmem)
116    }
117
118    /// Parse a single token from the beginning of a string.
119    fn token(s: &str) -> ParseResult<Span> {
120        let parsers = [
121            Self::parse_eof,
122            Self::parse_addr,
123            Self::parse_value,
124            Self::parse_comment,
125            Self::parse_whitespace,
126        ];
127
128        // Run each parser in order, stopping when one gets a matching parse.
129        let span = parsers.iter().find_map(|p| p(s).transpose());
130
131        // If no parsers succeeded, return an error.
132        match span {
133            Some(span) => span,
134            None => Err(ParseError::UnknownChar(s.chars().next().unwrap())),
135        }
136    }
137
138    /// Try to parse an EOF from the beginning of a string.
139    fn parse_eof(s: &str) -> ParseResult<Option<Span>> {
140        // Empty strings give a 0-length `Token::Eof` span.
141        match s.is_empty() {
142            true => Ok(Some(Span {
143                len: 0,
144                token: Token::Eof,
145            })),
146            false => Ok(None),
147        }
148    }
149
150    /// Try to parse an address from the beginning of a string.
151    fn parse_addr(s: &str) -> ParseResult<Option<Span>> {
152        // Check for the beginning '@' symbol.
153        let Some(addr) = s.strip_prefix('@') else {
154            return Ok(None);
155        };
156
157        // Find the length of the actual address string.
158        let addr_len = match addr.find(|c: char| !c.is_ascii_hexdigit()) {
159            Some(0) => return Err(ParseError::AddrMissingValue),
160            Some(len) => len,
161            None => addr.len(),
162        };
163        // Ensure the '@' is included in the span's length!
164        let len = '@'.len_utf8() + addr_len;
165
166        // Parse from hexadecimal.
167        let val = u32::from_str_radix(&addr[..addr_len], 16)?;
168        let token = Token::Addr(val);
169        let span = Span { token, len };
170
171        Ok(Some(span))
172    }
173
174    /// Try parse a value from the beginning of a string.
175    fn parse_value(s: &str) -> ParseResult<Option<Span>> {
176        // Check for hexadecimal characters in the input.
177        let len = match s.find(|c: char| !c.is_ascii_hexdigit()) {
178            Some(0) => return Ok(None),
179            Some(len) => len,
180            None => s.len(),
181        };
182        let s = if len % 2 == 1 {
183            format!("0{s}")
184        } else {
185            s.to_string()
186        };
187
188        let val = hex::decode(&s[..len.div_ceil(2) * 2])?;
189        let token = Token::Value(val);
190        let span = Span { token, len };
191
192        Ok(Some(span))
193    }
194
195    /// Try parse a comment from the beginning of a string.
196    fn parse_comment(s: &str) -> ParseResult<Option<Span>> {
197        // Look for commend identifiers and their closers.
198        let len = match s {
199            s if s.starts_with("//") => s.find('\n').unwrap_or(s.len()),
200            s if s.starts_with("/*") => {
201                // `find` gives us the _start_ of the `*/`, so include its length as well.
202                s.find("*/").ok_or(ParseError::UnclosedComment)? + "*/".len()
203            }
204            _ => return Ok(None),
205        };
206
207        let token = Token::Comment;
208        let span = Span { token, len };
209
210        Ok(Some(span))
211    }
212
213    /// Try to parse whitespace from the beginning of a string.
214    fn parse_whitespace(s: &str) -> ParseResult<Option<Span>> {
215        // Check for whitespace at the beginning of the input.
216        let len = match s.find(|c: char| !c.is_whitespace()) {
217            Some(0) => return Ok(None),
218            Some(len) => len,
219            None => s.len(),
220        };
221
222        let token = Token::Whitespace;
223        let span = Span { len, token };
224
225        Ok(Some(span))
226    }
227}
228
229#[cfg(test)]
230mod test {
231    use super::*;
232
233    #[test]
234    fn parse() {
235        let input = r#"
236            AB
237            // comment
238            CD EF
239            @42
240            12 /* comment */ 34
241        "#;
242        let expected = Vmem {
243            sections: vec![
244                Section {
245                    addr: 0x00,
246                    data: [0xAB, 0xCD, 0xEF]
247                        .iter()
248                        .map(|&b| Word::new(vec![b]))
249                        .collect(),
250                },
251                Section {
252                    addr: 0x108,
253                    data: [0x12, 0x34].iter().map(|&b| Word::new(vec![b])).collect(),
254                },
255            ],
256        };
257
258        assert_eq!(VmemParser::parse(input, Some(4)).unwrap(), expected);
259    }
260
261    #[test]
262    fn token() {
263        // Check we can pick out the correct token from a string:
264        let expected = [
265            ("", Token::Eof, 0),
266            ("@ff", Token::Addr(0xff), 3),
267            ("12345678", Token::Value(vec![0x12, 0x34, 0x56, 0x78]), 8),
268            ("// X", Token::Comment, 4),
269            ("/* X */", Token::Comment, 7),
270            (" 	", Token::Whitespace, 2),
271        ];
272
273        for (s, token, len) in expected {
274            let span = Span { token, len };
275            assert_eq!(VmemParser::token(s), Ok(span));
276        }
277
278        // Unknown non-token:
279        assert_eq!(VmemParser::token("X"), Err(ParseError::UnknownChar('X')));
280    }
281
282    #[test]
283    fn eof() {
284        // Not EOF:
285        assert_eq!(VmemParser::parse_eof(" ").unwrap(), None);
286
287        // EOF:
288        let expected = Some(Span {
289            len: 0,
290            token: Token::Eof,
291        });
292        assert_eq!(VmemParser::parse_eof("").unwrap(), expected);
293    }
294
295    #[test]
296    fn addr() {
297        // No address:
298        assert_eq!(VmemParser::parse_addr("/* X */").unwrap(), None);
299
300        let expected = Some(Span {
301            len: 9,
302            token: Token::Addr(0x0123abcd),
303        });
304        // Partially an address:
305        assert_eq!(VmemParser::parse_addr("@0123ABCD FF").unwrap(), expected);
306        // Entirely an address:
307        assert_eq!(VmemParser::parse_addr("@0123ABCD").unwrap(), expected);
308        // Lower-case hex characters:
309        assert_eq!(VmemParser::parse_addr("@0123abcd").unwrap(), expected);
310
311        // u32 overflow:
312        assert!(VmemParser::parse_addr("@123456789").is_err());
313        // Missing address after '@':
314        assert!(VmemParser::parse_addr("@").is_err());
315        assert!(VmemParser::parse_addr("@ FF").is_err());
316    }
317
318    #[test]
319    fn value() {
320        // No value:
321        assert_eq!(VmemParser::parse_value("/* X */").unwrap(), None);
322
323        let token = Token::Value(vec![0x01, 0x23, 0xab, 0xcd]);
324        let expected = Some(Span {
325            len: 8,
326            token: token.clone(),
327        });
328        // Partially a value:
329        assert_eq!(VmemParser::parse_value("0123ABCD FF").unwrap(), expected);
330        // Entirely a value:
331        assert_eq!(VmemParser::parse_value("0123ABCD").unwrap(), expected);
332        // Lower-case hex characters:
333        assert_eq!(VmemParser::parse_value("0123abcd").unwrap(), expected);
334
335        // Odd number of nibbles:
336        let expected = Some(Span { len: 7, token });
337        assert_eq!(VmemParser::parse_value("123ABCD").unwrap(), expected);
338
339        // Word sizes larger than u32:
340        let expected = Some(Span {
341            len: 10,
342            token: Token::Value(vec![0x01, 0x23, 0xab, 0xcd, 0xef]),
343        });
344        assert_eq!(VmemParser::parse_value("0123abcdef").unwrap(), expected);
345    }
346
347    #[test]
348    fn comment() {
349        // No whitespace:
350        assert_eq!(VmemParser::parse_comment("FF").unwrap(), None);
351
352        let expected = Some(Span {
353            len: 7,
354            token: Token::Comment,
355        });
356
357        // Partial block comment:
358        assert_eq!(VmemParser::parse_comment("/* X */ FF").unwrap(), expected);
359        // Entirely a block comment:
360        assert_eq!(VmemParser::parse_comment("/* X */").unwrap(), expected);
361        // Unclosed block comment:
362        assert!(VmemParser::parse_comment("/* X").is_err());
363
364        // Line comment ending in newline:
365        assert_eq!(
366            VmemParser::parse_comment(concat!("// XXXX", '\n', "FF")).unwrap(),
367            expected
368        );
369        // Line comment ending at EOF:
370        assert_eq!(VmemParser::parse_comment("// XXXX").unwrap(), expected);
371    }
372
373    #[test]
374    fn whitespace() {
375        // No whitespace:
376        assert_eq!(VmemParser::parse_whitespace("FF").unwrap(), None);
377
378        let expected = Some(Span {
379            len: 2,
380            token: Token::Whitespace,
381        });
382        // Partial whitespace:
383        assert_eq!(VmemParser::parse_whitespace(" 	FF").unwrap(), expected);
384        // Entirely whitespace:
385        assert_eq!(VmemParser::parse_whitespace(" 	").unwrap(), expected);
386    }
387
388    #[test]
389    fn addr_stride() {
390        let input = r#"
391            @000 000000
392            @010 012345 6789AB CDEFFE
393            @200 DCBA98 765432
394        "#;
395        let mut expected = Vmem {
396            sections: vec![
397                Section {
398                    addr: 0x0,
399                    data: vec![Word::new(vec![0x00, 0x00, 0x00])],
400                },
401                Section {
402                    addr: 0x10,
403                    data: vec![
404                        Word::new(vec![0x01, 0x23, 0x45]),
405                        Word::new(vec![0x67, 0x89, 0xab]),
406                        Word::new(vec![0xcd, 0xef, 0xfe]),
407                    ],
408                },
409                Section {
410                    addr: 0x200,
411                    data: vec![
412                        Word::new(vec![0xdc, 0xba, 0x98]),
413                        Word::new(vec![0x76, 0x54, 0x32]),
414                    ],
415                },
416            ],
417        };
418
419        // Using a stride of 1 word per address/index
420        assert_eq!(VmemParser::parse(input, None).unwrap(), expected);
421        // Using a stride of 1 byte per address/index, where each word is 3 bytes
422        expected.sections[1].addr = 0x10 * 3;
423        expected.sections[2].addr = 0x200 * 3;
424        assert_eq!(VmemParser::parse(input, Some(3)).unwrap(), expected);
425        // Using a stride of 1 byte per address/index, where each word is 10 bytes
426        expected.sections[1].addr = 0x10 * 10;
427        expected.sections[2].addr = 0x200 * 10;
428        assert_eq!(VmemParser::parse(input, Some(10)).unwrap(), expected);
429    }
430}