|
|
@@ -1,6 +1,7 @@
|
|
|
use std::collections::HashMap;
|
|
|
use std::error::Error;
|
|
|
-use std::fmt;
|
|
|
+use std::{fmt, io, fs};
|
|
|
+use std::io::Read;
|
|
|
use super::*;
|
|
|
use unicode_xid::UnicodeXID;
|
|
|
|
|
|
@@ -13,6 +14,7 @@ pub struct Lexemes {
|
|
|
esc_oct: char,
|
|
|
com_outer: char,
|
|
|
com_inner: char,
|
|
|
+ include_delim: char,
|
|
|
escapes: HashMap<char, char>
|
|
|
}
|
|
|
|
|
|
@@ -27,6 +29,7 @@ impl Default for Lexemes {
|
|
|
esc_oct: 'o',
|
|
|
com_outer: '/',
|
|
|
com_inner: '*',
|
|
|
+ include_delim: '#',
|
|
|
escapes: HashMap::new(),
|
|
|
};
|
|
|
|
|
|
@@ -44,6 +47,7 @@ impl Default for Lexemes {
|
|
|
/// Where in the input the tokenizer was when it hit an unexpected end of
/// input; carried by `ErrorKind::UnexpectedEOF` and turned into the
/// "Unexpected EOF ..." message suffix.
pub enum Location {
|
|
|
/// Reported as "in string constant".
InString,
|
|
|
/// Reported as "in string escape".
InStringEscape,
|
|
|
/// Reported as "in include": input ended before the closing include
/// delimiter was seen.
+ InInclude,
|
|
|
}
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
@@ -64,6 +68,8 @@ pub enum ErrorKind {
|
|
|
BadEscapeValue(EscapeKind, String, Option<Box<Error>>),
|
|
|
BadNumericLiteral(NumericKind, String, Option<Box<Error>>),
|
|
|
UnknownChar(char),
|
|
|
+ IncludeError(io::Error),
|
|
|
+ TooManyRecursions(usize),
|
|
|
}
|
|
|
|
|
|
#[derive(Debug)]
|
|
|
@@ -83,6 +89,7 @@ impl ErrorType {
|
|
|
ErrorKind::UnexpectedEOF(ref loc) => format!("Unexpected EOF {}", match *loc {
|
|
|
Location::InString => "in string constant",
|
|
|
Location::InStringEscape => "in string escape",
|
|
|
+ Location::InInclude => "in include",
|
|
|
}),
|
|
|
ErrorKind::BadEscapeValue(ref kind, ref val, ref err) => format!("Bad {} escape {}: {:?}", match *kind {
|
|
|
EscapeKind::Hexadecimal => "hexadecimal",
|
|
|
@@ -93,6 +100,8 @@ impl ErrorType {
|
|
|
NumericKind::Float => "floating point",
|
|
|
}, val, err),
|
|
|
ErrorKind::UnknownChar(c) => format!("Unknown character {}", c),
|
|
|
+ ErrorKind::IncludeError(ref e) => format!("Error including file: {:?}", e),
|
|
|
+ ErrorKind::TooManyRecursions(n) => format!("Include recursed too many times ({})", n),
|
|
|
};
|
|
|
|
|
|
ret
|
|
|
@@ -117,7 +126,7 @@ impl Error for ErrorType {
|
|
|
Some(ref err) => Some(&**err),
|
|
|
None => None,
|
|
|
},
|
|
|
- ErrorKind::UnexpectedEOF(_) | ErrorKind::UnknownChar(_) => None,
|
|
|
+ _ => None,
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
@@ -134,16 +143,56 @@ fn char_in(s: &str, c: char) -> bool {
|
|
|
s.chars().find(|&x| x == c).map_or(false, |_| true)
|
|
|
}
|
|
|
|
|
|
/// An owning character iterator over a `String`.
///
/// Unlike `str::chars`, it does not borrow the text it walks: it stores the
/// string together with a byte offset, so the iterator can be kept in a
/// collection and polled again later from where it stopped.
pub struct ResumableChars {
    /// The text being iterated.
    string: String,
    /// Byte offset of the next character to yield. Always lies on a char
    /// boundary and never exceeds `string.len()`.
    pos: usize,
}

impl ResumableChars {
    /// Creates an iterator positioned at the first character of `s`.
    pub fn new(s: String) -> ResumableChars {
        ResumableChars { string: s, pos: 0 }
    }
}

impl Iterator for ResumableChars {
    type Item = char;

    /// Yields the character at the current offset and advances past it.
    ///
    /// Returns `None` once the whole string has been consumed, and keeps
    /// returning `None` on every later call.
    fn next(&mut self) -> Option<char> {
        // Slicing at `pos == len` yields "", so exhaustion needs no special case.
        let next = self.string[self.pos..].chars().next();
        if let Some(ch) = next {
            // Advance by exactly the encoded width of the character so `pos`
            // stops at `string.len()` instead of overshooting it.
            self.pos += ch.len_utf8();
        }
        next
    }
}
|
|
|
+
|
|
|
/// Tokenizer over a stream of characters, with support for pulling
/// characters from a stack of included sources.
pub struct Tokenizer<T: Iterator<Item=char>> {
|
|
|
/// Base character source; `next_char` falls back to it when no stacked
/// reader yields a character.
reader: T,
|
|
|
/// Stack of include readers registered via `push_reader`; `next_char`
/// polls it from the most recently pushed entry downwards before
/// touching `reader`.
+ reader_stack: Vec<ResumableChars>,
|
|
|
/// A single pending character; when set, `next_char` returns it first
/// and clears it.
pushback: Option<char>,
|
|
|
/// Lexeme configuration consulted while tokenizing (e.g. the include
/// and string delimiters).
lexemes: Lexemes,
|
|
|
}
|
|
|
|
|
|
impl<T: Iterator<Item=char>> Tokenizer<T> {
|
|
|
/// Depth limit for `reader_stack`: `push_reader` compares the current stack
/// length against it and fails with `ErrorKind::TooManyRecursions` once the
/// limit is exceeded.
+ const MAX_INCLUDE_RECURSIONS: usize = 256;
|
|
|
+
|
|
|
pub fn new(reader: T) -> Tokenizer<T> {
|
|
|
Tokenizer {
|
|
|
reader: reader,
|
|
|
+ reader_stack: Vec::new(),
|
|
|
pushback: None,
|
|
|
lexemes: Default::default(),
|
|
|
}
|
|
|
@@ -159,23 +208,49 @@ impl<T: Iterator<Item=char>> Tokenizer<T> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ pub fn push_reader(&mut self, rc: ResumableChars) -> Result<(), ErrorType> {
|
|
|
+ if self.reader_stack.len() > Self::MAX_INCLUDE_RECURSIONS {
|
|
|
+ Err(ErrorType::new(ErrorKind::TooManyRecursions(self.reader_stack.len())))
|
|
|
+ } else {
|
|
|
+ self.reader_stack.push(rc);
|
|
|
+ Ok(())
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
fn next_char(&mut self) -> Option<char> {
|
|
|
match self.pushback {
|
|
|
Some(c) => {
|
|
|
self.pushback = None;
|
|
|
Some(c)
|
|
|
},
|
|
|
- None => self.reader.next(),
|
|
|
+ None => {
|
|
|
+ let mut ret = None;
|
|
|
+ let mut produced_idx: usize = 0;
|
|
|
+ let len = self.reader_stack.len();
|
|
|
+
|
|
|
+ for (idx, rc) in self.reader_stack.iter_mut().enumerate().rev() {
|
|
|
+ match rc.next() {
|
|
|
+ Some(c) => {
|
|
|
+ ret = Some(c);
|
|
|
+ produced_idx = idx;
|
|
|
+ break;
|
|
|
+ },
|
|
|
+ None => {},
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ match ret {
|
|
|
+ Some(c) => {
|
|
|
+ self.reader_stack.truncate(produced_idx + 1);
|
|
|
+ Some(c)
|
|
|
+ },
|
|
|
+ None => self.reader.next(),
|
|
|
+ }
|
|
|
+ },
|
|
|
}
|
|
|
}
|
|
|
|
|
|
pub fn next_token(&mut self) -> Result<Token, ErrorType> {
|
|
|
- let res = self._next_token();
|
|
|
- eprintln!("next_token: {:?}", res);
|
|
|
- res
|
|
|
- }
|
|
|
-
|
|
|
- fn _next_token(&mut self) -> Result<Token, ErrorType> {
|
|
|
let mut c = self.next_char();
|
|
|
if c == None {
|
|
|
return Ok(Token::EOF);
|
|
|
@@ -216,6 +291,34 @@ impl<T: Iterator<Item=char>> Tokenizer<T> {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ /* Inclusion */
|
|
|
+ if cc == self.lexemes.include_delim {
|
|
|
+ let mut buffer = String::new();
|
|
|
+
|
|
|
+ loop {
|
|
|
+ let nc = self.next_char();
|
|
|
+ if nc == None {
|
|
|
+ return Err(ErrorType::new(ErrorKind::UnexpectedEOF(Location::InInclude)));
|
|
|
+ }
|
|
|
+ let ncc = nc.unwrap();
|
|
|
+
|
|
|
+ if ncc == self.lexemes.include_delim {
|
|
|
+ break;
|
|
|
+ } else {
|
|
|
+ buffer.push(ncc);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ let mut f = match fs::File::open(buffer) {
|
|
|
+ Err(err) => return Err(ErrorType::new(ErrorKind::IncludeError(err))),
|
|
|
+ Ok(f) => f,
|
|
|
+ };
|
|
|
+ let mut contents = String::new();
|
|
|
+ f.read_to_string(&mut contents);
|
|
|
+ self.push_reader(ResumableChars::new(contents))?;
|
|
|
+ return self.next_token()
|
|
|
+ }
|
|
|
+
|
|
|
/* Strings */
|
|
|
if char_in(&self.lexemes.string_delim, cc) {
|
|
|
let mut buffer = String::new();
|