Auto merge of #30684 - tshepang:rustfmt-lexer-part2, r=nrc

This commit is contained in:
bors
2016-01-13 12:22:51 +00:00
2 changed files with 764 additions and 519 deletions
+61 -49
View File
@@ -43,10 +43,8 @@ pub struct Comment {
}
pub fn is_doc_comment(s: &str) -> bool {
(s.starts_with("///") && super::is_doc_comment(s)) ||
s.starts_with("//!") ||
(s.starts_with("/**") && is_block_doc_comment(s)) ||
s.starts_with("/*!")
(s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
(s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
}
pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
@@ -64,18 +62,18 @@ fn vertical_trim(lines: Vec<String>) -> Vec<String> {
let mut i = 0;
let mut j = lines.len();
// first line of all-stars should be omitted
if !lines.is_empty() &&
lines[0].chars().all(|c| c == '*') {
if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
i += 1;
}
while i < j && lines[i].trim().is_empty() {
i += 1;
}
// like the first, a last line of all stars should be omitted
if j > i && lines[j - 1]
.chars()
.skip(1)
.all(|c| c == '*') {
if j > i &&
lines[j - 1]
.chars()
.skip(1)
.all(|c| c == '*') {
j -= 1;
}
while j > i && lines[j - 1].trim().is_empty() {
@@ -85,7 +83,7 @@ fn vertical_trim(lines: Vec<String>) -> Vec<String> {
}
/// remove a "[ \t]*\*" block from each line, if possible
fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
let mut i = usize::MAX;
let mut can_trim = true;
let mut first = true;
@@ -114,9 +112,9 @@ fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
}
if can_trim {
lines.iter().map(|line| {
(&line[i + 1..line.len()]).to_string()
}).collect()
lines.iter()
.map(|line| (&line[i + 1..line.len()]).to_string())
.collect()
} else {
lines
}
@@ -132,9 +130,9 @@ fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
if comment.starts_with("/*") {
let lines = comment[3..comment.len() - 2]
.lines()
.map(|s| s.to_string())
.collect::<Vec<String> >();
.lines()
.map(|s| s.to_string())
.collect::<Vec<String>>();
let lines = vertical_trim(lines);
let lines = horizontal_trim(lines);
@@ -154,8 +152,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
});
}
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
comments: &mut Vec<Comment>) {
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) {
while is_whitespace(rdr.curr) && !rdr.is_eof() {
if rdr.col == CharPos(0) && rdr.curr_is('\n') {
push_blank_line_comment(rdr, &mut *comments);
@@ -165,19 +162,21 @@ fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
}
fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
fn read_shebang_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment>) {
debug!(">>> shebang comment");
let p = rdr.last_pos;
debug!("<<< shebang comment");
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
lines: vec!(rdr.read_one_line_comment()),
pos: p
lines: vec![rdr.read_one_line_comment()],
pos: p,
});
}
fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
fn read_line_comments(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment>) {
debug!(">>> line comments");
let p = rdr.last_pos;
@@ -197,7 +196,7 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
comments.push(Comment {
style: if code_to_the_left { Trailing } else { Isolated },
lines: lines,
pos: p
pos: p,
});
}
}
@@ -220,8 +219,7 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
return Some(cursor);
}
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
s: String, col: CharPos) {
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) {
let len = s.len();
let s1 = match all_whitespace(&s[..], col) {
Some(col) => {
@@ -239,7 +237,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
fn read_block_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment> ) {
comments: &mut Vec<Comment>) {
debug!(">>> block comment");
let p = rdr.last_pos;
let mut lines: Vec<String> = Vec::new();
@@ -261,7 +259,7 @@ fn read_block_comment(rdr: &mut StringReader,
rdr.bump();
}
if is_block_doc_comment(&curr_line[..]) {
return
return;
}
assert!(!curr_line.contains('\n'));
lines.push(curr_line);
@@ -273,9 +271,7 @@ fn read_block_comment(rdr: &mut StringReader,
panic!(rdr.fatal("unterminated block comment"));
}
if rdr.curr_is('\n') {
trim_whitespace_prefix_and_push_line(&mut lines,
curr_line,
col);
trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
curr_line = String::new();
rdr.bump();
} else {
@@ -291,30 +287,36 @@ fn read_block_comment(rdr: &mut StringReader,
rdr.bump();
curr_line.push('/');
level -= 1;
} else { rdr.bump(); }
} else {
rdr.bump();
}
}
}
}
if !curr_line.is_empty() {
trim_whitespace_prefix_and_push_line(&mut lines,
curr_line,
col);
trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
}
}
let mut style = if code_to_the_left { Trailing } else { Isolated };
let mut style = if code_to_the_left {
Trailing
} else {
Isolated
};
rdr.consume_non_eol_whitespace();
if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 {
style = Mixed;
}
debug!("<<< block comment");
comments.push(Comment {style: style, lines: lines, pos: p});
comments.push(Comment {
style: style,
lines: lines,
pos: p,
});
}
fn consume_comment(rdr: &mut StringReader,
code_to_the_left: bool,
comments: &mut Vec<Comment> ) {
fn consume_comment(rdr: &mut StringReader, code_to_the_left: bool, comments: &mut Vec<Comment>) {
debug!(">>> consume comment");
if rdr.curr_is('/') && rdr.nextch_is('/') {
read_line_comments(rdr, code_to_the_left, comments);
@@ -322,7 +324,9 @@ fn consume_comment(rdr: &mut StringReader,
read_block_comment(rdr, code_to_the_left, comments);
} else if rdr.curr_is('#') && rdr.nextch_is('!') {
read_shebang_comment(rdr, code_to_the_left, comments);
} else { panic!(); }
} else {
panic!();
}
debug!("<<< consume comment");
}
@@ -337,7 +341,7 @@ pub struct Literal {
pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
path: String,
srdr: &mut Read)
-> (Vec<Comment>, Vec<Literal>) {
-> (Vec<Comment>, Vec<Literal>) {
let mut src = Vec::new();
srdr.read_to_end(&mut src).unwrap();
let src = String::from_utf8(src).unwrap();
@@ -366,12 +370,15 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
let bstart = rdr.last_pos;
rdr.next_token();
//discard, and look ahead; we're working with internal state
// discard, and look ahead; we're working with internal state
let TokenAndSpan { tok, sp } = rdr.peek();
if tok.is_lit() {
rdr.with_str_from(bstart, |s| {
debug!("tok lit: {}", s);
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
literals.push(Literal {
lit: s.to_string(),
pos: sp.lo,
});
})
} else {
debug!("tok: {}", pprust::token_to_string(&tok));
@@ -386,31 +393,36 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
mod tests {
use super::*;
#[test] fn test_block_doc_comment_1() {
#[test]
fn test_block_doc_comment_1() {
let comment = "/**\n * Test \n ** Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(comment);
assert_eq!(stripped, " Test \n* Test\n Test");
}
#[test] fn test_block_doc_comment_2() {
#[test]
fn test_block_doc_comment_2() {
let comment = "/**\n * Test\n * Test\n*/";
let stripped = strip_doc_comment_decoration(comment);
assert_eq!(stripped, " Test\n Test");
}
#[test] fn test_block_doc_comment_3() {
#[test]
fn test_block_doc_comment_3() {
let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
let stripped = strip_doc_comment_decoration(comment);
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
}
#[test] fn test_block_doc_comment_4() {
#[test]
fn test_block_doc_comment_4() {
let comment = "/*******************\n test\n *********************/";
let stripped = strip_doc_comment_decoration(comment);
assert_eq!(stripped, " test");
}
#[test] fn test_line_doc_comment() {
#[test]
fn test_line_doc_comment() {
let stripped = strip_doc_comment_decoration("/// test");
assert_eq!(stripped, " test");
let stripped = strip_doc_comment_decoration("///! test");
+703 -470
View File
@@ -42,8 +42,8 @@ fn real_token(&mut self) -> TokenAndSpan {
match t.tok {
token::Whitespace | token::Comment | token::Shebang(_) => {
t = self.next_token();
},
_ => break
}
_ => break,
}
}
t
@@ -67,17 +67,19 @@ pub struct StringReader<'a> {
/// The last character to be read
pub curr: Option<char>,
pub filemap: Rc<codemap::FileMap>,
/* cached: */
// cached:
pub peek_tok: token::Token,
pub peek_span: Span,
// cache a direct reference to the source text, so that we don't have to
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
source_text: Rc<String>
source_text: Rc<String>,
}
impl<'a> Reader for StringReader<'a> {
fn is_eof(&self) -> bool { self.curr.is_none() }
fn is_eof(&self) -> bool {
self.curr.is_none()
}
/// Return the next token. EFFECT: advances the string_reader.
fn next_token(&mut self) -> TokenAndSpan {
let ret_val = TokenAndSpan {
@@ -128,10 +130,12 @@ fn peek(&self) -> TokenAndSpan {
impl<'a> StringReader<'a> {
/// For comments.rs, which hackily pokes into pos and curr
pub fn new_raw<'b>(span_diagnostic: &'b Handler,
filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
filemap: Rc<codemap::FileMap>)
-> StringReader<'b> {
if filemap.src.is_none() {
span_diagnostic.bug(&format!("Cannot lex filemap without source: {}",
filemap.name)[..]);
span_diagnostic.bug(&format!("Cannot lex filemap \
without source: {}",
filemap.name)[..]);
}
let source_text = (*filemap.src.as_ref().unwrap()).clone();
@@ -143,17 +147,18 @@ pub fn new_raw<'b>(span_diagnostic: &'b Handler,
col: CharPos(0),
curr: Some('\n'),
filemap: filemap,
/* dummy values; not read */
// dummy values; not read
peek_tok: token::Eof,
peek_span: codemap::DUMMY_SP,
source_text: source_text
source_text: source_text,
};
sr.bump();
sr
}
pub fn new<'b>(span_diagnostic: &'b Handler,
filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
filemap: Rc<codemap::FileMap>)
-> StringReader<'b> {
let mut sr = StringReader::new_raw(span_diagnostic, filemap);
sr.advance_token();
sr
@@ -189,7 +194,9 @@ fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
let mut m = m.to_string();
m.push_str(": ");
for c in c.escape_default() { m.push(c) }
for c in c.escape_default() {
m.push(c)
}
self.fatal_span_(from_pos, to_pos, &m[..])
}
fn struct_fatal_span_char(&self,
@@ -197,10 +204,12 @@ fn struct_fatal_span_char(&self,
to_pos: BytePos,
m: &str,
c: char)
-> DiagnosticBuilder<'a> {
-> DiagnosticBuilder<'a> {
let mut m = m.to_string();
m.push_str(": ");
for c in c.escape_default() { m.push(c) }
for c in c.escape_default() {
m.push(c)
}
self.span_diagnostic.struct_span_fatal(codemap::mk_sp(from_pos, to_pos), &m[..])
}
@@ -209,7 +218,9 @@ fn struct_fatal_span_char(&self,
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
let mut m = m.to_string();
m.push_str(": ");
for c in c.escape_default() { m.push(c) }
for c in c.escape_default() {
m.push(c)
}
self.err_span_(from_pos, to_pos, &m[..]);
}
fn struct_err_span_char(&self,
@@ -217,10 +228,12 @@ fn struct_err_span_char(&self,
to_pos: BytePos,
m: &str,
c: char)
-> DiagnosticBuilder<'a> {
-> DiagnosticBuilder<'a> {
let mut m = m.to_string();
m.push_str(": ");
for c in c.escape_default() { m.push(c) }
for c in c.escape_default() {
m.push(c)
}
self.span_diagnostic.struct_span_err(codemap::mk_sp(from_pos, to_pos), &m[..])
}
@@ -241,7 +254,7 @@ fn advance_token(&mut self) {
Some(comment) => {
self.peek_span = comment.sp;
self.peek_tok = comment.tok;
},
}
None => {
if self.is_eof() {
self.peek_tok = token::Eof;
@@ -249,8 +262,7 @@ fn advance_token(&mut self) {
} else {
let start_bytepos = self.last_pos;
self.peek_tok = self.next_token_inner();
self.peek_span = codemap::mk_sp(start_bytepos,
self.last_pos);
self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos);
};
}
}
@@ -263,8 +275,8 @@ fn byte_offset(&self, pos: BytePos) -> BytePos {
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `self.last_pos`, meaning the slice does not include
/// the character `self.curr`.
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T where
F: FnOnce(&str) -> T,
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
{
self.with_str_from_to(start, self.last_pos, f)
}
@@ -285,16 +297,14 @@ pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
/// Calls `f` with a string slice of the source text spanning from `start`
/// up to but excluding `end`.
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T where
F: FnOnce(&str) -> T,
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
where F: FnOnce(&str) -> T
{
f(&self.source_text[self.byte_offset(start).to_usize()..
self.byte_offset(end).to_usize()])
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
}
/// Converts CRLF to LF in the given string, raising an error on bare CR.
fn translate_crlf<'b>(&self, start: BytePos,
s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
let mut i = 0;
while i < s.len() {
let ch = char_at(s, i);
@@ -311,15 +321,21 @@ fn translate_crlf<'b>(&self, start: BytePos,
}
return s.into();
fn translate_crlf_(rdr: &StringReader, start: BytePos,
s: &str, errmsg: &str, mut i: usize) -> String {
fn translate_crlf_(rdr: &StringReader,
start: BytePos,
s: &str,
errmsg: &str,
mut i: usize)
-> String {
let mut buf = String::with_capacity(s.len());
let mut j = 0;
while i < s.len() {
let ch = char_at(s, i);
let next = i + ch.len_utf8();
if ch == '\r' {
if j < i { buf.push_str(&s[j..i]); }
if j < i {
buf.push_str(&s[j..i]);
}
j = next;
if next >= s.len() || char_at(s, next) != '\n' {
let pos = start + BytePos(i as u32);
@@ -329,7 +345,9 @@ fn translate_crlf_(rdr: &StringReader, start: BytePos,
}
i = next;
}
if j < s.len() { buf.push_str(&s[j..]); }
if j < s.len() {
buf.push_str(&s[j..]);
}
buf
}
}
@@ -378,7 +396,9 @@ pub fn nextch_is(&self, c: char) -> bool {
pub fn nextnextch(&self) -> Option<char> {
let offset = self.byte_offset(self.pos).to_usize();
let s = &self.source_text[..];
if offset >= s.len() { return None }
if offset >= s.len() {
return None;
}
let next = offset + char_at(s, offset).len_utf8();
if next < s.len() {
Some(char_at(s, next))
@@ -394,7 +414,7 @@ pub fn nextnextch_is(&self, c: char) -> bool {
/// Eats <XID_start><XID_continue>*, if possible.
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
if !ident_start(self.curr) {
return None
return None;
}
let start = self.last_pos;
while ident_continue(self.curr) {
@@ -417,10 +437,11 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
Some(c) => {
if c.is_whitespace() {
self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos),
"called consume_any_line_comment, but there was whitespace");
"called consume_any_line_comment, but there \
was whitespace");
}
},
None => { }
}
None => {}
}
if self.curr_is('/') {
@@ -443,13 +464,14 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
'\r' => {
if self.nextch_is('\n') {
// CRLF
break
break;
} else if doc_comment {
self.err_span_(self.last_pos, self.pos,
self.err_span_(self.last_pos,
self.pos,
"bare CR not allowed in doc-comment");
}
}
_ => ()
_ => (),
}
self.bump();
}
@@ -465,21 +487,22 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
Some(TokenAndSpan {
tok: tok,
sp: codemap::mk_sp(start_bpos, self.last_pos)
sp: codemap::mk_sp(start_bpos, self.last_pos),
})
})
} else {
Some(TokenAndSpan {
tok: token::Comment,
sp: codemap::mk_sp(start_bpos, self.last_pos)
sp: codemap::mk_sp(start_bpos, self.last_pos),
})
}
};
}
Some('*') => {
self.bump(); self.bump();
self.bump();
self.bump();
self.scan_block_comment()
}
_ => None
_ => None,
}
} else if self.curr_is('#') {
if self.nextch_is('!') {
@@ -498,10 +521,12 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
if loc.line == 1 && loc.col == CharPos(0) {
// FIXME: Add shebang "token", return it
let start = self.last_pos;
while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
while !self.curr_is('\n') && !self.is_eof() {
self.bump();
}
return Some(TokenAndSpan {
tok: token::Shebang(self.name_from(start)),
sp: codemap::mk_sp(start, self.last_pos)
sp: codemap::mk_sp(start, self.last_pos),
});
}
}
@@ -521,18 +546,20 @@ fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
let c = self.scan_comment();
debug!("scanning a comment {:?}", c);
c
},
}
c if is_whitespace(Some(c)) => {
let start_bpos = self.last_pos;
while is_whitespace(self.curr) { self.bump(); }
while is_whitespace(self.curr) {
self.bump();
}
let c = Some(TokenAndSpan {
tok: token::Whitespace,
sp: codemap::mk_sp(start_bpos, self.last_pos)
sp: codemap::mk_sp(start_bpos, self.last_pos),
});
debug!("scanning whitespace: {:?}", c);
c
},
_ => None
}
_ => None,
}
}
@@ -567,7 +594,7 @@ fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
'\r' => {
has_cr = true;
}
_ => ()
_ => (),
}
self.bump();
}
@@ -576,17 +603,20 @@ fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
// but comments with only "*"s between two "/"s are not
let tok = if is_block_doc_comment(string) {
let string = if has_cr {
self.translate_crlf(start_bpos, string,
self.translate_crlf(start_bpos,
string,
"bare CR not allowed in block doc-comment")
} else { string.into() };
} else {
string.into()
};
token::DocComment(token::intern(&string[..]))
} else {
token::Comment
};
Some(TokenAndSpan{
Some(TokenAndSpan {
tok: tok,
sp: codemap::mk_sp(start_bpos, self.last_pos)
sp: codemap::mk_sp(start_bpos, self.last_pos),
})
})
}
@@ -602,23 +632,27 @@ fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
let mut len = 0;
loop {
let c = self.curr;
if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
if c == Some('_') {
debug!("skipping a _");
self.bump();
continue;
}
match c.and_then(|cc| cc.to_digit(scan_radix)) {
Some(_) => {
debug!("{:?} in scan_digits", c);
// check that the hypothetical digit is actually
// in range for the true radix
if c.unwrap().to_digit(real_radix).is_none() {
self.err_span_(self.last_pos, self.pos,
&format!("invalid digit for a base {} literal",
real_radix));
self.err_span_(self.last_pos,
self.pos,
&format!("invalid digit for a base {} literal", real_radix));
}
len += 1;
self.bump();
}
_ => return len
_ => return len,
}
};
}
}
/// Lex a LIT_INTEGER or a LIT_FLOAT
@@ -631,9 +665,21 @@ fn scan_number(&mut self, c: char) -> token::Lit {
if c == '0' {
match self.curr.unwrap_or('\0') {
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
'b' => {
self.bump();
base = 2;
num_digits = self.scan_digits(2, 10);
}
'o' => {
self.bump();
base = 8;
num_digits = self.scan_digits(8, 10);
}
'x' => {
self.bump();
base = 16;
num_digits = self.scan_digits(16, 16);
}
'0'...'9' | '_' | '.' => {
num_digits = self.scan_digits(10, 10) + 1;
}
@@ -649,15 +695,19 @@ fn scan_number(&mut self, c: char) -> token::Lit {
}
if num_digits == 0 {
self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
self.err_span_(start_bpos,
self.last_pos,
"no valid digits found for number");
return token::Integer(token::intern("0"));
}
// might be a float, but don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0')
.is_xid_start() {
if self.curr_is('.') && !self.nextch_is('.') &&
!self.nextch()
.unwrap_or('\0')
.is_xid_start() {
// might have stuff after the ., and if it does, it needs to start
// with a number
self.bump();
@@ -683,11 +733,7 @@ fn scan_number(&mut self, c: char) -> token::Lit {
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
/// error if too many or too few digits are encountered.
fn scan_hex_digits(&mut self,
n_digits: usize,
delim: char,
below_0x7f_only: bool)
-> bool {
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
debug!("scanning {} digits until {:?}", n_digits, delim);
let start_bpos = self.last_pos;
let mut accum_int = 0;
@@ -702,15 +748,19 @@ fn scan_hex_digits(&mut self,
}
if self.curr_is(delim) {
let last_bpos = self.last_pos;
self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
self.err_span_(start_bpos,
last_bpos,
"numeric character escape is too short");
valid = false;
break;
}
let c = self.curr.unwrap_or('\x00');
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
self.err_span_char(self.last_pos, self.pos,
"invalid character in numeric character escape", c);
self.err_span_char(self.last_pos,
self.pos,
"invalid character in numeric character escape",
c);
valid = false;
0
@@ -721,8 +771,8 @@ fn scan_hex_digits(&mut self,
if below_0x7f_only && accum_int >= 0x80 {
self.err_span_(start_bpos,
self.last_pos,
"this form of character escape may only be used \
with characters in the range [\\x00-\\x7f]");
"this form of character escape may only be used with characters in \
the range [\\x00-\\x7f]");
valid = false;
}
@@ -741,8 +791,12 @@ fn scan_hex_digits(&mut self,
/// `start` is the position of `first_source_char`, which is already consumed.
///
/// Returns true if there was a valid char/byte, false otherwise.
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
ascii_only: bool, delim: char) -> bool {
fn scan_char_or_byte(&mut self,
start: BytePos,
first_source_char: char,
ascii_only: bool,
delim: char)
-> bool {
match first_source_char {
'\\' => {
// '\X' for some X must be a character constant:
@@ -750,7 +804,7 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
let escaped_pos = self.last_pos;
self.bump();
match escaped {
None => {}, // EOF here is an error that will be checked later.
None => {} // EOF here is an error that will be checked later.
Some(e) => {
return match e {
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
@@ -760,18 +814,19 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
self.scan_unicode_escape(delim) && !ascii_only
} else {
let span = codemap::mk_sp(start, self.last_pos);
self.span_diagnostic.struct_span_err(span,
"incorrect unicode escape sequence")
self.span_diagnostic
.struct_span_err(span, "incorrect unicode escape sequence")
.span_help(span,
"format of unicode escape sequences is `\\u{…}`")
"format of unicode escape sequences is \
`\\u{…}`")
.emit();
false
};
if ascii_only {
self.err_span_(start, self.last_pos,
"unicode escape sequences cannot be used as a byte or in \
a byte string"
);
self.err_span_(start,
self.last_pos,
"unicode escape sequences cannot be used as a \
byte or in a byte string");
}
valid
@@ -779,27 +834,32 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
'\n' if delim == '"' => {
self.consume_whitespace();
true
},
}
'\r' if delim == '"' && self.curr_is('\n') => {
self.consume_whitespace();
true
}
c => {
let last_pos = self.last_pos;
let mut err = self.struct_err_span_char(
escaped_pos, last_pos,
if ascii_only { "unknown byte escape" }
else { "unknown character escape" },
c);
let mut err = self.struct_err_span_char(escaped_pos,
last_pos,
if ascii_only {
"unknown byte escape"
} else {
"unknown character \
escape"
},
c);
if e == '\r' {
err.span_help(codemap::mk_sp(escaped_pos, last_pos),
"this is an isolated carriage return; consider checking \
your editor and version control settings");
"this is an isolated carriage return; consider \
checking your editor and version control \
settings");
}
if (e == '{' || e == '}') && !ascii_only {
err.span_help(codemap::mk_sp(escaped_pos, last_pos),
"if used in a formatting string, \
curly braces are escaped with `{{` and `}}`");
"if used in a formatting string, curly braces \
are escaped with `{{` and `}}`");
}
err.emit();
false
@@ -810,11 +870,14 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
}
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
let last_pos = self.last_pos;
self.err_span_char(
start, last_pos,
if ascii_only { "byte constant must be escaped" }
else { "character constant must be escaped" },
first_source_char);
self.err_span_char(start,
last_pos,
if ascii_only {
"byte constant must be escaped"
} else {
"character constant must be escaped"
},
first_source_char);
return false;
}
'\r' => {
@@ -822,18 +885,22 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
self.bump();
return true;
} else {
self.err_span_(start, self.last_pos,
self.err_span_(start,
self.last_pos,
"bare CR not allowed in string, use \\r instead");
return false;
}
}
_ => if ascii_only && first_source_char > '\x7F' {
let last_pos = self.last_pos;
self.err_span_char(
start, last_pos,
"byte constant must be ASCII. \
Use a \\xHH escape for a non-ASCII byte", first_source_char);
return false;
_ => {
if ascii_only && first_source_char > '\x7F' {
let last_pos = self.last_pos;
self.err_span_char(start,
last_pos,
"byte constant must be ASCII. Use a \\xHH escape for a \
non-ASCII byte",
first_source_char);
return false;
}
}
}
true
@@ -854,18 +921,22 @@ fn scan_unicode_escape(&mut self, delim: char) -> bool {
let c = match self.curr {
Some(c) => c,
None => {
panic!(self.fatal_span_(start_bpos, self.last_pos,
panic!(self.fatal_span_(start_bpos,
self.last_pos,
"unterminated unicode escape (found EOF)"));
}
};
accum_int *= 16;
accum_int += c.to_digit(16).unwrap_or_else(|| {
if c == delim {
panic!(self.fatal_span_(self.last_pos, self.pos,
panic!(self.fatal_span_(self.last_pos,
self.pos,
"unterminated unicode escape (needed a `}`)"));
} else {
self.err_span_char(self.last_pos, self.pos,
"invalid character in unicode escape", c);
self.err_span_char(self.last_pos,
self.pos,
"invalid character in unicode escape",
c);
}
valid = false;
0
@@ -875,13 +946,16 @@ fn scan_unicode_escape(&mut self, delim: char) -> bool {
}
if count > 6 {
self.err_span_(start_bpos, self.last_pos,
"overlong unicode escape (can have at most 6 hex digits)");
self.err_span_(start_bpos,
self.last_pos,
"overlong unicode escape (can have at most 6 hex digits)");
valid = false;
}
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
self.err_span_(start_bpos, self.last_pos, "invalid unicode character escape");
self.err_span_(start_bpos,
self.last_pos,
"invalid unicode character escape");
valid = false;
}
@@ -897,7 +971,9 @@ fn scan_float_exponent(&mut self) {
self.bump();
}
if self.scan_digits(10, 10) == 0 {
self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
self.err_span_(self.last_pos,
self.pos,
"expected at least one digit in exponent")
}
}
}
@@ -906,11 +982,22 @@ fn scan_float_exponent(&mut self) {
/// error if it isn't.
fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
match base {
16 => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \
supported"),
8 => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"),
2 => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"),
_ => ()
16 => {
self.err_span_(start_bpos,
last_bpos,
"hexadecimal float literal is not supported")
}
8 => {
self.err_span_(start_bpos,
last_bpos,
"octal float literal is not supported")
}
2 => {
self.err_span_(start_bpos,
last_bpos,
"binary float literal is not supported")
}
_ => (),
}
}
@@ -928,14 +1015,18 @@ fn binop(&mut self, op: token::BinOpToken) -> token::Token {
/// token, and updates the interner
fn next_token_inner(&mut self) -> token::Token {
let c = self.curr;
if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
if ident_start(c) &&
match (c.unwrap(), self.nextch(), self.nextnextch()) {
// Note: r as in r" or r#" is part of a raw string literal,
// b as in b' is part of a byte literal.
// They are not identifiers, and are handled further down.
('r', Some('"'), _) | ('r', Some('#'), _) |
('b', Some('"'), _) | ('b', Some('\''), _) |
('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
_ => true
('r', Some('"'), _) |
('r', Some('#'), _) |
('b', Some('"'), _) |
('b', Some('\''), _) |
('b', Some('r'), Some('"')) |
('b', Some('r'), Some('#')) => false,
_ => true,
} {
let start = self.last_pos;
while ident_continue(self.curr) {
@@ -960,299 +1051,393 @@ fn next_token_inner(&mut self) -> token::Token {
let num = self.scan_number(c.unwrap());
let suffix = self.scan_optional_raw_name();
debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
return token::Literal(num, suffix)
return token::Literal(num, suffix);
}
match c.expect("next_token_inner called at EOF") {
// One-byte tokens.
';' => { self.bump(); return token::Semi; }
',' => { self.bump(); return token::Comma; }
'.' => {
self.bump();
return if self.curr_is('.') {
self.bump();
if self.curr_is('.') {
self.bump();
token::DotDotDot
} else {
token::DotDot
}
} else {
token::Dot
};
}
'(' => { self.bump(); return token::OpenDelim(token::Paren); }
')' => { self.bump(); return token::CloseDelim(token::Paren); }
'{' => { self.bump(); return token::OpenDelim(token::Brace); }
'}' => { self.bump(); return token::CloseDelim(token::Brace); }
'[' => { self.bump(); return token::OpenDelim(token::Bracket); }
']' => { self.bump(); return token::CloseDelim(token::Bracket); }
'@' => { self.bump(); return token::At; }
'#' => { self.bump(); return token::Pound; }
'~' => { self.bump(); return token::Tilde; }
'?' => { self.bump(); return token::Question; }
':' => {
self.bump();
if self.curr_is(':') {
// One-byte tokens.
';' => {
self.bump();
return token::ModSep;
} else {
return token::Colon;
return token::Semi;
}
}
'$' => { self.bump(); return token::Dollar; }
// Multi-byte tokens.
'=' => {
self.bump();
if self.curr_is('=') {
',' => {
self.bump();
return token::EqEq;
} else if self.curr_is('>') {
self.bump();
return token::FatArrow;
} else {
return token::Eq;
return token::Comma;
}
}
'!' => {
self.bump();
if self.curr_is('=') {
'.' => {
self.bump();
return token::Ne;
} else { return token::Not; }
}
'<' => {
self.bump();
match self.curr.unwrap_or('\x00') {
'=' => { self.bump(); return token::Le; }
'<' => { return self.binop(token::Shl); }
'-' => {
return if self.curr_is('.') {
self.bump();
if self.curr_is('.') {
self.bump();
token::DotDotDot
} else {
token::DotDot
}
} else {
token::Dot
};
}
'(' => {
self.bump();
return token::OpenDelim(token::Paren);
}
')' => {
self.bump();
return token::CloseDelim(token::Paren);
}
'{' => {
self.bump();
return token::OpenDelim(token::Brace);
}
'}' => {
self.bump();
return token::CloseDelim(token::Brace);
}
'[' => {
self.bump();
return token::OpenDelim(token::Bracket);
}
']' => {
self.bump();
return token::CloseDelim(token::Bracket);
}
'@' => {
self.bump();
return token::At;
}
'#' => {
self.bump();
return token::Pound;
}
'~' => {
self.bump();
return token::Tilde;
}
'?' => {
self.bump();
return token::Question;
}
':' => {
self.bump();
if self.curr_is(':') {
self.bump();
return token::ModSep;
} else {
return token::Colon;
}
}
'$' => {
self.bump();
return token::Dollar;
}
// Multi-byte tokens.
'=' => {
self.bump();
if self.curr_is('=') {
self.bump();
return token::EqEq;
} else if self.curr_is('>') {
self.bump();
return token::FatArrow;
} else {
return token::Eq;
}
}
'!' => {
self.bump();
if self.curr_is('=') {
self.bump();
return token::Ne;
} else {
return token::Not;
}
}
'<' => {
self.bump();
match self.curr.unwrap_or('\x00') {
_ => { return token::LArrow; }
'=' => {
self.bump();
return token::Le;
}
'<' => {
return self.binop(token::Shl);
}
'-' => {
self.bump();
match self.curr.unwrap_or('\x00') {
_ => {
return token::LArrow;
}
}
}
_ => {
return token::Lt;
}
}
}
_ => { return token::Lt; }
}
}
'>' => {
self.bump();
match self.curr.unwrap_or('\x00') {
'=' => { self.bump(); return token::Ge; }
'>' => { return self.binop(token::Shr); }
_ => { return token::Gt; }
}
}
'\'' => {
// Either a character constant 'a' OR a lifetime name 'abc
self.bump();
let start = self.last_pos;
// the eof will be picked up by the final `'` check below
let c2 = self.curr.unwrap_or('\x00');
self.bump();
// If the character is an ident start not followed by another single
// quote, then this is a lifetime name:
if ident_start(Some(c2)) && !self.curr_is('\'') {
while ident_continue(self.curr) {
self.bump();
'>' => {
self.bump();
match self.curr.unwrap_or('\x00') {
'=' => {
self.bump();
return token::Ge;
}
'>' => {
return self.binop(token::Shr);
}
_ => {
return token::Gt;
}
}
}
'\'' => {
// Either a character constant 'a' OR a lifetime name 'abc
self.bump();
let start = self.last_pos;
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let ident = self.with_str_from(start, |lifetime_name| {
str_to_ident(&format!("'{}", lifetime_name))
});
// the eof will be picked up by the final `'` check below
let c2 = self.curr.unwrap_or('\x00');
self.bump();
// Conjure up a "keyword checking ident" to make sure that
// the lifetime name is not a keyword.
let keyword_checking_ident =
self.with_str_from(start, |lifetime_name| {
// If the character is an ident start not followed by another single
// quote, then this is a lifetime name:
if ident_start(Some(c2)) && !self.curr_is('\'') {
while ident_continue(self.curr) {
self.bump();
}
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let ident = self.with_str_from(start, |lifetime_name| {
str_to_ident(&format!("'{}", lifetime_name))
});
// Conjure up a "keyword checking ident" to make sure that
// the lifetime name is not a keyword.
let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
str_to_ident(lifetime_name)
});
let keyword_checking_token =
&token::Ident(keyword_checking_ident, token::Plain);
let last_bpos = self.last_pos;
if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
self.err_span_(start,
last_bpos,
"invalid lifetime name: 'self \
is no longer a special lifetime");
} else if keyword_checking_token.is_any_keyword() &&
!keyword_checking_token.is_keyword(token::keywords::Static)
{
self.err_span_(start,
last_bpos,
"invalid lifetime name");
}
return token::Lifetime(ident);
}
// Otherwise it is a character constant:
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
if !self.curr_is('\'') {
let last_bpos = self.last_pos;
panic!(self.fatal_span_verbose(
// Byte offsetting here is okay because the
// character before position `start` is an
// ascii single quote.
start - BytePos(1), last_bpos,
String::from("character literal may only contain one codepoint")));
}
let id = if valid { self.name_from(start) } else { token::intern("0") };
self.bump(); // advance curr past token
let suffix = self.scan_optional_raw_name();
return token::Literal(token::Char(id), suffix);
}
'b' => {
self.bump();
let lit = match self.curr {
Some('\'') => self.scan_byte(),
Some('"') => self.scan_byte_string(),
Some('r') => self.scan_raw_byte_string(),
_ => unreachable!() // Should have been a token::Ident above.
};
let suffix = self.scan_optional_raw_name();
return token::Literal(lit, suffix);
}
'"' => {
let start_bpos = self.last_pos;
let mut valid = true;
self.bump();
while !self.curr_is('"') {
if self.is_eof() {
let keyword_checking_token = &token::Ident(keyword_checking_ident,
token::Plain);
let last_bpos = self.last_pos;
panic!(self.fatal_span_(start_bpos,
last_bpos,
"unterminated double quote string"));
if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
self.err_span_(start,
last_bpos,
"invalid lifetime name: 'self is no longer a special \
lifetime");
} else if keyword_checking_token.is_any_keyword() &&
!keyword_checking_token.is_keyword(token::keywords::Static) {
self.err_span_(start, last_bpos, "invalid lifetime name");
}
return token::Lifetime(ident);
}
let ch_start = self.last_pos;
let ch = self.curr.unwrap();
self.bump();
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
}
// adjust for the ASCII " at the start of the literal
let id = if valid { self.name_from(start_bpos + BytePos(1)) }
else { token::intern("??") };
self.bump();
let suffix = self.scan_optional_raw_name();
return token::Literal(token::Str_(id), suffix);
}
'r' => {
let start_bpos = self.last_pos;
self.bump();
let mut hash_count = 0;
while self.curr_is('#') {
self.bump();
hash_count += 1;
}
// Otherwise it is a character constant:
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
false,
'\'');
if !self.curr_is('\'') {
let last_bpos = self.last_pos;
panic!(self.fatal_span_verbose(// Byte offsetting here is okay because the
// character before position `start` is an
// ascii single quote.
start - BytePos(1),
last_bpos,
if self.is_eof() {
let last_bpos = self.last_pos;
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
} else if !self.curr_is('"') {
let last_bpos = self.last_pos;
let curr_char = self.curr.unwrap();
panic!(self.fatal_span_char(start_bpos, last_bpos,
"found invalid character; \
only `#` is allowed in raw string delimitation",
curr_char));
String::from("character literal may only \
contain one codepoint")));
}
let id = if valid {
self.name_from(start)
} else {
token::intern("0")
};
self.bump(); // advance curr past token
let suffix = self.scan_optional_raw_name();
return token::Literal(token::Char(id), suffix);
}
self.bump();
let content_start_bpos = self.last_pos;
let mut content_end_bpos;
let mut valid = true;
'outer: loop {
'b' => {
self.bump();
let lit = match self.curr {
Some('\'') => self.scan_byte(),
Some('"') => self.scan_byte_string(),
Some('r') => self.scan_raw_byte_string(),
_ => unreachable!(), // Should have been a token::Ident above.
};
let suffix = self.scan_optional_raw_name();
return token::Literal(lit, suffix);
}
'"' => {
let start_bpos = self.last_pos;
let mut valid = true;
self.bump();
while !self.curr_is('"') {
if self.is_eof() {
let last_bpos = self.last_pos;
panic!(self.fatal_span_(start_bpos,
last_bpos,
"unterminated double quote string"));
}
let ch_start = self.last_pos;
let ch = self.curr.unwrap();
self.bump();
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
false,
'"');
}
// adjust for the ASCII " at the start of the literal
let id = if valid {
self.name_from(start_bpos + BytePos(1))
} else {
token::intern("??")
};
self.bump();
let suffix = self.scan_optional_raw_name();
return token::Literal(token::Str_(id), suffix);
}
'r' => {
let start_bpos = self.last_pos;
self.bump();
let mut hash_count = 0;
while self.curr_is('#') {
self.bump();
hash_count += 1;
}
if self.is_eof() {
let last_bpos = self.last_pos;
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
} else if !self.curr_is('"') {
let last_bpos = self.last_pos;
let curr_char = self.curr.unwrap();
panic!(self.fatal_span_char(start_bpos,
last_bpos,
"found invalid character; only `#` is allowed \
in raw string delimitation",
curr_char));
}
//if self.curr_is('"') {
//content_end_bpos = self.last_pos;
//for _ in 0..hash_count {
//self.bump();
//if !self.curr_is('#') {
//continue 'outer;
let c = self.curr.unwrap();
match c {
'"' => {
content_end_bpos = self.last_pos;
for _ in 0..hash_count {
self.bump();
if !self.curr_is('#') {
continue 'outer;
self.bump();
let content_start_bpos = self.last_pos;
let mut content_end_bpos;
let mut valid = true;
'outer: loop {
if self.is_eof() {
let last_bpos = self.last_pos;
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
}
// if self.curr_is('"') {
// content_end_bpos = self.last_pos;
// for _ in 0..hash_count {
// self.bump();
// if !self.curr_is('#') {
// continue 'outer;
let c = self.curr.unwrap();
match c {
'"' => {
content_end_bpos = self.last_pos;
for _ in 0..hash_count {
self.bump();
if !self.curr_is('#') {
continue 'outer;
}
}
break;
}
'\r' => {
if !self.nextch_is('\n') {
let last_bpos = self.last_pos;
self.err_span_(start_bpos,
last_bpos,
"bare CR not allowed in raw string, use \\r \
instead");
valid = false;
}
}
break;
},
'\r' => {
if !self.nextch_is('\n') {
let last_bpos = self.last_pos;
self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
string, use \\r instead");
valid = false;
}
_ => (),
}
_ => ()
self.bump();
}
self.bump();
let id = if valid {
self.name_from_to(content_start_bpos, content_end_bpos)
} else {
token::intern("??")
};
let suffix = self.scan_optional_raw_name();
return token::Literal(token::StrRaw(id, hash_count), suffix);
}
self.bump();
let id = if valid {
self.name_from_to(content_start_bpos, content_end_bpos)
} else {
token::intern("??")
};
let suffix = self.scan_optional_raw_name();
return token::Literal(token::StrRaw(id, hash_count), suffix);
}
'-' => {
if self.nextch_is('>') {
self.bump();
self.bump();
return token::RArrow;
} else { return self.binop(token::Minus); }
}
'&' => {
if self.nextch_is('&') {
self.bump();
self.bump();
return token::AndAnd;
} else { return self.binop(token::And); }
}
'|' => {
match self.nextch() {
Some('|') => { self.bump(); self.bump(); return token::OrOr; }
_ => { return self.binop(token::Or); }
'-' => {
if self.nextch_is('>') {
self.bump();
self.bump();
return token::RArrow;
} else {
return self.binop(token::Minus);
}
}
'&' => {
if self.nextch_is('&') {
self.bump();
self.bump();
return token::AndAnd;
} else {
return self.binop(token::And);
}
}
'|' => {
match self.nextch() {
Some('|') => {
self.bump();
self.bump();
return token::OrOr;
}
_ => {
return self.binop(token::Or);
}
}
}
'+' => {
return self.binop(token::Plus);
}
'*' => {
return self.binop(token::Star);
}
'/' => {
return self.binop(token::Slash);
}
'^' => {
return self.binop(token::Caret);
}
'%' => {
return self.binop(token::Percent);
}
c => {
let last_bpos = self.last_pos;
let bpos = self.pos;
let mut err = self.struct_fatal_span_char(last_bpos,
bpos,
"unknown start of token",
c);
unicode_chars::check_for_substitution(&self, c, &mut err);
err.emit();
panic!(FatalError);
}
}
'+' => { return self.binop(token::Plus); }
'*' => { return self.binop(token::Star); }
'/' => { return self.binop(token::Slash); }
'^' => { return self.binop(token::Caret); }
'%' => { return self.binop(token::Percent); }
c => {
let last_bpos = self.last_pos;
let bpos = self.pos;
let mut err = self.struct_fatal_span_char(last_bpos,
bpos,
"unknown start of token",
c);
unicode_chars::check_for_substitution(&self, c, &mut err);
err.emit();
panic!(FatalError);
}
}
}
/// Advances the reader past a run of whitespace characters.
///
/// Bumps for as long as `is_whitespace(self.curr)` holds and the reader is
/// not at end of input.
fn consume_whitespace(&mut self) {
    while is_whitespace(self.curr) && !self.is_eof() {
        self.bump();
    }
}
fn read_to_eol(&mut self) -> String {
@@ -1261,14 +1446,16 @@ fn read_to_eol(&mut self) -> String {
val.push(self.curr.unwrap());
self.bump();
}
if self.curr_is('\n') { self.bump(); }
return val
if self.curr_is('\n') {
self.bump();
}
return val;
}
/// Reads the remainder of the current line with `read_to_eol` and returns it.
///
/// # Panics
///
/// Panics if the text that was read does not begin with `//` or `#!`.
fn read_one_line_comment(&mut self) -> String {
    let val = self.read_to_eol();
    // `starts_with` keeps the check inside the assertion even when the line
    // is shorter than two bytes, instead of failing on an out-of-bounds index.
    assert!(val.starts_with("//") || val.starts_with("#!"));
    val
}
@@ -1279,10 +1466,9 @@ fn consume_non_eol_whitespace(&mut self) {
}
fn peeking_at_comment(&self) -> bool {
(self.curr_is('/') && self.nextch_is('/'))
|| (self.curr_is('/') && self.nextch_is('*'))
// consider shebangs comments, but not inner attributes
|| (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
(self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) ||
// consider shebangs comments, but not inner attributes
(self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
}
fn scan_byte(&mut self) -> token::Lit {
@@ -1293,18 +1479,26 @@ fn scan_byte(&mut self) -> token::Lit {
let c2 = self.curr.unwrap_or('\x00');
self.bump();
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
let valid = self.scan_char_or_byte(start,
c2,
// ascii_only =
true,
'\'');
if !self.curr_is('\'') {
// Byte offsetting here is okay because the
// character before position `start` are an
// ascii single quote and ascii 'b'.
let last_pos = self.last_pos;
panic!(self.fatal_span_verbose(
start - BytePos(2), last_pos,
"unterminated byte constant".to_string()));
panic!(self.fatal_span_verbose(start - BytePos(2),
last_pos,
"unterminated byte constant".to_string()));
}
let id = if valid { self.name_from(start) } else { token::intern("?") };
let id = if valid {
self.name_from(start)
} else {
token::intern("?")
};
self.bump(); // advance curr past token
return token::Byte(id);
}
@@ -1327,9 +1521,17 @@ fn scan_byte_string(&mut self) -> token::Lit {
let ch_start = self.last_pos;
let ch = self.curr.unwrap();
self.bump();
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
valid &= self.scan_char_or_byte(ch_start,
ch,
// ascii_only =
true,
'"');
}
let id = if valid { self.name_from(start) } else { token::intern("??") };
let id = if valid {
self.name_from(start)
} else {
token::intern("??")
};
self.bump();
return token::ByteStr(id);
}
@@ -1349,10 +1551,11 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
} else if !self.curr_is('"') {
let last_pos = self.last_pos;
let ch = self.curr.unwrap();
panic!(self.fatal_span_char(start_bpos, last_pos,
"found invalid character; \
only `#` is allowed in raw string delimitation",
ch));
panic!(self.fatal_span_char(start_bpos,
last_pos,
"found invalid character; only `#` is allowed in raw \
string delimitation",
ch));
}
self.bump();
let content_start_bpos = self.last_pos;
@@ -1362,7 +1565,7 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
None => {
let last_pos = self.last_pos;
panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"))
},
}
Some('"') => {
content_end_bpos = self.last_pos;
for _ in 0..hash_count {
@@ -1372,70 +1575,72 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
}
}
break;
},
Some(c) => if c > '\x7F' {
let last_pos = self.last_pos;
self.err_span_char(
last_pos, last_pos, "raw byte string must be ASCII", c);
}
Some(c) => {
if c > '\x7F' {
let last_pos = self.last_pos;
self.err_span_char(last_pos, last_pos, "raw byte string must be ASCII", c);
}
}
}
self.bump();
}
self.bump();
return token::ByteStrRaw(self.name_from_to(content_start_bpos,
content_end_bpos),
hash_count);
return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
hash_count);
}
}
/// Returns `true` if `c` is a space, newline, tab, or carriage return.
///
/// `None` is mapped to `'\x00'` before matching and is therefore reported
/// as non-whitespace.
pub fn is_whitespace(c: Option<char>) -> bool {
    match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
        ' ' | '\n' | '\t' | '\r' => true,
        _ => false,
    }
}
/// Returns `true` if `c` holds a character in the inclusive range `lo..=hi`.
///
/// `None` is never in range.
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
    match c {
        Some(c) => lo <= c && c <= hi,
        None => false,
    }
}
fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
fn is_dec_digit(c: Option<char>) -> bool {
return in_range(c, '0', '9');
}
/// Returns `true` if `s` begins like a line doc comment.
///
/// That is the case when `s` starts with `//!`, or starts with `///` and its
/// fourth byte is not another `/` (so `////` is rejected). The result is
/// also written to the debug log.
pub fn is_doc_comment(s: &str) -> bool {
    let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
              s.starts_with("//!");
    debug!("is {:?} a doc comment? {}", s, res);
    res
}
/// Returns `true` if `s` begins like a block doc comment.
///
/// `s` must start with `/*!`, or with `/**` whose fourth byte is not another
/// `*`, and must be at least five bytes long. The result is also written to
/// the debug log.
pub fn is_block_doc_comment(s: &str) -> bool {
    // Prevent `/**/` from being parsed as a doc comment
    let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
               s.starts_with("/*!")) && s.len() >= 5;
    debug!("is {:?} a doc comment? {}", s, res);
    res
}
fn ident_start(c: Option<char>) -> bool {
let c = match c { Some(c) => c, None => return false };
let c = match c {
Some(c) => c,
None => return false,
};
(c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| c == '_'
|| (c > '\x7f' && c.is_xid_start())
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
}
fn ident_continue(c: Option<char>) -> bool {
let c = match c { Some(c) => c, None => return false };
let c = match c {
Some(c) => c,
None => return false,
};
(c >= 'a' && c <= 'z')
|| (c >= 'A' && c <= 'Z')
|| (c >= '0' && c <= '9')
|| c == '_'
|| (c > '\x7f' && c.is_xid_continue())
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
(c > '\x7f' && c.is_xid_continue())
}
#[cfg(test)]
@@ -1445,7 +1650,7 @@ mod tests {
use codemap::{BytePos, CodeMap, Span, NO_EXPANSION};
use errors;
use parse::token;
use parse::token::{str_to_ident};
use parse::token::str_to_ident;
use std::io;
use std::rc::Rc;
@@ -1458,41 +1663,54 @@ fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
// open a string reader for the given string
fn setup<'a>(cm: &CodeMap,
span_handler: &'a errors::Handler,
teststr: String) -> StringReader<'a> {
teststr: String)
-> StringReader<'a> {
let fm = cm.new_filemap("zebra.rs".to_string(), teststr);
StringReader::new(span_handler, fm)
}
// Lexes a short program and checks the first few tokens, their spans, and
// the reader position after each step.
#[test]
fn t1() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    let mut string_reader = setup(&cm,
                                  &sh,
                                  "/* my source file */ fn main() { println!(\"zebra\"); }\n"
                                      .to_string());
    let id = str_to_ident("fn");
    assert_eq!(string_reader.next_token().tok, token::Comment);
    assert_eq!(string_reader.next_token().tok, token::Whitespace);
    let tok1 = string_reader.next_token();
    let tok2 = TokenAndSpan {
        tok: token::Ident(id, token::Plain),
        sp: Span {
            lo: BytePos(21),
            hi: BytePos(23),
            expn_id: NO_EXPANSION,
        },
    };
    assert_eq!(tok1, tok2);
    assert_eq!(string_reader.next_token().tok, token::Whitespace);
    // the 'main' id is already read:
    assert_eq!(string_reader.last_pos.clone(), BytePos(28));
    // read another token:
    let tok3 = string_reader.next_token();
    let tok4 = TokenAndSpan {
        tok: token::Ident(str_to_ident("main"), token::Plain),
        sp: Span {
            lo: BytePos(24),
            hi: BytePos(28),
            expn_id: NO_EXPANSION,
        },
    };
    assert_eq!(tok3, tok4);
    // the lparen is already read:
    assert_eq!(string_reader.last_pos.clone(), BytePos(29))
}
// check that the given reader produces the desired stream
// of tokens (stop checking after exhausting the expected vec)
fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
for expected_tok in &expected {
assert_eq!(&string_reader.next_token().tok, expected_tok);
}
@@ -1503,7 +1721,8 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
token::Ident(str_to_ident(id), style)
}
#[test] fn doublecolonparsing () {
#[test]
fn doublecolonparsing() {
let cm = Rc::new(CodeMap::new());
let sh = mk_sh(cm.clone());
check_tokenization(setup(&cm, &sh, "a b".to_string()),
@@ -1512,16 +1731,18 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
mk_ident("b", token::Plain)]);
}
// `a::b` with no surrounding whitespace: `a` is lexed as a module name.
#[test]
fn dcparsing_2() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    check_tokenization(setup(&cm, &sh, "a::b".to_string()),
                       vec![mk_ident("a", token::ModName),
                            token::ModSep,
                            mk_ident("b", token::Plain)]);
}
#[test] fn dcparsing_3 () {
#[test]
fn dcparsing_3() {
let cm = Rc::new(CodeMap::new());
let sh = mk_sh(cm.clone());
check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
@@ -1531,54 +1752,61 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
mk_ident("b", token::Plain)]);
}
// `a:: b`: whitespace after the separator yields a Whitespace token, and
// `a` is still lexed as a module name.
#[test]
fn dcparsing_4() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
                       vec![mk_ident("a", token::ModName),
                            token::ModSep,
                            token::Whitespace,
                            mk_ident("b", token::Plain)]);
}
// `'a'` lexes as a char literal holding "a" with no suffix.
#[test]
fn character_a() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
               token::Literal(token::Char(token::intern("a")), None));
}
// `' '` lexes as a char literal holding a single space with no suffix.
#[test]
fn character_space() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
               token::Literal(token::Char(token::intern(" ")), None));
}
// `'\n'` lexes as a char literal whose interned text keeps the escape
// sequence as written (backslash followed by `n`).
#[test]
fn character_escaped() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
               token::Literal(token::Char(token::intern("\\n")), None));
}
// `'abc` lexes as a lifetime whose identifier includes the leading quote.
#[test]
fn lifetime_name() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
               token::Lifetime(token::str_to_ident("'abc")));
}
// A raw string delimited by three hashes keeps its inner quotes, hash,
// backslash and NUL byte verbatim, and records a hash count of 3.
#[test]
fn raw_string() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
                   .next_token()
                   .tok,
               token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
}
#[test] fn literal_suffixes() {
#[test]
fn literal_suffixes() {
let cm = Rc::new(CodeMap::new());
let sh = mk_sh(cm.clone());
macro_rules! test {
@@ -1614,24 +1842,28 @@ macro_rules! test {
Some(token::intern("suffix"))));
}
// Three slashes start a doc comment; four slashes do not.
#[test]
fn line_doc_comments() {
    assert!(is_doc_comment("///"));
    assert!(is_doc_comment("/// blah"));
    assert!(!is_doc_comment("////"));
}
// A block comment containing a nested block comment is lexed as a single
// Comment token, so the char literal after it is the next token.
#[test]
fn nested_block_comments() {
    let cm = Rc::new(CodeMap::new());
    let sh = mk_sh(cm.clone());
    let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
    match lexer.next_token().tok {
        token::Comment => {}
        _ => panic!("expected a comment!"),
    }
    assert_eq!(lexer.next_token().tok,
               token::Literal(token::Char(token::intern("a")), None));
}
#[test] fn crlf_comments() {
#[test]
fn crlf_comments() {
let cm = Rc::new(CodeMap::new());
let sh = mk_sh(cm.clone());
let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
@@ -1639,6 +1871,7 @@ macro_rules! test {
assert_eq!(comment.tok, token::Comment);
assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7)));
assert_eq!(lexer.next_token().tok, token::Whitespace);
assert_eq!(lexer.next_token().tok, token::DocComment(token::intern("/// test")));
assert_eq!(lexer.next_token().tok,
token::DocComment(token::intern("/// test")));
}
}