mirror of
https://github.com/rust-lang/rust.git
synced 2026-04-27 18:57:42 +03:00
Auto merge of #30684 - tshepang:rustfmt-lexer-part2, r=nrc
This commit is contained in:
@@ -43,10 +43,8 @@ pub struct Comment {
|
||||
}
|
||||
|
||||
pub fn is_doc_comment(s: &str) -> bool {
|
||||
(s.starts_with("///") && super::is_doc_comment(s)) ||
|
||||
s.starts_with("//!") ||
|
||||
(s.starts_with("/**") && is_block_doc_comment(s)) ||
|
||||
s.starts_with("/*!")
|
||||
(s.starts_with("///") && super::is_doc_comment(s)) || s.starts_with("//!") ||
|
||||
(s.starts_with("/**") && is_block_doc_comment(s)) || s.starts_with("/*!")
|
||||
}
|
||||
|
||||
pub fn doc_comment_style(comment: &str) -> ast::AttrStyle {
|
||||
@@ -64,18 +62,18 @@ fn vertical_trim(lines: Vec<String>) -> Vec<String> {
|
||||
let mut i = 0;
|
||||
let mut j = lines.len();
|
||||
// first line of all-stars should be omitted
|
||||
if !lines.is_empty() &&
|
||||
lines[0].chars().all(|c| c == '*') {
|
||||
if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
|
||||
i += 1;
|
||||
}
|
||||
while i < j && lines[i].trim().is_empty() {
|
||||
i += 1;
|
||||
}
|
||||
// like the first, a last line of all stars should be omitted
|
||||
if j > i && lines[j - 1]
|
||||
.chars()
|
||||
.skip(1)
|
||||
.all(|c| c == '*') {
|
||||
if j > i &&
|
||||
lines[j - 1]
|
||||
.chars()
|
||||
.skip(1)
|
||||
.all(|c| c == '*') {
|
||||
j -= 1;
|
||||
}
|
||||
while j > i && lines[j - 1].trim().is_empty() {
|
||||
@@ -85,7 +83,7 @@ fn vertical_trim(lines: Vec<String>) -> Vec<String> {
|
||||
}
|
||||
|
||||
/// remove a "[ \t]*\*" block from each line, if possible
|
||||
fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
|
||||
fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
|
||||
let mut i = usize::MAX;
|
||||
let mut can_trim = true;
|
||||
let mut first = true;
|
||||
@@ -114,9 +112,9 @@ fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
|
||||
}
|
||||
|
||||
if can_trim {
|
||||
lines.iter().map(|line| {
|
||||
(&line[i + 1..line.len()]).to_string()
|
||||
}).collect()
|
||||
lines.iter()
|
||||
.map(|line| (&line[i + 1..line.len()]).to_string())
|
||||
.collect()
|
||||
} else {
|
||||
lines
|
||||
}
|
||||
@@ -132,9 +130,9 @@ fn horizontal_trim(lines: Vec<String> ) -> Vec<String> {
|
||||
|
||||
if comment.starts_with("/*") {
|
||||
let lines = comment[3..comment.len() - 2]
|
||||
.lines()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String> >();
|
||||
.lines()
|
||||
.map(|s| s.to_string())
|
||||
.collect::<Vec<String>>();
|
||||
|
||||
let lines = vertical_trim(lines);
|
||||
let lines = horizontal_trim(lines);
|
||||
@@ -154,8 +152,7 @@ fn push_blank_line_comment(rdr: &StringReader, comments: &mut Vec<Comment>) {
|
||||
});
|
||||
}
|
||||
|
||||
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
|
||||
comments: &mut Vec<Comment>) {
|
||||
fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader, comments: &mut Vec<Comment>) {
|
||||
while is_whitespace(rdr.curr) && !rdr.is_eof() {
|
||||
if rdr.col == CharPos(0) && rdr.curr_is('\n') {
|
||||
push_blank_line_comment(rdr, &mut *comments);
|
||||
@@ -165,19 +162,21 @@ fn consume_whitespace_counting_blank_lines(rdr: &mut StringReader,
|
||||
}
|
||||
|
||||
|
||||
fn read_shebang_comment(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
fn read_shebang_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment>) {
|
||||
debug!(">>> shebang comment");
|
||||
let p = rdr.last_pos;
|
||||
debug!("<<< shebang comment");
|
||||
comments.push(Comment {
|
||||
style: if code_to_the_left { Trailing } else { Isolated },
|
||||
lines: vec!(rdr.read_one_line_comment()),
|
||||
pos: p
|
||||
lines: vec![rdr.read_one_line_comment()],
|
||||
pos: p,
|
||||
});
|
||||
}
|
||||
|
||||
fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
fn read_line_comments(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment>) {
|
||||
debug!(">>> line comments");
|
||||
let p = rdr.last_pos;
|
||||
@@ -197,7 +196,7 @@ fn read_line_comments(rdr: &mut StringReader, code_to_the_left: bool,
|
||||
comments.push(Comment {
|
||||
style: if code_to_the_left { Trailing } else { Isolated },
|
||||
lines: lines,
|
||||
pos: p
|
||||
pos: p,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -220,8 +219,7 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
|
||||
return Some(cursor);
|
||||
}
|
||||
|
||||
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
|
||||
s: String, col: CharPos) {
|
||||
fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String>, s: String, col: CharPos) {
|
||||
let len = s.len();
|
||||
let s1 = match all_whitespace(&s[..], col) {
|
||||
Some(col) => {
|
||||
@@ -239,7 +237,7 @@ fn trim_whitespace_prefix_and_push_line(lines: &mut Vec<String> ,
|
||||
|
||||
fn read_block_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment> ) {
|
||||
comments: &mut Vec<Comment>) {
|
||||
debug!(">>> block comment");
|
||||
let p = rdr.last_pos;
|
||||
let mut lines: Vec<String> = Vec::new();
|
||||
@@ -261,7 +259,7 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
rdr.bump();
|
||||
}
|
||||
if is_block_doc_comment(&curr_line[..]) {
|
||||
return
|
||||
return;
|
||||
}
|
||||
assert!(!curr_line.contains('\n'));
|
||||
lines.push(curr_line);
|
||||
@@ -273,9 +271,7 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
panic!(rdr.fatal("unterminated block comment"));
|
||||
}
|
||||
if rdr.curr_is('\n') {
|
||||
trim_whitespace_prefix_and_push_line(&mut lines,
|
||||
curr_line,
|
||||
col);
|
||||
trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
|
||||
curr_line = String::new();
|
||||
rdr.bump();
|
||||
} else {
|
||||
@@ -291,30 +287,36 @@ fn read_block_comment(rdr: &mut StringReader,
|
||||
rdr.bump();
|
||||
curr_line.push('/');
|
||||
level -= 1;
|
||||
} else { rdr.bump(); }
|
||||
} else {
|
||||
rdr.bump();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !curr_line.is_empty() {
|
||||
trim_whitespace_prefix_and_push_line(&mut lines,
|
||||
curr_line,
|
||||
col);
|
||||
trim_whitespace_prefix_and_push_line(&mut lines, curr_line, col);
|
||||
}
|
||||
}
|
||||
|
||||
let mut style = if code_to_the_left { Trailing } else { Isolated };
|
||||
let mut style = if code_to_the_left {
|
||||
Trailing
|
||||
} else {
|
||||
Isolated
|
||||
};
|
||||
rdr.consume_non_eol_whitespace();
|
||||
if !rdr.is_eof() && !rdr.curr_is('\n') && lines.len() == 1 {
|
||||
style = Mixed;
|
||||
}
|
||||
debug!("<<< block comment");
|
||||
comments.push(Comment {style: style, lines: lines, pos: p});
|
||||
comments.push(Comment {
|
||||
style: style,
|
||||
lines: lines,
|
||||
pos: p,
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
fn consume_comment(rdr: &mut StringReader,
|
||||
code_to_the_left: bool,
|
||||
comments: &mut Vec<Comment> ) {
|
||||
fn consume_comment(rdr: &mut StringReader, code_to_the_left: bool, comments: &mut Vec<Comment>) {
|
||||
debug!(">>> consume comment");
|
||||
if rdr.curr_is('/') && rdr.nextch_is('/') {
|
||||
read_line_comments(rdr, code_to_the_left, comments);
|
||||
@@ -322,7 +324,9 @@ fn consume_comment(rdr: &mut StringReader,
|
||||
read_block_comment(rdr, code_to_the_left, comments);
|
||||
} else if rdr.curr_is('#') && rdr.nextch_is('!') {
|
||||
read_shebang_comment(rdr, code_to_the_left, comments);
|
||||
} else { panic!(); }
|
||||
} else {
|
||||
panic!();
|
||||
}
|
||||
debug!("<<< consume comment");
|
||||
}
|
||||
|
||||
@@ -337,7 +341,7 @@ pub struct Literal {
|
||||
pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
|
||||
path: String,
|
||||
srdr: &mut Read)
|
||||
-> (Vec<Comment>, Vec<Literal>) {
|
||||
-> (Vec<Comment>, Vec<Literal>) {
|
||||
let mut src = Vec::new();
|
||||
srdr.read_to_end(&mut src).unwrap();
|
||||
let src = String::from_utf8(src).unwrap();
|
||||
@@ -366,12 +370,15 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
|
||||
|
||||
let bstart = rdr.last_pos;
|
||||
rdr.next_token();
|
||||
//discard, and look ahead; we're working with internal state
|
||||
// discard, and look ahead; we're working with internal state
|
||||
let TokenAndSpan { tok, sp } = rdr.peek();
|
||||
if tok.is_lit() {
|
||||
rdr.with_str_from(bstart, |s| {
|
||||
debug!("tok lit: {}", s);
|
||||
literals.push(Literal {lit: s.to_string(), pos: sp.lo});
|
||||
literals.push(Literal {
|
||||
lit: s.to_string(),
|
||||
pos: sp.lo,
|
||||
});
|
||||
})
|
||||
} else {
|
||||
debug!("tok: {}", pprust::token_to_string(&tok));
|
||||
@@ -386,31 +393,36 @@ pub fn gather_comments_and_literals(span_diagnostic: &errors::Handler,
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test] fn test_block_doc_comment_1() {
|
||||
#[test]
|
||||
fn test_block_doc_comment_1() {
|
||||
let comment = "/**\n * Test \n ** Test\n * Test\n*/";
|
||||
let stripped = strip_doc_comment_decoration(comment);
|
||||
assert_eq!(stripped, " Test \n* Test\n Test");
|
||||
}
|
||||
|
||||
#[test] fn test_block_doc_comment_2() {
|
||||
#[test]
|
||||
fn test_block_doc_comment_2() {
|
||||
let comment = "/**\n * Test\n * Test\n*/";
|
||||
let stripped = strip_doc_comment_decoration(comment);
|
||||
assert_eq!(stripped, " Test\n Test");
|
||||
}
|
||||
|
||||
#[test] fn test_block_doc_comment_3() {
|
||||
#[test]
|
||||
fn test_block_doc_comment_3() {
|
||||
let comment = "/**\n let a: *i32;\n *a = 5;\n*/";
|
||||
let stripped = strip_doc_comment_decoration(comment);
|
||||
assert_eq!(stripped, " let a: *i32;\n *a = 5;");
|
||||
}
|
||||
|
||||
#[test] fn test_block_doc_comment_4() {
|
||||
#[test]
|
||||
fn test_block_doc_comment_4() {
|
||||
let comment = "/*******************\n test\n *********************/";
|
||||
let stripped = strip_doc_comment_decoration(comment);
|
||||
assert_eq!(stripped, " test");
|
||||
}
|
||||
|
||||
#[test] fn test_line_doc_comment() {
|
||||
#[test]
|
||||
fn test_line_doc_comment() {
|
||||
let stripped = strip_doc_comment_decoration("/// test");
|
||||
assert_eq!(stripped, " test");
|
||||
let stripped = strip_doc_comment_decoration("///! test");
|
||||
|
||||
+703
-470
@@ -42,8 +42,8 @@ fn real_token(&mut self) -> TokenAndSpan {
|
||||
match t.tok {
|
||||
token::Whitespace | token::Comment | token::Shebang(_) => {
|
||||
t = self.next_token();
|
||||
},
|
||||
_ => break
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
t
|
||||
@@ -67,17 +67,19 @@ pub struct StringReader<'a> {
|
||||
/// The last character to be read
|
||||
pub curr: Option<char>,
|
||||
pub filemap: Rc<codemap::FileMap>,
|
||||
/* cached: */
|
||||
// cached:
|
||||
pub peek_tok: token::Token,
|
||||
pub peek_span: Span,
|
||||
|
||||
// cache a direct reference to the source text, so that we don't have to
|
||||
// retrieve it via `self.filemap.src.as_ref().unwrap()` all the time.
|
||||
source_text: Rc<String>
|
||||
source_text: Rc<String>,
|
||||
}
|
||||
|
||||
impl<'a> Reader for StringReader<'a> {
|
||||
fn is_eof(&self) -> bool { self.curr.is_none() }
|
||||
fn is_eof(&self) -> bool {
|
||||
self.curr.is_none()
|
||||
}
|
||||
/// Return the next token. EFFECT: advances the string_reader.
|
||||
fn next_token(&mut self) -> TokenAndSpan {
|
||||
let ret_val = TokenAndSpan {
|
||||
@@ -128,10 +130,12 @@ fn peek(&self) -> TokenAndSpan {
|
||||
impl<'a> StringReader<'a> {
|
||||
/// For comments.rs, which hackily pokes into pos and curr
|
||||
pub fn new_raw<'b>(span_diagnostic: &'b Handler,
|
||||
filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
|
||||
filemap: Rc<codemap::FileMap>)
|
||||
-> StringReader<'b> {
|
||||
if filemap.src.is_none() {
|
||||
span_diagnostic.bug(&format!("Cannot lex filemap without source: {}",
|
||||
filemap.name)[..]);
|
||||
span_diagnostic.bug(&format!("Cannot lex filemap \
|
||||
without source: {}",
|
||||
filemap.name)[..]);
|
||||
}
|
||||
|
||||
let source_text = (*filemap.src.as_ref().unwrap()).clone();
|
||||
@@ -143,17 +147,18 @@ pub fn new_raw<'b>(span_diagnostic: &'b Handler,
|
||||
col: CharPos(0),
|
||||
curr: Some('\n'),
|
||||
filemap: filemap,
|
||||
/* dummy values; not read */
|
||||
// dummy values; not read
|
||||
peek_tok: token::Eof,
|
||||
peek_span: codemap::DUMMY_SP,
|
||||
source_text: source_text
|
||||
source_text: source_text,
|
||||
};
|
||||
sr.bump();
|
||||
sr
|
||||
}
|
||||
|
||||
pub fn new<'b>(span_diagnostic: &'b Handler,
|
||||
filemap: Rc<codemap::FileMap>) -> StringReader<'b> {
|
||||
filemap: Rc<codemap::FileMap>)
|
||||
-> StringReader<'b> {
|
||||
let mut sr = StringReader::new_raw(span_diagnostic, filemap);
|
||||
sr.advance_token();
|
||||
sr
|
||||
@@ -189,7 +194,9 @@ fn err_span_(&self, from_pos: BytePos, to_pos: BytePos, m: &str) {
|
||||
fn fatal_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) -> FatalError {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
for c in c.escape_default() { m.push(c) }
|
||||
for c in c.escape_default() {
|
||||
m.push(c)
|
||||
}
|
||||
self.fatal_span_(from_pos, to_pos, &m[..])
|
||||
}
|
||||
fn struct_fatal_span_char(&self,
|
||||
@@ -197,10 +204,12 @@ fn struct_fatal_span_char(&self,
|
||||
to_pos: BytePos,
|
||||
m: &str,
|
||||
c: char)
|
||||
-> DiagnosticBuilder<'a> {
|
||||
-> DiagnosticBuilder<'a> {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
for c in c.escape_default() { m.push(c) }
|
||||
for c in c.escape_default() {
|
||||
m.push(c)
|
||||
}
|
||||
self.span_diagnostic.struct_span_fatal(codemap::mk_sp(from_pos, to_pos), &m[..])
|
||||
}
|
||||
|
||||
@@ -209,7 +218,9 @@ fn struct_fatal_span_char(&self,
|
||||
fn err_span_char(&self, from_pos: BytePos, to_pos: BytePos, m: &str, c: char) {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
for c in c.escape_default() { m.push(c) }
|
||||
for c in c.escape_default() {
|
||||
m.push(c)
|
||||
}
|
||||
self.err_span_(from_pos, to_pos, &m[..]);
|
||||
}
|
||||
fn struct_err_span_char(&self,
|
||||
@@ -217,10 +228,12 @@ fn struct_err_span_char(&self,
|
||||
to_pos: BytePos,
|
||||
m: &str,
|
||||
c: char)
|
||||
-> DiagnosticBuilder<'a> {
|
||||
-> DiagnosticBuilder<'a> {
|
||||
let mut m = m.to_string();
|
||||
m.push_str(": ");
|
||||
for c in c.escape_default() { m.push(c) }
|
||||
for c in c.escape_default() {
|
||||
m.push(c)
|
||||
}
|
||||
self.span_diagnostic.struct_span_err(codemap::mk_sp(from_pos, to_pos), &m[..])
|
||||
}
|
||||
|
||||
@@ -241,7 +254,7 @@ fn advance_token(&mut self) {
|
||||
Some(comment) => {
|
||||
self.peek_span = comment.sp;
|
||||
self.peek_tok = comment.tok;
|
||||
},
|
||||
}
|
||||
None => {
|
||||
if self.is_eof() {
|
||||
self.peek_tok = token::Eof;
|
||||
@@ -249,8 +262,7 @@ fn advance_token(&mut self) {
|
||||
} else {
|
||||
let start_bytepos = self.last_pos;
|
||||
self.peek_tok = self.next_token_inner();
|
||||
self.peek_span = codemap::mk_sp(start_bytepos,
|
||||
self.last_pos);
|
||||
self.peek_span = codemap::mk_sp(start_bytepos, self.last_pos);
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -263,8 +275,8 @@ fn byte_offset(&self, pos: BytePos) -> BytePos {
|
||||
/// Calls `f` with a string slice of the source text spanning from `start`
|
||||
/// up to but excluding `self.last_pos`, meaning the slice does not include
|
||||
/// the character `self.curr`.
|
||||
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T where
|
||||
F: FnOnce(&str) -> T,
|
||||
pub fn with_str_from<T, F>(&self, start: BytePos, f: F) -> T
|
||||
where F: FnOnce(&str) -> T
|
||||
{
|
||||
self.with_str_from_to(start, self.last_pos, f)
|
||||
}
|
||||
@@ -285,16 +297,14 @@ pub fn name_from_to(&self, start: BytePos, end: BytePos) -> ast::Name {
|
||||
|
||||
/// Calls `f` with a string slice of the source text spanning from `start`
|
||||
/// up to but excluding `end`.
|
||||
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T where
|
||||
F: FnOnce(&str) -> T,
|
||||
fn with_str_from_to<T, F>(&self, start: BytePos, end: BytePos, f: F) -> T
|
||||
where F: FnOnce(&str) -> T
|
||||
{
|
||||
f(&self.source_text[self.byte_offset(start).to_usize()..
|
||||
self.byte_offset(end).to_usize()])
|
||||
f(&self.source_text[self.byte_offset(start).to_usize()..self.byte_offset(end).to_usize()])
|
||||
}
|
||||
|
||||
/// Converts CRLF to LF in the given string, raising an error on bare CR.
|
||||
fn translate_crlf<'b>(&self, start: BytePos,
|
||||
s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
|
||||
fn translate_crlf<'b>(&self, start: BytePos, s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
|
||||
let mut i = 0;
|
||||
while i < s.len() {
|
||||
let ch = char_at(s, i);
|
||||
@@ -311,15 +321,21 @@ fn translate_crlf<'b>(&self, start: BytePos,
|
||||
}
|
||||
return s.into();
|
||||
|
||||
fn translate_crlf_(rdr: &StringReader, start: BytePos,
|
||||
s: &str, errmsg: &str, mut i: usize) -> String {
|
||||
fn translate_crlf_(rdr: &StringReader,
|
||||
start: BytePos,
|
||||
s: &str,
|
||||
errmsg: &str,
|
||||
mut i: usize)
|
||||
-> String {
|
||||
let mut buf = String::with_capacity(s.len());
|
||||
let mut j = 0;
|
||||
while i < s.len() {
|
||||
let ch = char_at(s, i);
|
||||
let next = i + ch.len_utf8();
|
||||
if ch == '\r' {
|
||||
if j < i { buf.push_str(&s[j..i]); }
|
||||
if j < i {
|
||||
buf.push_str(&s[j..i]);
|
||||
}
|
||||
j = next;
|
||||
if next >= s.len() || char_at(s, next) != '\n' {
|
||||
let pos = start + BytePos(i as u32);
|
||||
@@ -329,7 +345,9 @@ fn translate_crlf_(rdr: &StringReader, start: BytePos,
|
||||
}
|
||||
i = next;
|
||||
}
|
||||
if j < s.len() { buf.push_str(&s[j..]); }
|
||||
if j < s.len() {
|
||||
buf.push_str(&s[j..]);
|
||||
}
|
||||
buf
|
||||
}
|
||||
}
|
||||
@@ -378,7 +396,9 @@ pub fn nextch_is(&self, c: char) -> bool {
|
||||
pub fn nextnextch(&self) -> Option<char> {
|
||||
let offset = self.byte_offset(self.pos).to_usize();
|
||||
let s = &self.source_text[..];
|
||||
if offset >= s.len() { return None }
|
||||
if offset >= s.len() {
|
||||
return None;
|
||||
}
|
||||
let next = offset + char_at(s, offset).len_utf8();
|
||||
if next < s.len() {
|
||||
Some(char_at(s, next))
|
||||
@@ -394,7 +414,7 @@ pub fn nextnextch_is(&self, c: char) -> bool {
|
||||
/// Eats <XID_start><XID_continue>*, if possible.
|
||||
fn scan_optional_raw_name(&mut self) -> Option<ast::Name> {
|
||||
if !ident_start(self.curr) {
|
||||
return None
|
||||
return None;
|
||||
}
|
||||
let start = self.last_pos;
|
||||
while ident_continue(self.curr) {
|
||||
@@ -417,10 +437,11 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
Some(c) => {
|
||||
if c.is_whitespace() {
|
||||
self.span_diagnostic.span_err(codemap::mk_sp(self.last_pos, self.last_pos),
|
||||
"called consume_any_line_comment, but there was whitespace");
|
||||
"called consume_any_line_comment, but there \
|
||||
was whitespace");
|
||||
}
|
||||
},
|
||||
None => { }
|
||||
}
|
||||
None => {}
|
||||
}
|
||||
|
||||
if self.curr_is('/') {
|
||||
@@ -443,13 +464,14 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
'\r' => {
|
||||
if self.nextch_is('\n') {
|
||||
// CRLF
|
||||
break
|
||||
break;
|
||||
} else if doc_comment {
|
||||
self.err_span_(self.last_pos, self.pos,
|
||||
self.err_span_(self.last_pos,
|
||||
self.pos,
|
||||
"bare CR not allowed in doc-comment");
|
||||
}
|
||||
}
|
||||
_ => ()
|
||||
_ => (),
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
@@ -465,21 +487,22 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
|
||||
Some(TokenAndSpan {
|
||||
tok: tok,
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos)
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos),
|
||||
})
|
||||
})
|
||||
} else {
|
||||
Some(TokenAndSpan {
|
||||
tok: token::Comment,
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos)
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos),
|
||||
})
|
||||
}
|
||||
};
|
||||
}
|
||||
Some('*') => {
|
||||
self.bump(); self.bump();
|
||||
self.bump();
|
||||
self.bump();
|
||||
self.scan_block_comment()
|
||||
}
|
||||
_ => None
|
||||
_ => None,
|
||||
}
|
||||
} else if self.curr_is('#') {
|
||||
if self.nextch_is('!') {
|
||||
@@ -498,10 +521,12 @@ fn scan_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
if loc.line == 1 && loc.col == CharPos(0) {
|
||||
// FIXME: Add shebang "token", return it
|
||||
let start = self.last_pos;
|
||||
while !self.curr_is('\n') && !self.is_eof() { self.bump(); }
|
||||
while !self.curr_is('\n') && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
return Some(TokenAndSpan {
|
||||
tok: token::Shebang(self.name_from(start)),
|
||||
sp: codemap::mk_sp(start, self.last_pos)
|
||||
sp: codemap::mk_sp(start, self.last_pos),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -521,18 +546,20 @@ fn scan_whitespace_or_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
let c = self.scan_comment();
|
||||
debug!("scanning a comment {:?}", c);
|
||||
c
|
||||
},
|
||||
}
|
||||
c if is_whitespace(Some(c)) => {
|
||||
let start_bpos = self.last_pos;
|
||||
while is_whitespace(self.curr) { self.bump(); }
|
||||
while is_whitespace(self.curr) {
|
||||
self.bump();
|
||||
}
|
||||
let c = Some(TokenAndSpan {
|
||||
tok: token::Whitespace,
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos)
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos),
|
||||
});
|
||||
debug!("scanning whitespace: {:?}", c);
|
||||
c
|
||||
},
|
||||
_ => None
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -567,7 +594,7 @@ fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
'\r' => {
|
||||
has_cr = true;
|
||||
}
|
||||
_ => ()
|
||||
_ => (),
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
@@ -576,17 +603,20 @@ fn scan_block_comment(&mut self) -> Option<TokenAndSpan> {
|
||||
// but comments with only "*"s between two "/"s are not
|
||||
let tok = if is_block_doc_comment(string) {
|
||||
let string = if has_cr {
|
||||
self.translate_crlf(start_bpos, string,
|
||||
self.translate_crlf(start_bpos,
|
||||
string,
|
||||
"bare CR not allowed in block doc-comment")
|
||||
} else { string.into() };
|
||||
} else {
|
||||
string.into()
|
||||
};
|
||||
token::DocComment(token::intern(&string[..]))
|
||||
} else {
|
||||
token::Comment
|
||||
};
|
||||
|
||||
Some(TokenAndSpan{
|
||||
Some(TokenAndSpan {
|
||||
tok: tok,
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos)
|
||||
sp: codemap::mk_sp(start_bpos, self.last_pos),
|
||||
})
|
||||
})
|
||||
}
|
||||
@@ -602,23 +632,27 @@ fn scan_digits(&mut self, real_radix: u32, scan_radix: u32) -> usize {
|
||||
let mut len = 0;
|
||||
loop {
|
||||
let c = self.curr;
|
||||
if c == Some('_') { debug!("skipping a _"); self.bump(); continue; }
|
||||
if c == Some('_') {
|
||||
debug!("skipping a _");
|
||||
self.bump();
|
||||
continue;
|
||||
}
|
||||
match c.and_then(|cc| cc.to_digit(scan_radix)) {
|
||||
Some(_) => {
|
||||
debug!("{:?} in scan_digits", c);
|
||||
// check that the hypothetical digit is actually
|
||||
// in range for the true radix
|
||||
if c.unwrap().to_digit(real_radix).is_none() {
|
||||
self.err_span_(self.last_pos, self.pos,
|
||||
&format!("invalid digit for a base {} literal",
|
||||
real_radix));
|
||||
self.err_span_(self.last_pos,
|
||||
self.pos,
|
||||
&format!("invalid digit for a base {} literal", real_radix));
|
||||
}
|
||||
len += 1;
|
||||
self.bump();
|
||||
}
|
||||
_ => return len
|
||||
_ => return len,
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// Lex a LIT_INTEGER or a LIT_FLOAT
|
||||
@@ -631,9 +665,21 @@ fn scan_number(&mut self, c: char) -> token::Lit {
|
||||
|
||||
if c == '0' {
|
||||
match self.curr.unwrap_or('\0') {
|
||||
'b' => { self.bump(); base = 2; num_digits = self.scan_digits(2, 10); }
|
||||
'o' => { self.bump(); base = 8; num_digits = self.scan_digits(8, 10); }
|
||||
'x' => { self.bump(); base = 16; num_digits = self.scan_digits(16, 16); }
|
||||
'b' => {
|
||||
self.bump();
|
||||
base = 2;
|
||||
num_digits = self.scan_digits(2, 10);
|
||||
}
|
||||
'o' => {
|
||||
self.bump();
|
||||
base = 8;
|
||||
num_digits = self.scan_digits(8, 10);
|
||||
}
|
||||
'x' => {
|
||||
self.bump();
|
||||
base = 16;
|
||||
num_digits = self.scan_digits(16, 16);
|
||||
}
|
||||
'0'...'9' | '_' | '.' => {
|
||||
num_digits = self.scan_digits(10, 10) + 1;
|
||||
}
|
||||
@@ -649,15 +695,19 @@ fn scan_number(&mut self, c: char) -> token::Lit {
|
||||
}
|
||||
|
||||
if num_digits == 0 {
|
||||
self.err_span_(start_bpos, self.last_pos, "no valid digits found for number");
|
||||
self.err_span_(start_bpos,
|
||||
self.last_pos,
|
||||
"no valid digits found for number");
|
||||
return token::Integer(token::intern("0"));
|
||||
}
|
||||
|
||||
// might be a float, but don't be greedy if this is actually an
|
||||
// integer literal followed by field/method access or a range pattern
|
||||
// (`0..2` and `12.foo()`)
|
||||
if self.curr_is('.') && !self.nextch_is('.') && !self.nextch().unwrap_or('\0')
|
||||
.is_xid_start() {
|
||||
if self.curr_is('.') && !self.nextch_is('.') &&
|
||||
!self.nextch()
|
||||
.unwrap_or('\0')
|
||||
.is_xid_start() {
|
||||
// might have stuff after the ., and if it does, it needs to start
|
||||
// with a number
|
||||
self.bump();
|
||||
@@ -683,11 +733,7 @@ fn scan_number(&mut self, c: char) -> token::Lit {
|
||||
|
||||
/// Scan over `n_digits` hex digits, stopping at `delim`, reporting an
|
||||
/// error if too many or too few digits are encountered.
|
||||
fn scan_hex_digits(&mut self,
|
||||
n_digits: usize,
|
||||
delim: char,
|
||||
below_0x7f_only: bool)
|
||||
-> bool {
|
||||
fn scan_hex_digits(&mut self, n_digits: usize, delim: char, below_0x7f_only: bool) -> bool {
|
||||
debug!("scanning {} digits until {:?}", n_digits, delim);
|
||||
let start_bpos = self.last_pos;
|
||||
let mut accum_int = 0;
|
||||
@@ -702,15 +748,19 @@ fn scan_hex_digits(&mut self,
|
||||
}
|
||||
if self.curr_is(delim) {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_(start_bpos, last_bpos, "numeric character escape is too short");
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"numeric character escape is too short");
|
||||
valid = false;
|
||||
break;
|
||||
}
|
||||
let c = self.curr.unwrap_or('\x00');
|
||||
accum_int *= 16;
|
||||
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
||||
self.err_span_char(self.last_pos, self.pos,
|
||||
"invalid character in numeric character escape", c);
|
||||
self.err_span_char(self.last_pos,
|
||||
self.pos,
|
||||
"invalid character in numeric character escape",
|
||||
c);
|
||||
|
||||
valid = false;
|
||||
0
|
||||
@@ -721,8 +771,8 @@ fn scan_hex_digits(&mut self,
|
||||
if below_0x7f_only && accum_int >= 0x80 {
|
||||
self.err_span_(start_bpos,
|
||||
self.last_pos,
|
||||
"this form of character escape may only be used \
|
||||
with characters in the range [\\x00-\\x7f]");
|
||||
"this form of character escape may only be used with characters in \
|
||||
the range [\\x00-\\x7f]");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
@@ -741,8 +791,12 @@ fn scan_hex_digits(&mut self,
|
||||
/// `start` is the position of `first_source_char`, which is already consumed.
|
||||
///
|
||||
/// Returns true if there was a valid char/byte, false otherwise.
|
||||
fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
ascii_only: bool, delim: char) -> bool {
|
||||
fn scan_char_or_byte(&mut self,
|
||||
start: BytePos,
|
||||
first_source_char: char,
|
||||
ascii_only: bool,
|
||||
delim: char)
|
||||
-> bool {
|
||||
match first_source_char {
|
||||
'\\' => {
|
||||
// '\X' for some X must be a character constant:
|
||||
@@ -750,7 +804,7 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
let escaped_pos = self.last_pos;
|
||||
self.bump();
|
||||
match escaped {
|
||||
None => {}, // EOF here is an error that will be checked later.
|
||||
None => {} // EOF here is an error that will be checked later.
|
||||
Some(e) => {
|
||||
return match e {
|
||||
'n' | 'r' | 't' | '\\' | '\'' | '"' | '0' => true,
|
||||
@@ -760,18 +814,19 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
self.scan_unicode_escape(delim) && !ascii_only
|
||||
} else {
|
||||
let span = codemap::mk_sp(start, self.last_pos);
|
||||
self.span_diagnostic.struct_span_err(span,
|
||||
"incorrect unicode escape sequence")
|
||||
self.span_diagnostic
|
||||
.struct_span_err(span, "incorrect unicode escape sequence")
|
||||
.span_help(span,
|
||||
"format of unicode escape sequences is `\\u{…}`")
|
||||
"format of unicode escape sequences is \
|
||||
`\\u{…}`")
|
||||
.emit();
|
||||
false
|
||||
};
|
||||
if ascii_only {
|
||||
self.err_span_(start, self.last_pos,
|
||||
"unicode escape sequences cannot be used as a byte or in \
|
||||
a byte string"
|
||||
);
|
||||
self.err_span_(start,
|
||||
self.last_pos,
|
||||
"unicode escape sequences cannot be used as a \
|
||||
byte or in a byte string");
|
||||
}
|
||||
valid
|
||||
|
||||
@@ -779,27 +834,32 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
'\n' if delim == '"' => {
|
||||
self.consume_whitespace();
|
||||
true
|
||||
},
|
||||
}
|
||||
'\r' if delim == '"' && self.curr_is('\n') => {
|
||||
self.consume_whitespace();
|
||||
true
|
||||
}
|
||||
c => {
|
||||
let last_pos = self.last_pos;
|
||||
let mut err = self.struct_err_span_char(
|
||||
escaped_pos, last_pos,
|
||||
if ascii_only { "unknown byte escape" }
|
||||
else { "unknown character escape" },
|
||||
c);
|
||||
let mut err = self.struct_err_span_char(escaped_pos,
|
||||
last_pos,
|
||||
if ascii_only {
|
||||
"unknown byte escape"
|
||||
} else {
|
||||
"unknown character \
|
||||
escape"
|
||||
},
|
||||
c);
|
||||
if e == '\r' {
|
||||
err.span_help(codemap::mk_sp(escaped_pos, last_pos),
|
||||
"this is an isolated carriage return; consider checking \
|
||||
your editor and version control settings");
|
||||
"this is an isolated carriage return; consider \
|
||||
checking your editor and version control \
|
||||
settings");
|
||||
}
|
||||
if (e == '{' || e == '}') && !ascii_only {
|
||||
err.span_help(codemap::mk_sp(escaped_pos, last_pos),
|
||||
"if used in a formatting string, \
|
||||
curly braces are escaped with `{{` and `}}`");
|
||||
"if used in a formatting string, curly braces \
|
||||
are escaped with `{{` and `}}`");
|
||||
}
|
||||
err.emit();
|
||||
false
|
||||
@@ -810,11 +870,14 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
}
|
||||
'\t' | '\n' | '\r' | '\'' if delim == '\'' => {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
if ascii_only { "byte constant must be escaped" }
|
||||
else { "character constant must be escaped" },
|
||||
first_source_char);
|
||||
self.err_span_char(start,
|
||||
last_pos,
|
||||
if ascii_only {
|
||||
"byte constant must be escaped"
|
||||
} else {
|
||||
"character constant must be escaped"
|
||||
},
|
||||
first_source_char);
|
||||
return false;
|
||||
}
|
||||
'\r' => {
|
||||
@@ -822,18 +885,22 @@ fn scan_char_or_byte(&mut self, start: BytePos, first_source_char: char,
|
||||
self.bump();
|
||||
return true;
|
||||
} else {
|
||||
self.err_span_(start, self.last_pos,
|
||||
self.err_span_(start,
|
||||
self.last_pos,
|
||||
"bare CR not allowed in string, use \\r instead");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
_ => if ascii_only && first_source_char > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
start, last_pos,
|
||||
"byte constant must be ASCII. \
|
||||
Use a \\xHH escape for a non-ASCII byte", first_source_char);
|
||||
return false;
|
||||
_ => {
|
||||
if ascii_only && first_source_char > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(start,
|
||||
last_pos,
|
||||
"byte constant must be ASCII. Use a \\xHH escape for a \
|
||||
non-ASCII byte",
|
||||
first_source_char);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
@@ -854,18 +921,22 @@ fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
||||
let c = match self.curr {
|
||||
Some(c) => c,
|
||||
None => {
|
||||
panic!(self.fatal_span_(start_bpos, self.last_pos,
|
||||
panic!(self.fatal_span_(start_bpos,
|
||||
self.last_pos,
|
||||
"unterminated unicode escape (found EOF)"));
|
||||
}
|
||||
};
|
||||
accum_int *= 16;
|
||||
accum_int += c.to_digit(16).unwrap_or_else(|| {
|
||||
if c == delim {
|
||||
panic!(self.fatal_span_(self.last_pos, self.pos,
|
||||
panic!(self.fatal_span_(self.last_pos,
|
||||
self.pos,
|
||||
"unterminated unicode escape (needed a `}`)"));
|
||||
} else {
|
||||
self.err_span_char(self.last_pos, self.pos,
|
||||
"invalid character in unicode escape", c);
|
||||
self.err_span_char(self.last_pos,
|
||||
self.pos,
|
||||
"invalid character in unicode escape",
|
||||
c);
|
||||
}
|
||||
valid = false;
|
||||
0
|
||||
@@ -875,13 +946,16 @@ fn scan_unicode_escape(&mut self, delim: char) -> bool {
|
||||
}
|
||||
|
||||
if count > 6 {
|
||||
self.err_span_(start_bpos, self.last_pos,
|
||||
"overlong unicode escape (can have at most 6 hex digits)");
|
||||
self.err_span_(start_bpos,
|
||||
self.last_pos,
|
||||
"overlong unicode escape (can have at most 6 hex digits)");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
if valid && (char::from_u32(accum_int).is_none() || count == 0) {
|
||||
self.err_span_(start_bpos, self.last_pos, "invalid unicode character escape");
|
||||
self.err_span_(start_bpos,
|
||||
self.last_pos,
|
||||
"invalid unicode character escape");
|
||||
valid = false;
|
||||
}
|
||||
|
||||
@@ -897,7 +971,9 @@ fn scan_float_exponent(&mut self) {
|
||||
self.bump();
|
||||
}
|
||||
if self.scan_digits(10, 10) == 0 {
|
||||
self.err_span_(self.last_pos, self.pos, "expected at least one digit in exponent")
|
||||
self.err_span_(self.last_pos,
|
||||
self.pos,
|
||||
"expected at least one digit in exponent")
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -906,11 +982,22 @@ fn scan_float_exponent(&mut self) {
|
||||
/// error if it isn't.
|
||||
fn check_float_base(&mut self, start_bpos: BytePos, last_bpos: BytePos, base: usize) {
|
||||
match base {
|
||||
16 => self.err_span_(start_bpos, last_bpos, "hexadecimal float literal is not \
|
||||
supported"),
|
||||
8 => self.err_span_(start_bpos, last_bpos, "octal float literal is not supported"),
|
||||
2 => self.err_span_(start_bpos, last_bpos, "binary float literal is not supported"),
|
||||
_ => ()
|
||||
16 => {
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"hexadecimal float literal is not supported")
|
||||
}
|
||||
8 => {
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"octal float literal is not supported")
|
||||
}
|
||||
2 => {
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"binary float literal is not supported")
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -928,14 +1015,18 @@ fn binop(&mut self, op: token::BinOpToken) -> token::Token {
|
||||
/// token, and updates the interner
|
||||
fn next_token_inner(&mut self) -> token::Token {
|
||||
let c = self.curr;
|
||||
if ident_start(c) && match (c.unwrap(), self.nextch(), self.nextnextch()) {
|
||||
if ident_start(c) &&
|
||||
match (c.unwrap(), self.nextch(), self.nextnextch()) {
|
||||
// Note: r as in r" or r#" is part of a raw string literal,
|
||||
// b as in b' is part of a byte literal.
|
||||
// They are not identifiers, and are handled further down.
|
||||
('r', Some('"'), _) | ('r', Some('#'), _) |
|
||||
('b', Some('"'), _) | ('b', Some('\''), _) |
|
||||
('b', Some('r'), Some('"')) | ('b', Some('r'), Some('#')) => false,
|
||||
_ => true
|
||||
('r', Some('"'), _) |
|
||||
('r', Some('#'), _) |
|
||||
('b', Some('"'), _) |
|
||||
('b', Some('\''), _) |
|
||||
('b', Some('r'), Some('"')) |
|
||||
('b', Some('r'), Some('#')) => false,
|
||||
_ => true,
|
||||
} {
|
||||
let start = self.last_pos;
|
||||
while ident_continue(self.curr) {
|
||||
@@ -960,299 +1051,393 @@ fn next_token_inner(&mut self) -> token::Token {
|
||||
let num = self.scan_number(c.unwrap());
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
debug!("next_token_inner: scanned number {:?}, {:?}", num, suffix);
|
||||
return token::Literal(num, suffix)
|
||||
return token::Literal(num, suffix);
|
||||
}
|
||||
|
||||
match c.expect("next_token_inner called at EOF") {
|
||||
// One-byte tokens.
|
||||
';' => { self.bump(); return token::Semi; }
|
||||
',' => { self.bump(); return token::Comma; }
|
||||
'.' => {
|
||||
self.bump();
|
||||
return if self.curr_is('.') {
|
||||
self.bump();
|
||||
if self.curr_is('.') {
|
||||
self.bump();
|
||||
token::DotDotDot
|
||||
} else {
|
||||
token::DotDot
|
||||
}
|
||||
} else {
|
||||
token::Dot
|
||||
};
|
||||
}
|
||||
'(' => { self.bump(); return token::OpenDelim(token::Paren); }
|
||||
')' => { self.bump(); return token::CloseDelim(token::Paren); }
|
||||
'{' => { self.bump(); return token::OpenDelim(token::Brace); }
|
||||
'}' => { self.bump(); return token::CloseDelim(token::Brace); }
|
||||
'[' => { self.bump(); return token::OpenDelim(token::Bracket); }
|
||||
']' => { self.bump(); return token::CloseDelim(token::Bracket); }
|
||||
'@' => { self.bump(); return token::At; }
|
||||
'#' => { self.bump(); return token::Pound; }
|
||||
'~' => { self.bump(); return token::Tilde; }
|
||||
'?' => { self.bump(); return token::Question; }
|
||||
':' => {
|
||||
self.bump();
|
||||
if self.curr_is(':') {
|
||||
// One-byte tokens.
|
||||
';' => {
|
||||
self.bump();
|
||||
return token::ModSep;
|
||||
} else {
|
||||
return token::Colon;
|
||||
return token::Semi;
|
||||
}
|
||||
}
|
||||
|
||||
'$' => { self.bump(); return token::Dollar; }
|
||||
|
||||
// Multi-byte tokens.
|
||||
'=' => {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
',' => {
|
||||
self.bump();
|
||||
return token::EqEq;
|
||||
} else if self.curr_is('>') {
|
||||
self.bump();
|
||||
return token::FatArrow;
|
||||
} else {
|
||||
return token::Eq;
|
||||
return token::Comma;
|
||||
}
|
||||
}
|
||||
'!' => {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
'.' => {
|
||||
self.bump();
|
||||
return token::Ne;
|
||||
} else { return token::Not; }
|
||||
}
|
||||
'<' => {
|
||||
self.bump();
|
||||
match self.curr.unwrap_or('\x00') {
|
||||
'=' => { self.bump(); return token::Le; }
|
||||
'<' => { return self.binop(token::Shl); }
|
||||
'-' => {
|
||||
return if self.curr_is('.') {
|
||||
self.bump();
|
||||
if self.curr_is('.') {
|
||||
self.bump();
|
||||
token::DotDotDot
|
||||
} else {
|
||||
token::DotDot
|
||||
}
|
||||
} else {
|
||||
token::Dot
|
||||
};
|
||||
}
|
||||
'(' => {
|
||||
self.bump();
|
||||
return token::OpenDelim(token::Paren);
|
||||
}
|
||||
')' => {
|
||||
self.bump();
|
||||
return token::CloseDelim(token::Paren);
|
||||
}
|
||||
'{' => {
|
||||
self.bump();
|
||||
return token::OpenDelim(token::Brace);
|
||||
}
|
||||
'}' => {
|
||||
self.bump();
|
||||
return token::CloseDelim(token::Brace);
|
||||
}
|
||||
'[' => {
|
||||
self.bump();
|
||||
return token::OpenDelim(token::Bracket);
|
||||
}
|
||||
']' => {
|
||||
self.bump();
|
||||
return token::CloseDelim(token::Bracket);
|
||||
}
|
||||
'@' => {
|
||||
self.bump();
|
||||
return token::At;
|
||||
}
|
||||
'#' => {
|
||||
self.bump();
|
||||
return token::Pound;
|
||||
}
|
||||
'~' => {
|
||||
self.bump();
|
||||
return token::Tilde;
|
||||
}
|
||||
'?' => {
|
||||
self.bump();
|
||||
return token::Question;
|
||||
}
|
||||
':' => {
|
||||
self.bump();
|
||||
if self.curr_is(':') {
|
||||
self.bump();
|
||||
return token::ModSep;
|
||||
} else {
|
||||
return token::Colon;
|
||||
}
|
||||
}
|
||||
|
||||
'$' => {
|
||||
self.bump();
|
||||
return token::Dollar;
|
||||
}
|
||||
|
||||
// Multi-byte tokens.
|
||||
'=' => {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
self.bump();
|
||||
return token::EqEq;
|
||||
} else if self.curr_is('>') {
|
||||
self.bump();
|
||||
return token::FatArrow;
|
||||
} else {
|
||||
return token::Eq;
|
||||
}
|
||||
}
|
||||
'!' => {
|
||||
self.bump();
|
||||
if self.curr_is('=') {
|
||||
self.bump();
|
||||
return token::Ne;
|
||||
} else {
|
||||
return token::Not;
|
||||
}
|
||||
}
|
||||
'<' => {
|
||||
self.bump();
|
||||
match self.curr.unwrap_or('\x00') {
|
||||
_ => { return token::LArrow; }
|
||||
'=' => {
|
||||
self.bump();
|
||||
return token::Le;
|
||||
}
|
||||
'<' => {
|
||||
return self.binop(token::Shl);
|
||||
}
|
||||
'-' => {
|
||||
self.bump();
|
||||
match self.curr.unwrap_or('\x00') {
|
||||
_ => {
|
||||
return token::LArrow;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return token::Lt;
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => { return token::Lt; }
|
||||
}
|
||||
}
|
||||
'>' => {
|
||||
self.bump();
|
||||
match self.curr.unwrap_or('\x00') {
|
||||
'=' => { self.bump(); return token::Ge; }
|
||||
'>' => { return self.binop(token::Shr); }
|
||||
_ => { return token::Gt; }
|
||||
}
|
||||
}
|
||||
'\'' => {
|
||||
// Either a character constant 'a' OR a lifetime name 'abc
|
||||
self.bump();
|
||||
let start = self.last_pos;
|
||||
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let c2 = self.curr.unwrap_or('\x00');
|
||||
self.bump();
|
||||
|
||||
// If the character is an ident start not followed by another single
|
||||
// quote, then this is a lifetime name:
|
||||
if ident_start(Some(c2)) && !self.curr_is('\'') {
|
||||
while ident_continue(self.curr) {
|
||||
self.bump();
|
||||
'>' => {
|
||||
self.bump();
|
||||
match self.curr.unwrap_or('\x00') {
|
||||
'=' => {
|
||||
self.bump();
|
||||
return token::Ge;
|
||||
}
|
||||
'>' => {
|
||||
return self.binop(token::Shr);
|
||||
}
|
||||
_ => {
|
||||
return token::Gt;
|
||||
}
|
||||
}
|
||||
}
|
||||
'\'' => {
|
||||
// Either a character constant 'a' OR a lifetime name 'abc
|
||||
self.bump();
|
||||
let start = self.last_pos;
|
||||
|
||||
// Include the leading `'` in the real identifier, for macro
|
||||
// expansion purposes. See #12512 for the gory details of why
|
||||
// this is necessary.
|
||||
let ident = self.with_str_from(start, |lifetime_name| {
|
||||
str_to_ident(&format!("'{}", lifetime_name))
|
||||
});
|
||||
// the eof will be picked up by the final `'` check below
|
||||
let c2 = self.curr.unwrap_or('\x00');
|
||||
self.bump();
|
||||
|
||||
// Conjure up a "keyword checking ident" to make sure that
|
||||
// the lifetime name is not a keyword.
|
||||
let keyword_checking_ident =
|
||||
self.with_str_from(start, |lifetime_name| {
|
||||
// If the character is an ident start not followed by another single
|
||||
// quote, then this is a lifetime name:
|
||||
if ident_start(Some(c2)) && !self.curr_is('\'') {
|
||||
while ident_continue(self.curr) {
|
||||
self.bump();
|
||||
}
|
||||
|
||||
// Include the leading `'` in the real identifier, for macro
|
||||
// expansion purposes. See #12512 for the gory details of why
|
||||
// this is necessary.
|
||||
let ident = self.with_str_from(start, |lifetime_name| {
|
||||
str_to_ident(&format!("'{}", lifetime_name))
|
||||
});
|
||||
|
||||
// Conjure up a "keyword checking ident" to make sure that
|
||||
// the lifetime name is not a keyword.
|
||||
let keyword_checking_ident = self.with_str_from(start, |lifetime_name| {
|
||||
str_to_ident(lifetime_name)
|
||||
});
|
||||
let keyword_checking_token =
|
||||
&token::Ident(keyword_checking_ident, token::Plain);
|
||||
let last_bpos = self.last_pos;
|
||||
if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
|
||||
self.err_span_(start,
|
||||
last_bpos,
|
||||
"invalid lifetime name: 'self \
|
||||
is no longer a special lifetime");
|
||||
} else if keyword_checking_token.is_any_keyword() &&
|
||||
!keyword_checking_token.is_keyword(token::keywords::Static)
|
||||
{
|
||||
self.err_span_(start,
|
||||
last_bpos,
|
||||
"invalid lifetime name");
|
||||
}
|
||||
return token::Lifetime(ident);
|
||||
}
|
||||
|
||||
// Otherwise it is a character constant:
|
||||
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ false, '\'');
|
||||
if !self.curr_is('\'') {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_verbose(
|
||||
// Byte offsetting here is okay because the
|
||||
// character before position `start` is an
|
||||
// ascii single quote.
|
||||
start - BytePos(1), last_bpos,
|
||||
|
||||
String::from("character literal may only contain one codepoint")));
|
||||
}
|
||||
let id = if valid { self.name_from(start) } else { token::intern("0") };
|
||||
self.bump(); // advance curr past token
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::Char(id), suffix);
|
||||
}
|
||||
'b' => {
|
||||
self.bump();
|
||||
let lit = match self.curr {
|
||||
Some('\'') => self.scan_byte(),
|
||||
Some('"') => self.scan_byte_string(),
|
||||
Some('r') => self.scan_raw_byte_string(),
|
||||
_ => unreachable!() // Should have been a token::Ident above.
|
||||
};
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(lit, suffix);
|
||||
}
|
||||
'"' => {
|
||||
let start_bpos = self.last_pos;
|
||||
let mut valid = true;
|
||||
self.bump();
|
||||
while !self.curr_is('"') {
|
||||
if self.is_eof() {
|
||||
let keyword_checking_token = &token::Ident(keyword_checking_ident,
|
||||
token::Plain);
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos,
|
||||
last_bpos,
|
||||
"unterminated double quote string"));
|
||||
if keyword_checking_token.is_keyword(token::keywords::SelfValue) {
|
||||
self.err_span_(start,
|
||||
last_bpos,
|
||||
"invalid lifetime name: 'self is no longer a special \
|
||||
lifetime");
|
||||
} else if keyword_checking_token.is_any_keyword() &&
|
||||
!keyword_checking_token.is_keyword(token::keywords::Static) {
|
||||
self.err_span_(start, last_bpos, "invalid lifetime name");
|
||||
}
|
||||
return token::Lifetime(ident);
|
||||
}
|
||||
|
||||
let ch_start = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
self.bump();
|
||||
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ false, '"');
|
||||
}
|
||||
// adjust for the ASCII " at the start of the literal
|
||||
let id = if valid { self.name_from(start_bpos + BytePos(1)) }
|
||||
else { token::intern("??") };
|
||||
self.bump();
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::Str_(id), suffix);
|
||||
}
|
||||
'r' => {
|
||||
let start_bpos = self.last_pos;
|
||||
self.bump();
|
||||
let mut hash_count = 0;
|
||||
while self.curr_is('#') {
|
||||
self.bump();
|
||||
hash_count += 1;
|
||||
}
|
||||
// Otherwise it is a character constant:
|
||||
let valid = self.scan_char_or_byte(start,
|
||||
c2,
|
||||
// ascii_only =
|
||||
false,
|
||||
'\'');
|
||||
if !self.curr_is('\'') {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_verbose(// Byte offsetting here is okay because the
|
||||
// character before position `start` is an
|
||||
// ascii single quote.
|
||||
start - BytePos(1),
|
||||
last_bpos,
|
||||
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
|
||||
} else if !self.curr_is('"') {
|
||||
let last_bpos = self.last_pos;
|
||||
let curr_char = self.curr.unwrap();
|
||||
panic!(self.fatal_span_char(start_bpos, last_bpos,
|
||||
"found invalid character; \
|
||||
only `#` is allowed in raw string delimitation",
|
||||
curr_char));
|
||||
String::from("character literal may only \
|
||||
contain one codepoint")));
|
||||
}
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
token::intern("0")
|
||||
};
|
||||
self.bump(); // advance curr past token
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::Char(id), suffix);
|
||||
}
|
||||
self.bump();
|
||||
let content_start_bpos = self.last_pos;
|
||||
let mut content_end_bpos;
|
||||
let mut valid = true;
|
||||
'outer: loop {
|
||||
'b' => {
|
||||
self.bump();
|
||||
let lit = match self.curr {
|
||||
Some('\'') => self.scan_byte(),
|
||||
Some('"') => self.scan_byte_string(),
|
||||
Some('r') => self.scan_raw_byte_string(),
|
||||
_ => unreachable!(), // Should have been a token::Ident above.
|
||||
};
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(lit, suffix);
|
||||
}
|
||||
'"' => {
|
||||
let start_bpos = self.last_pos;
|
||||
let mut valid = true;
|
||||
self.bump();
|
||||
while !self.curr_is('"') {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos,
|
||||
last_bpos,
|
||||
"unterminated double quote string"));
|
||||
}
|
||||
|
||||
let ch_start = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
self.bump();
|
||||
valid &= self.scan_char_or_byte(ch_start,
|
||||
ch,
|
||||
// ascii_only =
|
||||
false,
|
||||
'"');
|
||||
}
|
||||
// adjust for the ASCII " at the start of the literal
|
||||
let id = if valid {
|
||||
self.name_from(start_bpos + BytePos(1))
|
||||
} else {
|
||||
token::intern("??")
|
||||
};
|
||||
self.bump();
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::Str_(id), suffix);
|
||||
}
|
||||
'r' => {
|
||||
let start_bpos = self.last_pos;
|
||||
self.bump();
|
||||
let mut hash_count = 0;
|
||||
while self.curr_is('#') {
|
||||
self.bump();
|
||||
hash_count += 1;
|
||||
}
|
||||
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
|
||||
} else if !self.curr_is('"') {
|
||||
let last_bpos = self.last_pos;
|
||||
let curr_char = self.curr.unwrap();
|
||||
panic!(self.fatal_span_char(start_bpos,
|
||||
last_bpos,
|
||||
"found invalid character; only `#` is allowed \
|
||||
in raw string delimitation",
|
||||
curr_char));
|
||||
}
|
||||
//if self.curr_is('"') {
|
||||
//content_end_bpos = self.last_pos;
|
||||
//for _ in 0..hash_count {
|
||||
//self.bump();
|
||||
//if !self.curr_is('#') {
|
||||
//continue 'outer;
|
||||
let c = self.curr.unwrap();
|
||||
match c {
|
||||
'"' => {
|
||||
content_end_bpos = self.last_pos;
|
||||
for _ in 0..hash_count {
|
||||
self.bump();
|
||||
if !self.curr_is('#') {
|
||||
continue 'outer;
|
||||
self.bump();
|
||||
let content_start_bpos = self.last_pos;
|
||||
let mut content_end_bpos;
|
||||
let mut valid = true;
|
||||
'outer: loop {
|
||||
if self.is_eof() {
|
||||
let last_bpos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos, last_bpos, "unterminated raw string"));
|
||||
}
|
||||
// if self.curr_is('"') {
|
||||
// content_end_bpos = self.last_pos;
|
||||
// for _ in 0..hash_count {
|
||||
// self.bump();
|
||||
// if !self.curr_is('#') {
|
||||
// continue 'outer;
|
||||
let c = self.curr.unwrap();
|
||||
match c {
|
||||
'"' => {
|
||||
content_end_bpos = self.last_pos;
|
||||
for _ in 0..hash_count {
|
||||
self.bump();
|
||||
if !self.curr_is('#') {
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
'\r' => {
|
||||
if !self.nextch_is('\n') {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_(start_bpos,
|
||||
last_bpos,
|
||||
"bare CR not allowed in raw string, use \\r \
|
||||
instead");
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
break;
|
||||
},
|
||||
'\r' => {
|
||||
if !self.nextch_is('\n') {
|
||||
let last_bpos = self.last_pos;
|
||||
self.err_span_(start_bpos, last_bpos, "bare CR not allowed in raw \
|
||||
string, use \\r instead");
|
||||
valid = false;
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
_ => ()
|
||||
self.bump();
|
||||
}
|
||||
self.bump();
|
||||
let id = if valid {
|
||||
self.name_from_to(content_start_bpos, content_end_bpos)
|
||||
} else {
|
||||
token::intern("??")
|
||||
};
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::StrRaw(id, hash_count), suffix);
|
||||
}
|
||||
self.bump();
|
||||
let id = if valid {
|
||||
self.name_from_to(content_start_bpos, content_end_bpos)
|
||||
} else {
|
||||
token::intern("??")
|
||||
};
|
||||
let suffix = self.scan_optional_raw_name();
|
||||
return token::Literal(token::StrRaw(id, hash_count), suffix);
|
||||
}
|
||||
'-' => {
|
||||
if self.nextch_is('>') {
|
||||
self.bump();
|
||||
self.bump();
|
||||
return token::RArrow;
|
||||
} else { return self.binop(token::Minus); }
|
||||
}
|
||||
'&' => {
|
||||
if self.nextch_is('&') {
|
||||
self.bump();
|
||||
self.bump();
|
||||
return token::AndAnd;
|
||||
} else { return self.binop(token::And); }
|
||||
}
|
||||
'|' => {
|
||||
match self.nextch() {
|
||||
Some('|') => { self.bump(); self.bump(); return token::OrOr; }
|
||||
_ => { return self.binop(token::Or); }
|
||||
'-' => {
|
||||
if self.nextch_is('>') {
|
||||
self.bump();
|
||||
self.bump();
|
||||
return token::RArrow;
|
||||
} else {
|
||||
return self.binop(token::Minus);
|
||||
}
|
||||
}
|
||||
'&' => {
|
||||
if self.nextch_is('&') {
|
||||
self.bump();
|
||||
self.bump();
|
||||
return token::AndAnd;
|
||||
} else {
|
||||
return self.binop(token::And);
|
||||
}
|
||||
}
|
||||
'|' => {
|
||||
match self.nextch() {
|
||||
Some('|') => {
|
||||
self.bump();
|
||||
self.bump();
|
||||
return token::OrOr;
|
||||
}
|
||||
_ => {
|
||||
return self.binop(token::Or);
|
||||
}
|
||||
}
|
||||
}
|
||||
'+' => {
|
||||
return self.binop(token::Plus);
|
||||
}
|
||||
'*' => {
|
||||
return self.binop(token::Star);
|
||||
}
|
||||
'/' => {
|
||||
return self.binop(token::Slash);
|
||||
}
|
||||
'^' => {
|
||||
return self.binop(token::Caret);
|
||||
}
|
||||
'%' => {
|
||||
return self.binop(token::Percent);
|
||||
}
|
||||
c => {
|
||||
let last_bpos = self.last_pos;
|
||||
let bpos = self.pos;
|
||||
let mut err = self.struct_fatal_span_char(last_bpos,
|
||||
bpos,
|
||||
"unknown start of token",
|
||||
c);
|
||||
unicode_chars::check_for_substitution(&self, c, &mut err);
|
||||
err.emit();
|
||||
panic!(FatalError);
|
||||
}
|
||||
}
|
||||
'+' => { return self.binop(token::Plus); }
|
||||
'*' => { return self.binop(token::Star); }
|
||||
'/' => { return self.binop(token::Slash); }
|
||||
'^' => { return self.binop(token::Caret); }
|
||||
'%' => { return self.binop(token::Percent); }
|
||||
c => {
|
||||
let last_bpos = self.last_pos;
|
||||
let bpos = self.pos;
|
||||
let mut err = self.struct_fatal_span_char(last_bpos,
|
||||
bpos,
|
||||
"unknown start of token",
|
||||
c);
|
||||
unicode_chars::check_for_substitution(&self, c, &mut err);
|
||||
err.emit();
|
||||
panic!(FatalError);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn consume_whitespace(&mut self) {
|
||||
while is_whitespace(self.curr) && !self.is_eof() { self.bump(); }
|
||||
while is_whitespace(self.curr) && !self.is_eof() {
|
||||
self.bump();
|
||||
}
|
||||
}
|
||||
|
||||
fn read_to_eol(&mut self) -> String {
|
||||
@@ -1261,14 +1446,16 @@ fn read_to_eol(&mut self) -> String {
|
||||
val.push(self.curr.unwrap());
|
||||
self.bump();
|
||||
}
|
||||
if self.curr_is('\n') { self.bump(); }
|
||||
return val
|
||||
if self.curr_is('\n') {
|
||||
self.bump();
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
fn read_one_line_comment(&mut self) -> String {
|
||||
let val = self.read_to_eol();
|
||||
assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/')
|
||||
|| (val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
|
||||
assert!((val.as_bytes()[0] == b'/' && val.as_bytes()[1] == b'/') ||
|
||||
(val.as_bytes()[0] == b'#' && val.as_bytes()[1] == b'!'));
|
||||
return val;
|
||||
}
|
||||
|
||||
@@ -1279,10 +1466,9 @@ fn consume_non_eol_whitespace(&mut self) {
|
||||
}
|
||||
|
||||
fn peeking_at_comment(&self) -> bool {
|
||||
(self.curr_is('/') && self.nextch_is('/'))
|
||||
|| (self.curr_is('/') && self.nextch_is('*'))
|
||||
// consider shebangs comments, but not inner attributes
|
||||
|| (self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
||||
(self.curr_is('/') && self.nextch_is('/')) || (self.curr_is('/') && self.nextch_is('*')) ||
|
||||
// consider shebangs comments, but not inner attributes
|
||||
(self.curr_is('#') && self.nextch_is('!') && !self.nextnextch_is('['))
|
||||
}
|
||||
|
||||
fn scan_byte(&mut self) -> token::Lit {
|
||||
@@ -1293,18 +1479,26 @@ fn scan_byte(&mut self) -> token::Lit {
|
||||
let c2 = self.curr.unwrap_or('\x00');
|
||||
self.bump();
|
||||
|
||||
let valid = self.scan_char_or_byte(start, c2, /* ascii_only = */ true, '\'');
|
||||
let valid = self.scan_char_or_byte(start,
|
||||
c2,
|
||||
// ascii_only =
|
||||
true,
|
||||
'\'');
|
||||
if !self.curr_is('\'') {
|
||||
// Byte offsetting here is okay because the
|
||||
// character before position `start` are an
|
||||
// ascii single quote and ascii 'b'.
|
||||
let last_pos = self.last_pos;
|
||||
panic!(self.fatal_span_verbose(
|
||||
start - BytePos(2), last_pos,
|
||||
"unterminated byte constant".to_string()));
|
||||
panic!(self.fatal_span_verbose(start - BytePos(2),
|
||||
last_pos,
|
||||
"unterminated byte constant".to_string()));
|
||||
}
|
||||
|
||||
let id = if valid { self.name_from(start) } else { token::intern("?") };
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
token::intern("?")
|
||||
};
|
||||
self.bump(); // advance curr past token
|
||||
return token::Byte(id);
|
||||
}
|
||||
@@ -1327,9 +1521,17 @@ fn scan_byte_string(&mut self) -> token::Lit {
|
||||
let ch_start = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
self.bump();
|
||||
valid &= self.scan_char_or_byte(ch_start, ch, /* ascii_only = */ true, '"');
|
||||
valid &= self.scan_char_or_byte(ch_start,
|
||||
ch,
|
||||
// ascii_only =
|
||||
true,
|
||||
'"');
|
||||
}
|
||||
let id = if valid { self.name_from(start) } else { token::intern("??") };
|
||||
let id = if valid {
|
||||
self.name_from(start)
|
||||
} else {
|
||||
token::intern("??")
|
||||
};
|
||||
self.bump();
|
||||
return token::ByteStr(id);
|
||||
}
|
||||
@@ -1349,10 +1551,11 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
|
||||
} else if !self.curr_is('"') {
|
||||
let last_pos = self.last_pos;
|
||||
let ch = self.curr.unwrap();
|
||||
panic!(self.fatal_span_char(start_bpos, last_pos,
|
||||
"found invalid character; \
|
||||
only `#` is allowed in raw string delimitation",
|
||||
ch));
|
||||
panic!(self.fatal_span_char(start_bpos,
|
||||
last_pos,
|
||||
"found invalid character; only `#` is allowed in raw \
|
||||
string delimitation",
|
||||
ch));
|
||||
}
|
||||
self.bump();
|
||||
let content_start_bpos = self.last_pos;
|
||||
@@ -1362,7 +1565,7 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
|
||||
None => {
|
||||
let last_pos = self.last_pos;
|
||||
panic!(self.fatal_span_(start_bpos, last_pos, "unterminated raw string"))
|
||||
},
|
||||
}
|
||||
Some('"') => {
|
||||
content_end_bpos = self.last_pos;
|
||||
for _ in 0..hash_count {
|
||||
@@ -1372,70 +1575,72 @@ fn scan_raw_byte_string(&mut self) -> token::Lit {
|
||||
}
|
||||
}
|
||||
break;
|
||||
},
|
||||
Some(c) => if c > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(
|
||||
last_pos, last_pos, "raw byte string must be ASCII", c);
|
||||
}
|
||||
Some(c) => {
|
||||
if c > '\x7F' {
|
||||
let last_pos = self.last_pos;
|
||||
self.err_span_char(last_pos, last_pos, "raw byte string must be ASCII", c);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.bump();
|
||||
}
|
||||
self.bump();
|
||||
return token::ByteStrRaw(self.name_from_to(content_start_bpos,
|
||||
content_end_bpos),
|
||||
hash_count);
|
||||
return token::ByteStrRaw(self.name_from_to(content_start_bpos, content_end_bpos),
|
||||
hash_count);
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_whitespace(c: Option<char>) -> bool {
|
||||
match c.unwrap_or('\x00') { // None can be null for now... it's not whitespace
|
||||
' ' | '\n' | '\t' | '\r' => true,
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn in_range(c: Option<char>, lo: char, hi: char) -> bool {
|
||||
match c {
|
||||
Some(c) => lo <= c && c <= hi,
|
||||
_ => false
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_dec_digit(c: Option<char>) -> bool { return in_range(c, '0', '9'); }
|
||||
fn is_dec_digit(c: Option<char>) -> bool {
|
||||
return in_range(c, '0', '9');
|
||||
}
|
||||
|
||||
pub fn is_doc_comment(s: &str) -> bool {
|
||||
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/')
|
||||
|| s.starts_with("//!");
|
||||
let res = (s.starts_with("///") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'/') ||
|
||||
s.starts_with("//!");
|
||||
debug!("is {:?} a doc comment? {}", s, res);
|
||||
res
|
||||
}
|
||||
|
||||
pub fn is_block_doc_comment(s: &str) -> bool {
|
||||
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*')
|
||||
|| s.starts_with("/*!"))
|
||||
&& s.len() >= 5; // Prevent `/**/` from being parsed as a doc comment
|
||||
// Prevent `/**/` from being parsed as a doc comment
|
||||
let res = ((s.starts_with("/**") && *s.as_bytes().get(3).unwrap_or(&b' ') != b'*') ||
|
||||
s.starts_with("/*!")) && s.len() >= 5;
|
||||
debug!("is {:?} a doc comment? {}", s, res);
|
||||
res
|
||||
}
|
||||
|
||||
fn ident_start(c: Option<char>) -> bool {
|
||||
let c = match c { Some(c) => c, None => return false };
|
||||
let c = match c {
|
||||
Some(c) => c,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && c.is_xid_start())
|
||||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_' || (c > '\x7f' && c.is_xid_start())
|
||||
}
|
||||
|
||||
fn ident_continue(c: Option<char>) -> bool {
|
||||
let c = match c { Some(c) => c, None => return false };
|
||||
let c = match c {
|
||||
Some(c) => c,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
(c >= 'a' && c <= 'z')
|
||||
|| (c >= 'A' && c <= 'Z')
|
||||
|| (c >= '0' && c <= '9')
|
||||
|| c == '_'
|
||||
|| (c > '\x7f' && c.is_xid_continue())
|
||||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_' ||
|
||||
(c > '\x7f' && c.is_xid_continue())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1445,7 +1650,7 @@ mod tests {
|
||||
use codemap::{BytePos, CodeMap, Span, NO_EXPANSION};
|
||||
use errors;
|
||||
use parse::token;
|
||||
use parse::token::{str_to_ident};
|
||||
use parse::token::str_to_ident;
|
||||
use std::io;
|
||||
use std::rc::Rc;
|
||||
|
||||
@@ -1458,41 +1663,54 @@ fn mk_sh(cm: Rc<CodeMap>) -> errors::Handler {
|
||||
// open a string reader for the given string
|
||||
fn setup<'a>(cm: &CodeMap,
|
||||
span_handler: &'a errors::Handler,
|
||||
teststr: String) -> StringReader<'a> {
|
||||
teststr: String)
|
||||
-> StringReader<'a> {
|
||||
let fm = cm.new_filemap("zebra.rs".to_string(), teststr);
|
||||
StringReader::new(span_handler, fm)
|
||||
}
|
||||
|
||||
#[test] fn t1 () {
|
||||
#[test]
|
||||
fn t1() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
let mut string_reader = setup(&cm, &sh,
|
||||
"/* my source file */ \
|
||||
fn main() { println!(\"zebra\"); }\n".to_string());
|
||||
let mut string_reader = setup(&cm,
|
||||
&sh,
|
||||
"/* my source file */ fn main() { println!(\"zebra\"); }\n"
|
||||
.to_string());
|
||||
let id = str_to_ident("fn");
|
||||
assert_eq!(string_reader.next_token().tok, token::Comment);
|
||||
assert_eq!(string_reader.next_token().tok, token::Whitespace);
|
||||
let tok1 = string_reader.next_token();
|
||||
let tok2 = TokenAndSpan{
|
||||
tok:token::Ident(id, token::Plain),
|
||||
sp:Span {lo:BytePos(21),hi:BytePos(23),expn_id: NO_EXPANSION}};
|
||||
assert_eq!(tok1,tok2);
|
||||
let tok2 = TokenAndSpan {
|
||||
tok: token::Ident(id, token::Plain),
|
||||
sp: Span {
|
||||
lo: BytePos(21),
|
||||
hi: BytePos(23),
|
||||
expn_id: NO_EXPANSION,
|
||||
},
|
||||
};
|
||||
assert_eq!(tok1, tok2);
|
||||
assert_eq!(string_reader.next_token().tok, token::Whitespace);
|
||||
// the 'main' id is already read:
|
||||
assert_eq!(string_reader.last_pos.clone(), BytePos(28));
|
||||
// read another token:
|
||||
let tok3 = string_reader.next_token();
|
||||
let tok4 = TokenAndSpan{
|
||||
tok:token::Ident(str_to_ident("main"), token::Plain),
|
||||
sp:Span {lo:BytePos(24),hi:BytePos(28),expn_id: NO_EXPANSION}};
|
||||
assert_eq!(tok3,tok4);
|
||||
let tok4 = TokenAndSpan {
|
||||
tok: token::Ident(str_to_ident("main"), token::Plain),
|
||||
sp: Span {
|
||||
lo: BytePos(24),
|
||||
hi: BytePos(28),
|
||||
expn_id: NO_EXPANSION,
|
||||
},
|
||||
};
|
||||
assert_eq!(tok3, tok4);
|
||||
// the lparen is already read:
|
||||
assert_eq!(string_reader.last_pos.clone(), BytePos(29))
|
||||
}
|
||||
|
||||
// check that the given reader produces the desired stream
|
||||
// of tokens (stop checking after exhausting the expected vec)
|
||||
fn check_tokenization (mut string_reader: StringReader, expected: Vec<token::Token> ) {
|
||||
fn check_tokenization(mut string_reader: StringReader, expected: Vec<token::Token>) {
|
||||
for expected_tok in &expected {
|
||||
assert_eq!(&string_reader.next_token().tok, expected_tok);
|
||||
}
|
||||
@@ -1503,7 +1721,8 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
|
||||
token::Ident(str_to_ident(id), style)
|
||||
}
|
||||
|
||||
#[test] fn doublecolonparsing () {
|
||||
#[test]
|
||||
fn doublecolonparsing() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
check_tokenization(setup(&cm, &sh, "a b".to_string()),
|
||||
@@ -1512,16 +1731,18 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
|
||||
mk_ident("b", token::Plain)]);
|
||||
}
|
||||
|
||||
#[test] fn dcparsing_2 () {
|
||||
#[test]
|
||||
fn dcparsing_2() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
check_tokenization(setup(&cm, &sh, "a::b".to_string()),
|
||||
vec![mk_ident("a",token::ModName),
|
||||
vec![mk_ident("a", token::ModName),
|
||||
token::ModSep,
|
||||
mk_ident("b", token::Plain)]);
|
||||
}
|
||||
|
||||
#[test] fn dcparsing_3 () {
|
||||
#[test]
|
||||
fn dcparsing_3() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
check_tokenization(setup(&cm, &sh, "a ::b".to_string()),
|
||||
@@ -1531,54 +1752,61 @@ fn mk_ident(id: &str, style: token::IdentStyle) -> token::Token {
|
||||
mk_ident("b", token::Plain)]);
|
||||
}
|
||||
|
||||
#[test] fn dcparsing_4 () {
|
||||
#[test]
|
||||
fn dcparsing_4() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
check_tokenization(setup(&cm, &sh, "a:: b".to_string()),
|
||||
vec![mk_ident("a",token::ModName),
|
||||
vec![mk_ident("a", token::ModName),
|
||||
token::ModSep,
|
||||
token::Whitespace,
|
||||
mk_ident("b", token::Plain)]);
|
||||
}
|
||||
|
||||
#[test] fn character_a() {
|
||||
#[test]
|
||||
fn character_a() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
assert_eq!(setup(&cm, &sh, "'a'".to_string()).next_token().tok,
|
||||
token::Literal(token::Char(token::intern("a")), None));
|
||||
}
|
||||
|
||||
#[test] fn character_space() {
|
||||
#[test]
|
||||
fn character_space() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
assert_eq!(setup(&cm, &sh, "' '".to_string()).next_token().tok,
|
||||
token::Literal(token::Char(token::intern(" ")), None));
|
||||
}
|
||||
|
||||
#[test] fn character_escaped() {
|
||||
#[test]
|
||||
fn character_escaped() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
assert_eq!(setup(&cm, &sh, "'\\n'".to_string()).next_token().tok,
|
||||
token::Literal(token::Char(token::intern("\\n")), None));
|
||||
}
|
||||
|
||||
#[test] fn lifetime_name() {
|
||||
#[test]
|
||||
fn lifetime_name() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
assert_eq!(setup(&cm, &sh, "'abc".to_string()).next_token().tok,
|
||||
token::Lifetime(token::str_to_ident("'abc")));
|
||||
}
|
||||
|
||||
#[test] fn raw_string() {
|
||||
#[test]
|
||||
fn raw_string() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
assert_eq!(setup(&cm, &sh,
|
||||
"r###\"\"#a\\b\x00c\"\"###".to_string()).next_token()
|
||||
.tok,
|
||||
assert_eq!(setup(&cm, &sh, "r###\"\"#a\\b\x00c\"\"###".to_string())
|
||||
.next_token()
|
||||
.tok,
|
||||
token::Literal(token::StrRaw(token::intern("\"#a\\b\x00c\""), 3), None));
|
||||
}
|
||||
|
||||
#[test] fn literal_suffixes() {
|
||||
#[test]
|
||||
fn literal_suffixes() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
macro_rules! test {
|
||||
@@ -1614,24 +1842,28 @@ macro_rules! test {
|
||||
Some(token::intern("suffix"))));
|
||||
}
|
||||
|
||||
#[test] fn line_doc_comments() {
|
||||
#[test]
|
||||
fn line_doc_comments() {
|
||||
assert!(is_doc_comment("///"));
|
||||
assert!(is_doc_comment("/// blah"));
|
||||
assert!(!is_doc_comment("////"));
|
||||
}
|
||||
|
||||
#[test] fn nested_block_comments() {
|
||||
#[test]
|
||||
fn nested_block_comments() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
let mut lexer = setup(&cm, &sh, "/* /* */ */'a'".to_string());
|
||||
match lexer.next_token().tok {
|
||||
token::Comment => { },
|
||||
_ => panic!("expected a comment!")
|
||||
token::Comment => {}
|
||||
_ => panic!("expected a comment!"),
|
||||
}
|
||||
assert_eq!(lexer.next_token().tok, token::Literal(token::Char(token::intern("a")), None));
|
||||
assert_eq!(lexer.next_token().tok,
|
||||
token::Literal(token::Char(token::intern("a")), None));
|
||||
}
|
||||
|
||||
#[test] fn crlf_comments() {
|
||||
#[test]
|
||||
fn crlf_comments() {
|
||||
let cm = Rc::new(CodeMap::new());
|
||||
let sh = mk_sh(cm.clone());
|
||||
let mut lexer = setup(&cm, &sh, "// test\r\n/// test\r\n".to_string());
|
||||
@@ -1639,6 +1871,7 @@ macro_rules! test {
|
||||
assert_eq!(comment.tok, token::Comment);
|
||||
assert_eq!(comment.sp, ::codemap::mk_sp(BytePos(0), BytePos(7)));
|
||||
assert_eq!(lexer.next_token().tok, token::Whitespace);
|
||||
assert_eq!(lexer.next_token().tok, token::DocComment(token::intern("/// test")));
|
||||
assert_eq!(lexer.next_token().tok,
|
||||
token::DocComment(token::intern("/// test")));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user