Ninja
lexer.in.cc
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "lexer.h"
16 
17 #include <stdio.h>
18 
19 #include "eval_env.h"
20 #include "util.h"
21 
22 bool Lexer::Error(const string& message, string* err) {
23  // Compute line/column.
24  int line = 1;
25  const char* context = input_.str_;
26  for (const char* p = input_.str_; p < last_token_; ++p) {
27  if (*p == '\n') {
28  ++line;
29  context = p + 1;
30  }
31  }
32  int col = last_token_ ? (int)(last_token_ - context) : 0;
33 
34  char buf[1024];
35  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
36  *err = buf;
37  *err += message + "\n";
38 
39  // Add some context to the message.
40  const int kTruncateColumn = 72;
41  if (col > 0 && col < kTruncateColumn) {
42  int len;
43  bool truncated = true;
44  for (len = 0; len < kTruncateColumn; ++len) {
45  if (context[len] == 0 || context[len] == '\n') {
46  truncated = false;
47  break;
48  }
49  }
50  *err += string(context, len);
51  if (truncated)
52  *err += "...";
53  *err += "\n";
54  *err += string(col, ' ');
55  *err += "^ near here";
56  }
57 
58  return false;
59 }
60 
// Convenience constructor: lex an in-memory string, reporting errors
// against the placeholder filename "input" (presumably for tests --
// production callers go through Start() with a real filename; confirm).
Lexer::Lexer(const char* input) {
  Start("input", input);
}
64 
// Begin lexing |input|; |filename| is used only in error messages.
// Both StringPieces are non-owning views (their memory is managed
// externally), so the caller must keep the underlying buffers alive
// for as long as the Lexer is in use.
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;   // Read cursor starts at the beginning of the input.
  last_token_ = NULL;   // No token has been read yet.
}
71 
72 const char* Lexer::TokenName(Token t) {
73  switch (t) {
74  case ERROR: return "lexing error";
75  case BUILD: return "'build'";
76  case COLON: return "':'";
77  case DEFAULT: return "'default'";
78  case EQUALS: return "'='";
79  case IDENT: return "identifier";
80  case INCLUDE: return "'include'";
81  case INDENT: return "indent";
82  case NEWLINE: return "newline";
83  case PIPE2: return "'||'";
84  case PIPE: return "'|'";
85  case POOL: return "'pool'";
86  case RULE: return "'rule'";
87  case SUBNINJA: return "'subninja'";
88  case TEOF: return "eof";
89  }
90  return NULL; // not reached
91 }
92 
93 const char* Lexer::TokenErrorHint(Token expected) {
94  switch (expected) {
95  case COLON:
96  return " ($ also escapes ':')";
97  default:
98  return "";
99  }
100 }
101 
102 string Lexer::DescribeLastError() {
103  if (last_token_) {
104  switch (last_token_[0]) {
105  case '\r':
106  return "carriage returns are not allowed, use newlines";
107  case '\t':
108  return "tabs are not allowed, use spaces";
109  }
110  }
111  return "lexing error";
112 }
113 
// Rewind the read cursor to the start of the most recently read token,
// so the next ReadToken() call returns it again.
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}
117 
119  const char* p = ofs_;
120  const char* q;
121  const char* start;
122  Lexer::Token token;
123  for (;;) {
124  start = p;
125  /*!re2c
126  re2c:define:YYCTYPE = "unsigned char";
127  re2c:define:YYCURSOR = p;
128  re2c:define:YYMARKER = q;
129  re2c:yyfill:enable = 0;
130 
131  nul = "\000";
132  simple_varname = [a-zA-Z0-9_-]+;
133  varname = [a-zA-Z0-9_.-]+;
134 
135  [ ]*"#"[^\000\r\n]*"\n" { continue; }
136  [ ]*[\n] { token = NEWLINE; break; }
137  [ ]+ { token = INDENT; break; }
138  "build" { token = BUILD; break; }
139  "pool" { token = POOL; break; }
140  "rule" { token = RULE; break; }
141  "default" { token = DEFAULT; break; }
142  "=" { token = EQUALS; break; }
143  ":" { token = COLON; break; }
144  "||" { token = PIPE2; break; }
145  "|" { token = PIPE; break; }
146  "include" { token = INCLUDE; break; }
147  "subninja" { token = SUBNINJA; break; }
148  varname { token = IDENT; break; }
149  nul { token = TEOF; break; }
150  [^] { token = ERROR; break; }
151  */
152  }
153 
154  last_token_ = start;
155  ofs_ = p;
156  if (token != NEWLINE && token != TEOF)
157  EatWhitespace();
158  return token;
159 }
160 
161 bool Lexer::PeekToken(Token token) {
162  Token t = ReadToken();
163  if (t == token)
164  return true;
165  UnreadToken();
166  return false;
167 }
168 
// Advance ofs_ past spaces and "$\n" line continuations.  The first
// byte that is neither terminates the scan and is NOT consumed, because
// ofs_ is committed before each match attempt; NUL is matched
// explicitly so we never scan past the end-of-input sentinel.
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  for (;;) {
    ofs_ = p;  // Commit progress before each re2c step.
    /*!re2c
    [ ]+ { continue; }
    "$\n" { continue; }
    nul { break; }
    [^] { break; }
    */
  }
}
181 
// Read a varname-shaped identifier ([a-zA-Z0-9_.-]+) into *out and eat
// any trailing whitespace.  Returns false -- leaving ofs_ untouched --
// if the next byte cannot start an identifier.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  for (;;) {
    const char* start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);  // Copy the matched identifier.
      break;
    }
    [^] { return false; }  // Not an identifier; ofs_ was never advanced.
    */
  }
  ofs_ = p;
  EatWhitespace();
  return true;
}
198 
// Read a $-escaped string into |eval|, split into literal text spans
// (AddText) and variable references (AddSpecial).  If |path| is true,
// lexing stops at an unescaped ' ', ':', '|' or '\n' WITHOUT consuming
// the delimiter; otherwise it stops only at '\n' (which is consumed).
// Returns false and fills *err on a bad $-escape, a carriage return,
// or end of input.
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\r\n|\000]+ {
      // A run of plain text containing no delimiters or escapes.
      eval->AddText(StringPiece(start, p - start));
      continue;
    }
    [ :|\n] {
      if (path) {
        p = start;  // Leave the delimiter for the caller to lex.
        break;
      } else {
        if (*start == '\n')
          break;  // Newline ends a non-path string (and is consumed).
        // In a non-path string ' ', ':' and '|' are ordinary text.
        eval->AddText(StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->AddText(StringPiece("$", 1));  // Escaped dollar sign.
      continue;
    }
    "$ " {
      eval->AddText(StringPiece(" ", 1));  // Escaped space.
      continue;
    }
    "$\n"[ ]* {
      // Line continuation: drop the newline and any leading indent.
      continue;
    }
    "${"varname"}" {
      // ${var}: strip the "${" and "}" delimiters around the name.
      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      // $var: strip the leading "$".
      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$:" {
      eval->AddText(StringPiece(":", 1));  // Escaped colon.
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      // Unmatched byte -- in practice '\r', the only byte no rule above
      // accepts; DescribeLastError() produces a targeted message for it.
      last_token_ = start;
      return Error(DescribeLastError(), err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}
const char * last_token_
Definition: lexer.h:102
const char * str_
Definition: string_piece.h:49
void UnreadToken()
Rewind to the last read Token.
Definition: lexer.cc:115
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:27
static const char * TokenErrorHint(Token expected)
Return a human-readable token hint, used in error messages.
Definition: lexer.cc:94
void EatWhitespace()
Skip past whitespace (called after each read token/ident/etc.).
Definition: lexer.cc:428
string AsString() const
Convert the slice into a full-fledged std::string, copying the data into a new string.
Definition: string_piece.h:45
bool PeekToken(Token token)
If the next token is token, read it and return true.
Definition: lexer.cc:420
bool Error(const string &message, string *err)
Construct an error message with context.
Definition: lexer.cc:23
StringPiece filename_
Definition: lexer.h:99
StringPiece input_
Definition: lexer.h:100
Token ReadToken()
Read a Token from the Token enum.
Definition: lexer.cc:119
string DescribeLastError()
If the last token read was an ERROR token, provide more info or the empty string. ...
Definition: lexer.cc:103
Token
Definition: lexer.h:32
bool ReadIdent(string *out)
Read a simple identifier (a rule or variable name).
Definition: lexer.cc:509
bool ReadEvalString(EvalString *eval, bool path, string *err)
Read a $-escaped string.
Definition: lexer.cc:595
static const char * TokenName(Token t)
Return a human-readable form of a token, used in error messages.
Definition: lexer.cc:73
Lexer()
Definition: lexer.h:28
A tokenized string that contains variable references.
Definition: eval_env.h:59
const char * ofs_
Definition: lexer.h:101
void Start(StringPiece filename, StringPiece input)
Start parsing some input.
Definition: lexer.cc:66