parser.js 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. // low level parser to a concrete syntax tree, derived from cpython's lib2to3
  2. /**
  3. *
  4. * @constructor
  5. * @param {Object} grammar
  6. *
  7. * p = new Parser(grammar);
  8. * p.setup([start]);
  9. * foreach input token:
  10. * if p.addtoken(...):
  11. * break
  12. * root = p.rootnode
  13. *
  14. * can throw SyntaxError
  15. */
  16. function Parser (filename, grammar) {
  17. this.filename = filename;
  18. this.grammar = grammar;
  19. this.comments = {};
  20. this.p_flags = 0;
  21. return this;
  22. }
  23. // all possible parser flags
  24. Parser.FUTURE_PRINT_FUNCTION = "print_function";
  25. Parser.FUTURE_UNICODE_LITERALS = "unicode_literals";
  26. Parser.FUTURE_DIVISION = "division";
  27. Parser.FUTURE_ABSOLUTE_IMPORT = "absolute_import";
  28. Parser.FUTURE_WITH_STATEMENT = "with_statement";
  29. Parser.FUTURE_NESTED_SCOPES = "nested_scopes";
  30. Parser.FUTURE_GENERATORS = "generators";
  31. Parser.CO_FUTURE_PRINT_FUNCTION = 0x10000;
  32. Parser.CO_FUTURE_UNICODE_LITERALS = 0x20000;
  33. Parser.CO_FUTURE_DIVISON = 0x2000;
  34. Parser.CO_FUTURE_ABSOLUTE_IMPORT = 0x4000;
  35. Parser.CO_FUTURE_WITH_STATEMENT = 0x8000;
  36. Parser.prototype.setup = function (start) {
  37. var stackentry;
  38. var newnode;
  39. start = start || this.grammar.start;
  40. //print("START:"+start);
  41. newnode =
  42. {
  43. type : start,
  44. value : null,
  45. context : null,
  46. children: []
  47. };
  48. stackentry =
  49. {
  50. dfa : this.grammar.dfas[start],
  51. state: 0,
  52. node : newnode
  53. };
  54. this.stack = [stackentry];
  55. this.used_names = {};
  56. };
  57. function findInDfa (a, obj) {
  58. var i = a.length;
  59. while (i--) {
  60. if (a[i][0] === obj[0] && a[i][1] === obj[1]) {
  61. return true;
  62. }
  63. }
  64. return false;
  65. }
  66. // Add a comment
  67. Parser.prototype.addcomment = function(value, start, end, line) {
  68. this.comments[start] = value;
  69. };
  70. // Add a token; return true if we're done
  71. Parser.prototype.addtoken = function (type, value, context) {
  72. var errline;
  73. var itsfirst;
  74. var itsdfa;
  75. var state;
  76. var v;
  77. var t;
  78. var newstate;
  79. var i;
  80. var a;
  81. var arcs;
  82. var first;
  83. var states;
  84. var tp;
  85. var ilabel = this.classify(type, value, context);
  86. //print("ilabel:"+ilabel);
  87. OUTERWHILE:
  88. while (true) {
  89. tp = this.stack[this.stack.length - 1];
  90. states = tp.dfa[0];
  91. first = tp.dfa[1];
  92. arcs = states[tp.state];
  93. // look for a state with this label
  94. for (a = 0; a < arcs.length; ++a) {
  95. i = arcs[a][0];
  96. newstate = arcs[a][1];
  97. t = this.grammar.labels[i][0];
  98. v = this.grammar.labels[i][1];
  99. if (ilabel === i) {
  100. // look it up in the list of labels
  101. goog.asserts.assert(t < 256);
  102. // shift a token; we're done with it
  103. this.shift(type, value, newstate, context);
  104. // pop while we are in an accept-only state
  105. state = newstate;
  106. //print("before:"+JSON.stringify(states[state]) + ":state:"+state+":"+JSON.stringify(states[state]));
  107. /* jshint ignore:start */
  108. while (states[state].length === 1
  109. && states[state][0][0] === 0
  110. && states[state][0][1] === state) {
  111. // states[state] == [(0, state)])
  112. this.pop();
  113. //print("in after pop:"+JSON.stringify(states[state]) + ":state:"+state+":"+JSON.stringify(states[state]));
  114. if (this.stack.length === 0) {
  115. // done!
  116. return true;
  117. }
  118. tp = this.stack[this.stack.length - 1];
  119. state = tp.state;
  120. states = tp.dfa[0];
  121. first = tp.dfa[1];
  122. //print(JSON.stringify(states), JSON.stringify(first));
  123. //print("bottom:"+JSON.stringify(states[state]) + ":state:"+state+":"+JSON.stringify(states[state]));
  124. }
  125. /* jshint ignore:end */
  126. // done with this token
  127. //print("DONE, return false");
  128. return false;
  129. } else if (t >= 256) {
  130. itsdfa = this.grammar.dfas[t];
  131. itsfirst = itsdfa[1];
  132. if (itsfirst.hasOwnProperty(ilabel)) {
  133. // push a symbol
  134. this.push(t, this.grammar.dfas[t], newstate, context);
  135. continue OUTERWHILE;
  136. }
  137. }
  138. }
  139. //print("findInDfa: " + JSON.stringify(arcs)+" vs. " + tp.state);
  140. if (findInDfa(arcs, [0, tp.state])) {
  141. // an accepting state, pop it and try somethign else
  142. //print("WAA");
  143. this.pop();
  144. if (this.stack.length === 0) {
  145. throw new Sk.builtin.SyntaxError("too much input", this.filename);
  146. }
  147. } else {
  148. // no transition
  149. errline = context[0][0];
  150. throw new Sk.builtin.SyntaxError("bad input", this.filename, errline, context);
  151. }
  152. }
  153. };
  154. // turn a token into a label
  155. Parser.prototype.classify = function (type, value, context) {
  156. var ilabel;
  157. if (type === Sk.Tokenizer.Tokens.T_NAME) {
  158. this.used_names[value] = true;
  159. ilabel = this.grammar.keywords.hasOwnProperty(value) && this.grammar.keywords[value];
  160. /* Check for handling print as an builtin function */
  161. if(value === "print" && (this.p_flags & Parser.CO_FUTURE_PRINT_FUNCTION || Sk.python3 === true)) {
  162. ilabel = false; // ilabel determines if the value is a keyword
  163. }
  164. if (ilabel) {
  165. //print("is keyword");
  166. return ilabel;
  167. }
  168. }
  169. ilabel = this.grammar.tokens.hasOwnProperty(type) && this.grammar.tokens[type];
  170. if (!ilabel) {
  171. // throw new Sk.builtin.SyntaxError("bad token", type, value, context);
  172. // Questionable modification to put line number in position 2
  173. // like everywhere else and filename in position 1.
  174. throw new Sk.builtin.SyntaxError("bad token", this.filename, context[0][0], context);
  175. }
  176. return ilabel;
  177. };
  178. // shift a token
  179. Parser.prototype.shift = function (type, value, newstate, context) {
  180. var dfa = this.stack[this.stack.length - 1].dfa;
  181. var state = this.stack[this.stack.length - 1].state;
  182. var node = this.stack[this.stack.length - 1].node;
  183. //print("context", context);
  184. var newnode = {
  185. type : type,
  186. value : value,
  187. lineno : context[0][0], // throwing away end here to match cpython
  188. col_offset: context[0][1],
  189. endlineno : context[1][0],
  190. col_endoffset: context[1][1],
  191. children : null
  192. };
  193. if (newnode) {
  194. node.children.push(newnode);
  195. }
  196. this.stack[this.stack.length - 1] = {
  197. dfa : dfa,
  198. state: newstate,
  199. node : node
  200. };
  201. };
  202. // push a nonterminal
  203. Parser.prototype.push = function (type, newdfa, newstate, context) {
  204. var dfa = this.stack[this.stack.length - 1].dfa;
  205. var node = this.stack[this.stack.length - 1].node;
  206. var newnode = {
  207. type : type,
  208. value : null,
  209. lineno : context[0][0], // throwing away end here to match cpython
  210. col_offset: context[0][1],
  211. endlineno : context[1][0],
  212. col_endoffset: context[1][1],
  213. children : []
  214. };
  215. this.stack[this.stack.length - 1] = {
  216. dfa : dfa,
  217. state: newstate,
  218. node : node
  219. };
  220. this.stack.push({
  221. dfa : newdfa,
  222. state: 0,
  223. node : newnode
  224. });
  225. };
  226. //var ac = 0;
  227. //var bc = 0;
  228. // pop a nonterminal
  229. Parser.prototype.pop = function () {
  230. var node;
  231. var pop = this.stack.pop();
  232. var newnode = pop.node;
  233. //print("POP");
  234. if (newnode) {
  235. //print("A", ac++, newnode.type);
  236. //print("stacklen:"+this.stack.length);
  237. if (this.stack.length !== 0) {
  238. //print("B", bc++);
  239. node = this.stack[this.stack.length - 1].node;
  240. node.children.push(newnode);
  241. } else {
  242. //print("C");
  243. this.rootnode = newnode;
  244. this.rootnode.used_names = this.used_names;
  245. }
  246. }
  247. };
  248. /**
  249. * parser for interactive input. returns a function that should be called with
  250. * lines of input as they are entered. the function will return false
  251. * until the input is complete, when it will return the rootnode of the parse.
  252. *
  253. * @param {string} filename
  254. * @param {string=} style root of parse tree (optional)
  255. */
  256. function makeParser(filename, style) {
  257. var tokenizer;
  258. var T_OP;
  259. var T_NL;
  260. var T_COMMENT;
  261. var prefix;
  262. var column;
  263. var lineno;
  264. var p;
  265. if (style === undefined) {
  266. style = "file_input";
  267. }
  268. p = new Parser(filename, Sk.ParseTables);
  269. // for closure's benefit
  270. if (style === "file_input") {
  271. p.setup(Sk.ParseTables.sym.file_input);
  272. } else {
  273. goog.asserts.fail("todo;");
  274. }
  275. lineno = 1;
  276. column = 0;
  277. prefix = "";
  278. T_COMMENT = Sk.Tokenizer.Tokens.T_COMMENT;
  279. T_NL = Sk.Tokenizer.Tokens.T_NL;
  280. T_OP = Sk.Tokenizer.Tokens.T_OP;
  281. tokenizer = new Sk.Tokenizer(filename, style === "single_input", function (type, value, start, end, line) {
  282. var s_lineno = start[0];
  283. var s_column = start[1];
  284. /*
  285. if (s_lineno !== lineno && s_column !== column)
  286. {
  287. // todo; update prefix and line/col
  288. }
  289. */
  290. if (type === T_COMMENT || type === T_NL) {
  291. prefix += value;
  292. lineno = end[0];
  293. column = end[1];
  294. if (value[value.length - 1] === "\n") {
  295. lineno += 1;
  296. column = 0;
  297. }
  298. if (type === T_COMMENT) {
  299. p.addcomment(value, start, end, line);
  300. }
  301. //print(" not calling addtoken");
  302. return undefined;
  303. }
  304. if (type === T_OP) {
  305. type = Sk.OpMap[value];
  306. }
  307. if (p.addtoken(type, value, [start, end, line])) {
  308. return true;
  309. }
  310. });
  311. // create parser function
  312. var parseFunc = function (line) {
  313. var ret = tokenizer.generateTokens(line);
  314. //print("tok:"+ret);
  315. if (ret) {
  316. if (ret !== "done") {
  317. throw new Sk.builtin.SyntaxError("incomplete input", this.filename);
  318. }
  319. return p.rootnode;
  320. }
  321. return false;
  322. };
  323. // set flags, and return
  324. parseFunc.p_flags = p.p_flags;
  325. parseFunc.comments = p.comments;
  326. return parseFunc;
  327. }
  328. Sk.parse = function parse(filename, input) {
  329. var i;
  330. var ret;
  331. var lines;
  332. var parseFunc = makeParser(filename);
  333. if (input.substr(input.length - 1, 1) !== "\n") {
  334. input += "\n";
  335. }
  336. //print("input:"+input);
  337. lines = input.split("\n");
  338. for (i = 0; i < lines.length; ++i) {
  339. ret = parseFunc(lines[i] + ((i === lines.length - 1) ? "" : "\n"));
  340. }
  341. /*
  342. * Small adjustments here in order to return th flags and the cst
  343. */
  344. return {"cst": ret, "flags": parseFunc.p_flags, "comments": parseFunc.comments};
  345. };
  346. Sk.parseTreeDump = function parseTreeDump(n, indent) {
  347. //return JSON.stringify(n, null, 2);
  348. var i;
  349. var ret;
  350. indent = indent || "";
  351. ret = "";
  352. ret += indent;
  353. if (n.type >= 256) { // non-term
  354. ret += Sk.ParseTables.number2symbol[n.type] + "\n";
  355. for (i = 0; i < n.children.length; ++i) {
  356. ret += Sk.parseTreeDump(n.children[i], indent + " ");
  357. }
  358. } else {
  359. ret += Sk.Tokenizer.tokenNames[n.type] + ": " + new Sk.builtin.str(n.value)["$r"]().v + "\n";
  360. }
  361. return ret;
  362. };
  363. goog.exportSymbol("Sk.parse", Sk.parse);
  364. goog.exportSymbol("Sk.parseTreeDump", Sk.parseTreeDump);