U.DC.LEX.js 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228
  // ---- Global tables and lexer state ----

  // Token type names. getSym() picks them by index:
  // 0 = identifier, 1 = integer, 2 = real, 4 = unrecognized symbol, 5 = end of input.
  var type = ['IDENT', 'INT', 'REAL', 'RESERVED WORD', 'UNRECOGNIZED SYMBOL', 'EOF'];
  // Reserved words of the language being lexed (sorted later by init()).
  var word = ['if', 'else', 'while', 'read', 'write', 'int', 'real', 'return'];
  // Two-character symbols (sorted later by init()).
  var specWord = ['==', '/*', '*/', '<>'];

  // Sequence of recognized tokens.
  var tokens;
  // Sequence of tokens that could not be recognized (lexical errors).
  var err;
  // Placeholder token that relex() pushes as the first entry of `tokens`.
  // Token is a function declaration further down the file, so it is already usable here.
  var js_cmm = new Token('有思', '20130305', 0, 0);

  // Indentation string used when pretty-printing results as HTML.
  var tab = '&nbsp;&nbsp;&nbsp;&nbsp;';

  // Current row index and column index of the read cursor.
  var rowNum, colNum;

  // Character-reading state: `rows` is the source split on '\n', `row` is the
  // current row (with a trailing space appended), `ch` is the current character.
  var rows, row, ch;
  /**
   * Token record produced by the lexer. Intended to be called with `new`.
   *
   * Kept as a function declaration (not a class) so that it is hoisted and can
   * be used by top-level code that appears earlier in the file.
   *
   * @param {*} t - token type
   * @param {*} v - token text / value
   * @param {*} r - row position of the token
   * @param {*} c - column position of the token
   */
  function Token(t, v, r, c) {
    // What kind of token this is, and its text.
    this.type = t;
    this.value = v;

    // Where the token was found.
    this.rowNum = r; // row
    this.colNum = c; // column
  }
  /**
   * Page start-up: gives focus to the element looked up as 'UD_CD_Source' and
   * sorts the reserved-word and two-character-symbol tables in place
   * (getSym() searches them with a binary search, which needs sorted input).
   */
  function init() {
    var sourceBox = U.CD.Obj('UD_CD_Source');
    sourceBox.focus();

    word.sort();
    specWord.sort();
  }

  // Run init once the page has finished loading.
  window.onload = init;
  /**
   * Resets all lexer state before a new run:
   * puts the cursor at row 0 / column 0, re-reads the source text from the
   * 'UD_CD_Source' element and splits it into rows, primes `row` and `ch`,
   * starts `tokens` over with only the js_cmm placeholder, empties `err`,
   * and finally calls cls() to clear the output.
   */
  function relex() {
    // Cursor back to the very beginning.
    rowNum = 0;
    colNum = 0;

    // Load the source; each row gets a trailing space so that tokens on
    // adjacent lines stay separated.
    rows = U.CD.Obj('UD_CD_Source').value.split('\n');
    row = rows[rowNum] + ' ';
    ch = row[colNum];

    // Fresh result lists; the token list always starts with the placeholder.
    tokens = [js_cmm];
    err = [];

    // Clear the screen.
    cls();
  }
  /**
   * Entry point of lexical analysis: resets the lexer state with relex(),
   * then runs the tokenizer loop getSym().
   */
  function lex() {
    relex();  // start from a clean state
    getSym(); // tokenize the whole source
  }
  /**
   * Reads the character under the cursor into the global `ch`, then advances
   * the cursor by one position.
   *
   * When the cursor reaches the end of the current row and another row
   * exists, it moves straight to column 0 of the next row (which again gets
   * a trailing space appended). On the last row the cursor stays parked at
   * the end, so further calls keep returning undefined.
   *
   * @returns {string|undefined} the character that was read, or undefined
   *   when there is nothing left to read
   */
  function getCh() {
    ch = row[colNum];

    // Step forward unless the cursor is already at/after the end of the row.
    if (colNum < row.length) {
      colNum += 1;
    }

    // End of row reached and not on the final row: wrap to the next row.
    if (colNum == row.length && rowNum != rows.length - 1) {
      rowNum += 1;
      colNum = 0;
      row = rows[rowNum] + ' ';
    }

    return ch;
  }
  68. //处理字符函数
  69. function getSym() {
  70. //一些准备工作,初始化节点类和节点类型。
  71. var token = new Token();
  72. var tokenType;
  73. //读入一个字符
  74. ch = getCh();
  75. //定义一维数组As
  76. var As = new Array();
  77. var A;
  78. while (1) {
  79. var r = rowNum + 1;
  80. var c = colNum;
  81. //滤空格和制表符
  82. if (ch == ' ' || ch == '\t') {
  83. ch = getCh();
  84. continue;
  85. }
  86. //检测是否为标识符或保留字,标识符或保留字以字母开头
  87. else if (ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z') {
  88. As = new Array();
  89. do {
  90. As.push(ch);
  91. ch = getCh();
  92. } while ( ch >= 'a' && ch <= 'z' || ch >= 'A' && ch <= 'Z' || ch >= "0" && ch <= '9' || ch == '_');
  93. A = As.join('');
  94. //使用折半搜索检查当前符号是否为保留字
  95. var i = 0,k = 0;
  96. var j = word.length - 1;
  97. do {
  98. k = Math.floor((i + j) / 2);
  99. Math.floor(k);
  100. if (A <= word[k]) j = k - 1;
  101. if (A >= word[k]) i = k + 1;
  102. } while ( i <= j );
  103. //匹配成功
  104. if (i - 1 > j) {
  105. tokenType = A;
  106. //存储进tokens数组
  107. }
  108. //匹配不成功则为标识符
  109. else {
  110. tokenType = type[0];
  111. }
  112. }
  113. //匹配数字
  114. else if (ch >= '0' && ch <= '9') {
  115. As = new Array();
  116. do {
  117. As.push(ch);
  118. ch = getCh();
  119. } while ( ch >= '0' && ch <= '9' || ch == '.');
  120. A = As.join('');
  121. var patrn1 = /^\d+$/; //整数
  122. var patrn2 = /\d+(\.)?((E|e)((\+|-)?))?\d+/;//实数
  123. if ( patrn1.test(A)) {
  124. tokenType = type[1];
  125. }
  126. else if ( patrn2.test(A)){
  127. tokenType = type[2];
  128. }
  129. else {
  130. tokenType = type[4];
  131. }
  132. }
  133. //匹配特殊字符
  134. else if (ch == '+' || ch == '-' || ch == '(' || ch == ')' || ch == ';' || ch == '{' ||
  135. ch == '}' || ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '^' || ch == ',' || ch == '*' || ch == '/' || ch == '=') {
  136. tokenType = ch;
  137. A = ch;
  138. if (ch == '/' || ch == '=' || ch == '<') {
  139. As = new Array();
  140. As.push(ch);
  141. ch = getCh();
  142. As.push(ch);
  143. A = As.join('');
  144. //使用折半搜索检查当前符号是否为特殊符号
  145. var i = 0,
  146. k = 0;
  147. var j = specWord.length - 1;
  148. do {
  149. k = Math.floor((i + j) / 2);
  150. if (A <= specWord[k]) j = k - 1;
  151. if (A >= specWord[k]) i = k + 1;
  152. } while ( i <= j );
  153. //匹配成功
  154. if (i - 1 > j) {
  155. tokenType = specWord[k];
  156. }
  157. //匹配不成功则无法识别
  158. else {
  159. A = As[0];
  160. colNum--;
  161. }
  162. }
  163. //匹配注释
  164. if (tokenType == '/*') {
  165. do {ch = getCh();} while ( typeof(ch)!='undefined' /*匹配注释没有正确结尾的情况*/
  166. && ( ch != '*' || getCh() != '/' )/*遇到/*结束*/ )
  167. tokenType = 'comment';
  168. A = 'ignore';
  169. }
  170. ch = getCh();
  171. }
  172. //ch为undefined则读完源程序,退出循环体
  173. else if (typeof(ch) == 'undefined') {
  174. token = new Token(type[5], 'EOF', r, c);
  175. tokens.push(token);
  176. token=null;
  177. break;
  178. }
  179. //都不能匹配则无法识别
  180. else {
  181. tokenType = type[4];
  182. A=ch;
  183. ch = getCh();
  184. }
  185. token = new Token(tokenType, A, r, c);
  186. if(tokenType==type[4]){
  187. err.push(token);
  188. }
  189. else if(tokenType=='comment'){
  190. //不做任何动作
  191. }
  192. else{
  193. tokens.push(token);
  194. }
  195. //javascript没有内存回收机制,手动释放内存
  196. token = null;
  197. }
  198. // printall();
  199. };
  /**
   * Prints all Token objects. Currently a no-op: the whole implementation
   * below is disabled and kept only for reference.
   *
   * The disabled code emits an HTML list through lexresult(): when there are
   * no errors it walks `tokens` (skipping the placeholder at index 0), prints
   * each source row the first time a token from that row is seen, then each
   * token as "row.type:value"; otherwise it reports every entry of `err`.
   *
   * NOTE(review): the error branch calls console(...) as a function, where
   * lexresult(...) looks intended - confirm before re-enabling.
   */
  function printall() {
    // lexresult('<ul>');
    // if (err.length == 0) {
    //   for (var n = 1; n != tokens.length; n++) {
    //     if (tokens[n].rowNum > tokens[n - 1].rowNum) {
    //       lexresult('<li><b>' + tokens[n].rowNum + '.' + rows[tokens[n].rowNum - 1] + '</b></li>');
    //     }
    //     lexresult('<li>' + tab + tokens[n].rowNum + '.' + tokens[n].type + ':' + tokens[n].value + '</li>');
    //   }
    // }
    // else {
    //   for (var n = 0; n != err.length; n++) {
    //     console('<li><font color=red>Lexical error at row' + err[n].rowNum + ',col' + err[n].colNum + ':unrecognized symbol ' + err[n].value + '</font></li>');
    //   }
    // }
    // lexresult('</ul>');
  }