// Lexical analyser: splits the source text into identifier, number, reserved-word and symbol tokens.
// Global state shared by the lexer functions.

// Token type names; getSym() refers to them by index.
var type = ['IDENT', 'INT', 'REAL', 'RESERVED WORD', 'UNRECOGNIZED SYMBOL', 'EOF'];
// Reserved words.
var word = ['if', 'else', 'while', 'read', 'write', 'int', 'real', 'return'];
// Two-character symbols.
var specWord = ['==', '/*', '*/', '<>'];
// Token sequence produced by the lexer.
var tokens;
// Tokens that could not be recognised.
var err;
// Placeholder token that relex() stores as the first element of `tokens`.
var js_cmm = new Token('有思', '20130305', 0, 0);
// Indentation unit used for formatted printing.
var tab = ' ';
// Current row index and column index of the reader.
var rowNum, colNum;
// Reader state: `rows` is the source split on '\n', `row` is the current
// line (with one trailing space appended), `ch` is the current character.
var rows, row, ch;
/**
 * Token record produced by the lexer.
 *
 * Declared as a hoisted function (not a class) because the globals above
 * construct a Token before this definition is reached.
 *
 * @param {string} t Token type.
 * @param {string} v Token text.
 * @param {number} r Row number of the token.
 * @param {number} c Column position of the token.
 */
function Token(t, v, r, c) {
    this.type = t;
    this.value = v;
    // Position information.
    this.rowNum = r;
    this.colNum = c;
}
/**
 * Page initialisation: gives keyboard focus to the source input element
 * ('UD_CD_Source') and sorts the reserved-word and two-character-symbol
 * tables in place.
 */
function init() {
    U.CD.Obj('UD_CD_Source').focus();
    word.sort();
    specWord.sort();
}
// Run init() once the page has finished loading.
window.onload = init;
/**
 * Resets all lexer state before a new run: rewinds the reader to the first
 * character of the text in the 'UD_CD_Source' element, empties the token and
 * error lists (the token list starts with the js_cmm placeholder), and
 * clears the output via cls().
 */
function relex() {
    rowNum = 0;
    colNum = 0;
    rows = U.CD.Obj('UD_CD_Source').value.split('\n');
    // A trailing space is appended so every line ends with a separator.
    row = rows[rowNum] + ' ';
    ch = row[colNum];
    tokens = [js_cmm];
    err = [];
    // Clear the output area.
    cls();
}
/**
 * Entry point of the lexical analysis: resets the lexer state with relex()
 * and then tokenises the source with getSym().
 */
function lex() {
    relex();
    getSym();
}
/**
 * Reads the character at the current position into the global `ch`,
 * advances the reader by one position and returns the character.
 *
 * When the end of the current row is reached and it is not the last row,
 * the reader moves to the start of the next row (with one trailing space
 * appended to it). On the last row the position stays at the end, so
 * further calls return undefined.
 *
 * @returns {string|undefined} The character read, or undefined once the
 *     input is exhausted.
 */
function getCh() {
    ch = row[colNum];
    if (colNum < row.length) {
        colNum += 1;
    }
    var atRowEnd = colNum === row.length;
    var hasNextRow = rowNum !== rows.length - 1;
    if (atRowEnd && hasNextRow) {
        rowNum += 1;
        colNum = 0;
        row = rows[rowNum] + ' ';
    }
    return ch;
}
/**
 * Tokenises the whole input, starting at the current reader position.
 *
 * Characters are pulled with getCh(). Recognised tokens are appended to the
 * global `tokens`, unrecognised ones to the global `err`, and comments are
 * dropped. The run ends by appending an EOF token to `tokens`.
 *
 * Token types produced:
 *   - type[0] (IDENT) for identifiers; a reserved word uses its own text as type
 *   - type[1] (INT) / type[2] (REAL) for numeric literals
 *   - the symbol text itself for single symbols and for '==' and '<>'
 *   - type[4] (UNRECOGNIZED SYMBOL) for anything else, including malformed
 *     numbers such as "1." or "1.2.3"
 *
 * Each token records the 1-based row and the column of its first character.
 */
function getSym() {
    // Both patterns are anchored so the whole literal has to match.
    var INTEGER_PATTERN = /^\d+$/;
    var REAL_PATTERN = /^\d+\.\d+$/;
    // Every single-character symbol the lexer accepts.
    var SYMBOLS = '+-();{}[]<>^,*/=';
    // Symbols that may start a two-character symbol from specWord.
    var PAIR_STARTERS = '/=<';

    function isLetter(c) {
        return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z';
    }

    function isDigit(c) {
        return c >= '0' && c <= '9';
    }

    function isIdentifierPart(c) {
        return isLetter(c) || isDigit(c) || c === '_';
    }

    function isNumberPart(c) {
        return isDigit(c) || c === '.';
    }

    // True when c is a single character contained in the string `set`.
    function isOneOf(set, c) {
        return typeof c === 'string' && c.length === 1 && set.indexOf(c) !== -1;
    }

    // Collects the current character plus every following character that
    // `accept` approves; leaves `ch` on the first rejected character.
    function readWhile(accept) {
        var text = '';
        do {
            text += ch;
            ch = getCh();
        } while (accept(ch));
        return text;
    }

    // Consumes input up to and including the closing "*/". Remembering the
    // previous character (instead of reading ahead after each '*') makes
    // terminators such as "**/" work. Stops at end of input when the comment
    // is never closed.
    function skipComment() {
        var previous = '';
        ch = getCh();
        while (ch !== undefined && !(previous === '*' && ch === '/')) {
            previous = ch;
            ch = getCh();
        }
    }

    ch = getCh();
    while (true) {
        // getCh() has already advanced past `ch`, so colNum is the 1-based
        // column of the current character.
        var line = rowNum + 1;
        var column = colNum;
        var tokenType;
        var text;

        // End of input: emit the EOF token and stop.
        if (ch === undefined) {
            tokens.push(new Token(type[5], 'EOF', line, column));
            break;
        }

        // Skip blanks and tabs.
        if (ch === ' ' || ch === '\t') {
            ch = getCh();
            continue;
        }

        if (isLetter(ch)) {
            // Identifier or reserved word.
            text = readWhile(isIdentifierPart);
            // indexOf does not depend on `word` having been sorted.
            tokenType = word.indexOf(text) !== -1 ? text : type[0];
        } else if (isDigit(ch)) {
            // Numeric literal: digits and dots are collected, then classified.
            text = readWhile(isNumberPart);
            if (INTEGER_PATTERN.test(text)) {
                tokenType = type[1];
            } else if (REAL_PATTERN.test(text)) {
                tokenType = type[2];
            } else {
                tokenType = type[4];
            }
        } else if (isOneOf(SYMBOLS, ch)) {
            text = ch;
            tokenType = ch;
            var advance = true;
            if (isOneOf(PAIR_STARTERS, ch)) {
                var next = getCh();
                var pair = next === undefined ? text : text + next;
                if (specWord.indexOf(pair) !== -1) {
                    text = pair;
                    tokenType = pair;
                } else {
                    // Not a two-character symbol: the lookahead simply becomes
                    // the current character. Rewinding colNum instead would
                    // break when getCh() has already moved to the next row.
                    ch = next;
                    advance = false;
                }
            }
            if (tokenType === '/*') {
                skipComment();
                tokenType = 'comment';
                text = 'ignore';
            }
            if (advance) {
                ch = getCh();
            }
        } else {
            // Anything else cannot be recognised.
            tokenType = type[4];
            text = ch;
            ch = getCh();
        }

        if (tokenType === type[4]) {
            err.push(new Token(tokenType, text, line, column));
        } else if (tokenType !== 'comment') {
            tokens.push(new Token(tokenType, text, line, column));
        }
    }
}
/**
 * Prints all Token objects.
 *
 * Currently a no-op: the rendering code is disabled and kept below for
 * reference only. When there were no errors it listed every token grouped
 * under its source row; otherwise it listed the lexical errors.
 */
function printall() {
    // lexresult('<ul>');
    // if(err.length==0){
    // for (var n = 1; n != tokens.length; n++) {
    // if (tokens[n].rowNum > tokens[n - 1].rowNum) {
    // lexresult('<li><b>' + tokens[n].rowNum + '.' + rows[tokens[n].rowNum - 1] + '</b></li>');
    // }
    // lexresult('<li>' + tab + tokens[n].rowNum + '.' + tokens[n].type + ':' + tokens[n].value + '</li>');
    // }
    // }
    // else{
    // for (var n = 0; n != err.length; n++) {
    // console('<li><font color=red>Lexical error at row' + err[n].rowNum + ',col' + err[n].colNum + ':unrecognized symbol '+err[n].value+'</font></li>');
    // }
    // }
    // lexresult('</ul>');
}