12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744 |
- // Copyright 2015 The Closure Library Authors. All Rights Reserved.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS-IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- /**
- * @fileoverview the default JSON stream parser.
- *
- * The default JSON parser decodes the input stream (string) under the
- * following rules:
- * 1. The stream represents a valid JSON array (must start with a "[" and close
- * with the corresponding "]"). Each element of this array is assumed to be
- * either an array or an object, and will be decoded as a JS object and
- * delivered. Compact array format that is not valid JSON is also supported,
- * e.g. [1,,2].
- * 2. All JSON elements in the buffer will be decoded and delivered in a batch.
- * 3. If a high-level API does not support batch delivery (e.g. grpc), then
- * a wrapper is expected to deliver individual elements separately
- * and in order.
- * 4. The parser is expected to drop any data (without breaking the
- * specified MIME format) that is not visible to the client: e.g. new lines
- * for pretty printing; no-op data for keep-alive support.
- * 5. Fail-fast: any invalid content should abort the stream by setting the
- * state of the parser to "invalid".
- *
- * The parser is a streaming JSON parser, optimized so that it only scans for
- * message boundaries; the actual decoding of JSON strings and the construction
- * of JS objects are done by JSON.parse (native code).
- */
- goog.provide('goog.net.streams.JsonStreamParser');
- goog.provide('goog.net.streams.JsonStreamParser.Options');
- goog.require('goog.asserts');
- goog.require('goog.json');
- goog.require('goog.net.streams.StreamParser');
- goog.require('goog.net.streams.utils');
- goog.scope(function() {
- var utils = goog.module.get('goog.net.streams.utils');
/**
 * Incremental parser for a stream that carries one JSON array; see the file
 * overview for the decoding rules.
 *
 * @param {!goog.net.streams.JsonStreamParser.Options=} opt_options
 *     Optional flags for this instance. A flag that is absent is off.
 * @constructor
 * @struct
 * @implements {goog.net.streams.StreamParser}
 * @final
 * @package
 */
goog.net.streams.JsonStreamParser = function(opt_options) {
  /**
   * Text of the error that invalidated the stream; null while no error has
   * been recorded.
   * @private {?string}
   */
  this.errorMessage_ = null;

  /**
   * Messages that have been completed but not yet handed to the caller.
   * @private {!Array<string|!Object>}
   */
  this.result_ = [];

  /**
   * Input text carried over between parse() calls.
   * @private {string}
   */
  this.buffer_ = '';

  /**
   * States to return to once the construct being scanned is finished.
   * @private {!Array<!Parser.State_>}
   */
  this.stack_ = [];

  /**
   * Nesting depth of the JSON structure at the current position.
   * @private {number}
   */
  this.depth_ = 0;

  /**
   * Running position in the streamed data, reported in error messages.
   * @private {number}
   */
  this.pos_ = 0;

  /**
   * True while the previous character of a string was an unconsumed '\'.
   * @private {boolean}
   */
  this.slashed_ = false;

  /**
   * Progress through a unicode escape: 0 when not inside one, 1-4 otherwise.
   * @private {number}
   */
  this.unicodeCount_ = 0;

  /**
   * Pattern used to jump to the next '\' or '"' while scanning a string.
   * @private {!RegExp}
   */
  this.stringInputPattern_ = /[\\"]/g;

  /**
   * State of the enclosing stream (the top-level array).
   * @private {goog.net.streams.JsonStreamParser.StreamState_}
   */
  this.streamState_ = Parser.StreamState_.INIT;

  /**
   * State of the element-level scanner.
   * @private {goog.net.streams.JsonStreamParser.State_}
   */
  this.state_ = Parser.State_.INIT;

  /**
   * Whether the compact JSON array format, e.g. "[1, ,2]", is accepted.
   * @private {boolean}
   */
  this.allowCompactJsonArrayFormat_ =
      opt_options ? Boolean(opt_options.allowCompactJsonArrayFormat) : false;

  /**
   * Whether messages are delivered as raw strings instead of decoded objects.
   * @private {boolean}
   */
  this.deliverMessageAsRawString_ =
      opt_options ? Boolean(opt_options.deliverMessageAsRawString) : false;
};
/**
 * Options understood by the JsonStreamParser constructor. Both flags are
 * optional; the constructor coerces each one to a boolean, so a missing flag
 * is off.
 *
 * allowCompactJsonArrayFormat: accept the compact array notation in which a
 *     null element is written as an empty string, e.g. "[1, ,2]".
 *
 * deliverMessageAsRawString: hand each message to the caller as its raw text
 *     instead of decoding it into a JS object. Semantically insignificant
 *     whitespace in the input may be kept or dropped.
 *
 * @typedef {{
 *   allowCompactJsonArrayFormat: (boolean|undefined),
 *   deliverMessageAsRawString: (boolean|undefined),
 * }}
 */
goog.net.streams.JsonStreamParser.Options;
// Short alias for the parser constructor, used by the rest of this scope.
var Parser = goog.net.streams.JsonStreamParser;


/**
 * State of the stream as a whole, i.e. of the enclosing top-level array.
 * @private @enum {number}
 */
Parser.StreamState_ = {
  /** The opening '[' of the stream has not been consumed yet. */
  INIT: 0,

  /** The opening '[' was consumed; elements are being scanned. */
  ARRAY_OPEN: 1,

  /** The ']' that closes the stream was consumed. */
  ARRAY_END: 2,

  /** An error was recorded; any further input is rejected. */
  INVALID: 3
};
/**
 * State of the element-level scanner that walks the JSON text character by
 * character.
 * @private @enum {number}
 */
Parser.State_ = {
  /** Initial value assigned by the constructor. */
  INIT: 0,

  /** A JSON value is expected next. */
  VALUE: 1,

  /** A '{' was consumed; the first key or '}' is expected. */
  OBJECT_OPEN: 2,

  /** Inside an object, between tokens: ':', ',' or '}' is expected. */
  OBJECT_END: 3,

  /** A '[' was consumed; the first element or ']' is expected. */
  ARRAY_OPEN: 4,

  /** Inside an array, after an element: ',' or ']' is expected. */
  ARRAY_END: 5,

  /** Inside a string literal. */
  STRING: 6,

  /** After a ',' inside an object; the opening quote of a key is expected. */
  KEY_START: 7,

  /** Restored after a key that followed a ','; handled like OBJECT_END. */
  KEY_END: 8,

  // Literal "true": 't' was consumed, the remaining "rue" is expected.
  TRUE1: 9,
  TRUE2: 10,
  TRUE3: 11,

  // Literal "false": 'f' was consumed, the remaining "alse" is expected.
  FALSE1: 12,
  FALSE2: 13,
  FALSE3: 14,
  FALSE4: 15,

  // Literal "null": 'n' was consumed, the remaining "ull" is expected.
  NULL1: 16,
  NULL2: 17,
  NULL3: 18,

  /** A '.' is expected next. */
  NUM_DECIMAL_POINT: 19,

  /** Inside a number. */
  NUM_DIGIT: 20
};
/**
 * Tells whether the stream is still usable, i.e. the parser has not been put
 * into the INVALID stream state by a previous error.
 *
 * Uses strict comparison, consistent with done(); the stream state is only
 * ever assigned members of the numeric StreamState_ enum.
 *
 * @return {boolean} False once an error has been recorded, true otherwise.
 * @override
 */
Parser.prototype.isInputValid = function() {
  return this.streamState_ !== Parser.StreamState_.INVALID;
};
/**
 * Returns the message stored by the most recent error, or null when no error
 * has been recorded.
 * @override
 */
Parser.prototype.getErrorMessage = function() {
  var message = this.errorMessage_;
  return message;
};
/**
 * Reports whether the closing bracket of the stream has been consumed.
 *
 * @return {boolean} Whether the parser has reached the end of the stream
 *
 * TODO(updogliu): move this API to the base type.
 */
Parser.prototype.done = function() {
  var endState = Parser.StreamState_.ARRAY_END;
  return this.streamState_ === endState;
};
/**
 * Returns whatever input followed the end of the stream. Only meaningful
 * once {@code this.done()} is true; this method simply returns the current
 * buffer without checking.
 *
 * @return {string} The extra input
 *
 * TODO(updogliu): move this API to the base type.
 */
Parser.prototype.getExtraInput = function() {
  var leftover = this.buffer_;
  return leftover;
};
/**
 * Marks the stream as invalid, records an error message and throws it.
 *
 * The message has the form
 * 'The stream is broken @<stream position>/<pos>. With input:\n<input>',
 * where the stream position is the parser's running position counter.
 *
 * @param {string|!ArrayBuffer|!Array<number>} input
 *     The input being parsed when the error was detected; parse() always
 *     passes a string. It is appended to the message verbatim.
 * @param {number} pos The position in the current input that triggers the error
 * @throws {!Error} Always; the error's message equals getErrorMessage().
 * @private
 */
Parser.prototype.error_ = function(input, pos) {
  // Flip the state first so the parser is invalid even if the caller swallows
  // the exception.
  this.streamState_ = Parser.StreamState_.INVALID;
  this.errorMessage_ = 'The stream is broken @' + this.pos_ + '/' + pos +
      '. With input:\n' + input;
  throw new Error(this.errorMessage_);
};
- /**
- * Consumes the next chunk of the stream and returns the top-level messages
- * that this chunk completed.
- *
- * The input must be a string (asserted). Scanner state is kept on the
- * instance between calls, so a message may span several chunks: the
- * unfinished tail of the input is kept in buffer_ and prepended when the
- * message is eventually completed.
- *
- * Returns the array of completed messages (raw strings or decoded objects,
- * depending on deliverMessageAsRawString_), or null when this chunk did not
- * complete any message.
- *
- * @throws {Error} Throws an error message if the input is invalid.
- * @override
- */
- Parser.prototype.parse = function(input) {
- goog.asserts.assertString(input);
- // Locals captured by the nested helper functions below.
- var parser = this;
- var stack = parser.stack_;
- var pattern = parser.stringInputPattern_;
- var State = Parser.State_; // enums
- var num = input.length;
- var streamStart = 0; // index in input where this chunk's stream data begins (just past '[' if seen here)
- var msgStart = -1; // index in input where the current message begins; -1 if no boundary was seen in this chunk
- var i = 0; // index of the next character of input to read
- while (i < num) {
- switch (parser.streamState_) {
- case Parser.StreamState_.INVALID: // a previous error was recorded: keep rejecting input
- parser.error_(input, i);
- return null; // not reached: error_ always throws
- case Parser.StreamState_.ARRAY_END: // stream already closed: only whitespace may follow
- if (readMore()) {
- parser.error_(input, i);
- }
- return null;
- case Parser.StreamState_.INIT: // the opening '[' of the stream is still expected
- if (readMore()) {
- var current = input[i++];
- parser.pos_++;
- if (current === '[') {
- parser.streamState_ = Parser.StreamState_.ARRAY_OPEN;
- streamStart = i; // stream data starts right after the '['
- parser.state_ = State.ARRAY_OPEN;
- continue; // re-enter the switch, now in the ARRAY_OPEN stream state
- } else {
- parser.error_(input, i);
- }
- }
- return null; // the rest of the chunk was whitespace only
- case Parser.StreamState_.ARRAY_OPEN:
- parseData();
- if (parser.depth_ === 0 && parser.state_ == State.ARRAY_END) {
- parser.streamState_ = Parser.StreamState_.ARRAY_END; // the top-level ']' was consumed
- parser.buffer_ = input.substring(i); // what follows the stream; exposed by getExtraInput()
- } else {
- if (msgStart === -1) {
- parser.buffer_ += input.substring(streamStart); // no boundary in this chunk: keep accumulating
- } else {
- parser.buffer_ = input.substring(msgStart); // keep only the unfinished tail after the last boundary
- }
- }
- if (parser.result_.length > 0) {
- var msgs = parser.result_;
- parser.result_ = []; // hand over the batch and start a fresh one
- return msgs;
- }
- return null;
- }
- }
- return null;
- /**
- * Skips whitespace, then reports whether any input is left in this chunk.
- * @return {boolean} true if there is still unread, non-whitespace input
- */
- function readMore() {
- skipWhitespace();
- return i < num;
- }
- /**
- * Skip as many whitespaces as possible, and increments current index of
- * stream to next available char. The stream position (pos_) is advanced
- * together with the index.
- */
- function skipWhitespace() {
- while (i < input.length) {
- if (utils.isJsonWhitespace(input[i])) {
- i++;
- parser.pos_++;
- continue;
- }
- break;
- }
- }
- /**
- * Parse the input JSON elements with a streamed state machine.
- *
- * Reads input one character at a time starting at i and dispatches on
- * parser.state_. Returns when the input is exhausted or when the ']' that
- * closes the top-level array has been consumed (depth_ back at 0). States
- * to resume after a nested construct are kept on the stack.
- */
- function parseData() {
- var current;
- while (true) {
- current = input[i++];
- if (!current) {
- break; // ran off the end of this chunk; state stays on the instance for the next call
- }
- parser.pos_++;
- switch (parser.state_) {
- case State.INIT:
- if (current === '{') {
- parser.state_ = State.OBJECT_OPEN;
- } else if (current === '[') {
- parser.state_ = State.ARRAY_OPEN;
- } else if (!utils.isJsonWhitespace(current)) {
- parser.error_(input, i);
- }
- continue;
- case State.KEY_START:
- case State.OBJECT_OPEN:
- if (utils.isJsonWhitespace(current)) {
- continue;
- }
- if (parser.state_ === State.KEY_START) {
- stack.push(State.KEY_END); // resume in KEY_END once the key string is closed
- } else {
- if (current === '}') {
- addMessage('{}'); // empty object; addMessage ignores it unless it is a top-level element
- parser.state_ = nextState();
- continue;
- } else {
- stack.push(State.OBJECT_END); // resume in OBJECT_END once the first key string is closed
- }
- }
- if (current === '"') {
- parser.state_ = State.STRING;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.KEY_END:
- case State.OBJECT_END:
- if (utils.isJsonWhitespace(current)) {
- continue;
- }
- if (current === ':') {
- if (parser.state_ === State.OBJECT_END) {
- stack.push(State.OBJECT_END); // come back here after the value
- parser.depth_++; // balanced by the decrement on '}' below
- }
- parser.state_ = State.VALUE;
- } else if (current === '}') {
- parser.depth_--;
- addMessage(); // delivers the object if it was a top-level element
- parser.state_ = nextState();
- } else if (current === ',') {
- if (parser.state_ === State.OBJECT_END) {
- stack.push(State.OBJECT_END);
- }
- parser.state_ = State.KEY_START;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.ARRAY_OPEN:
- case State.VALUE:
- if (utils.isJsonWhitespace(current)) {
- continue;
- }
- if (parser.state_ === State.ARRAY_OPEN) {
- parser.depth_++;
- parser.state_ = State.VALUE;
- if (current === ']') {
- parser.depth_--;
- if (parser.depth_ === 0) {
- parser.state_ = State.ARRAY_END; // empty top-level array: the stream is complete
- return;
- }
- addMessage('[]'); // empty nested array; delivered only if it is a top-level element
- parser.state_ = nextState();
- continue;
- } else {
- stack.push(State.ARRAY_END); // resume in ARRAY_END after the first element
- }
- }
- if (current === '"')
- parser.state_ = State.STRING;
- else if (current === '{')
- parser.state_ = State.OBJECT_OPEN;
- else if (current === '[')
- parser.state_ = State.ARRAY_OPEN;
- else if (current === 't')
- parser.state_ = State.TRUE1;
- else if (current === 'f')
- parser.state_ = State.FALSE1;
- else if (current === 'n')
- parser.state_ = State.NULL1;
- else if (current === '-') {
- // continue: a minus sign leaves the state at VALUE; a following digit switches to NUM_DIGIT
- } else if ('0123456789'.indexOf(current) !== -1) {
- parser.state_ = State.NUM_DIGIT;
- } else if (current === ',' && parser.allowCompactJsonArrayFormat_) {
- parser.state_ = State.VALUE; // compact format: an omitted element, expect the next value
- } else if (current === ']' && parser.allowCompactJsonArrayFormat_) {
- i--; // un-read the ']' so the restored state handles it
- parser.pos_--;
- parser.state_ = nextState();
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.ARRAY_END:
- if (current === ',') {
- stack.push(State.ARRAY_END);
- parser.state_ = State.VALUE;
- if (parser.depth_ === 1) {
- msgStart = i; // skip ',', including a leading one
- }
- } else if (current === ']') {
- parser.depth_--;
- if (parser.depth_ === 0) {
- return; // closing ']' of the stream; state_ is still ARRAY_END, which parse() checks
- }
- addMessage(); // delivers the array if it was a top-level element
- parser.state_ = nextState();
- } else if (utils.isJsonWhitespace(current)) {
- continue;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.STRING:
- var old = i; // index at entry, used below to advance pos_ by the characters scanned here
- STRING_LOOP: while (true) {
- while (parser.unicodeCount_ > 0) { // inside a unicode escape: consume its remaining characters
- current = input[i++];
- if (parser.unicodeCount_ === 4) {
- parser.unicodeCount_ = 0;
- } else {
- parser.unicodeCount_++;
- }
- if (!current) {
- break STRING_LOOP; // input exhausted mid-escape; unicodeCount_ persists for the next chunk
- }
- }
- if (current === '"' && !parser.slashed_) {
- parser.state_ = nextState(); // unescaped quote closes the string
- break;
- }
- if (current === '\\' && !parser.slashed_) {
- parser.slashed_ = true;
- current = input[i++];
- if (!current) {
- break; // input exhausted right after '\'; slashed_ persists for the next chunk
- }
- }
- if (parser.slashed_) {
- parser.slashed_ = false; // current is the escaped character
- if (current === 'u') {
- parser.unicodeCount_ = 1;
- }
- current = input[i++];
- if (!current) {
- break;
- } else {
- continue;
- }
- }
- pattern.lastIndex = i; // search for the next '\' or '"' from the current index
- var patternResult = pattern.exec(input);
- if (!patternResult) {
- i = input.length + 1; // neither found: the rest of the chunk belongs to the string
- break;
- }
- i = patternResult.index + 1;
- current = input[patternResult.index];
- if (!current) {
- break;
- }
- }
- parser.pos_ += (i - old); // NOTE(review): on end of input i is input.length + 1, which looks like one more than was read - confirm
- continue;
- case State.TRUE1:
- if (!current) { // NOTE(review): looks unreachable, the loop breaks on a falsy current before this switch - confirm
- continue;
- }
- if (current === 'r') {
- parser.state_ = State.TRUE2;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.TRUE2:
- if (!current) {
- continue;
- }
- if (current === 'u') {
- parser.state_ = State.TRUE3;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.TRUE3:
- if (!current) {
- continue;
- }
- if (current === 'e') {
- parser.state_ = nextState(); // literal "true" complete
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.FALSE1:
- if (!current) {
- continue;
- }
- if (current === 'a') {
- parser.state_ = State.FALSE2;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.FALSE2:
- if (!current) {
- continue;
- }
- if (current === 'l') {
- parser.state_ = State.FALSE3;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.FALSE3:
- if (!current) {
- continue;
- }
- if (current === 's') {
- parser.state_ = State.FALSE4;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.FALSE4:
- if (!current) {
- continue;
- }
- if (current === 'e') {
- parser.state_ = nextState(); // literal "false" complete
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.NULL1:
- if (!current) {
- continue;
- }
- if (current === 'u') {
- parser.state_ = State.NULL2;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.NULL2:
- if (!current) {
- continue;
- }
- if (current === 'l') {
- parser.state_ = State.NULL3;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.NULL3:
- if (!current) {
- continue;
- }
- if (current === 'l') {
- parser.state_ = nextState(); // literal "null" complete
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.NUM_DECIMAL_POINT: // NOTE(review): nothing in this function assigns this state - confirm whether it is still needed
- if (current === '.') {
- parser.state_ = State.NUM_DIGIT;
- } else {
- parser.error_(input, i);
- }
- continue;
- case State.NUM_DIGIT: // no need for a full validation here
- if ('0123456789.eE+-'.indexOf(current) !== -1) {
- continue;
- } else {
- i--; // un-read the terminating character so the restored state handles it
- parser.pos_--;
- parser.state_ = nextState();
- }
- continue;
- default:
- parser.error_(input, i);
- }
- }
- }
- /**
- * Pops the state to resume from the stack.
- * @return {!goog.net.streams.JsonStreamParser.State_} the next state
- * from the stack, or the general VALUE state when the stack is empty.
- */
- function nextState() {
- var state = stack.pop();
- if (state != null) {
- return state;
- } else {
- return State.VALUE;
- }
- }
- /**
- * Records a completed top-level message in parser.result_ and marks the
- * current index as the start of the next message. Does nothing while
- * depth_ is greater than 1, i.e. for values nested inside a message.
- *
- * When no text is passed, the message text is taken from the input: from
- * msgStart up to the current index, or - if no boundary was seen in this
- * chunk - the buffered text plus this chunk up to the current index.
- *
- * @param {(string)=} opt_data The message to add
- */
- function addMessage(opt_data) {
- if (parser.depth_ > 1) {
- return;
- }
- goog.asserts.assert(opt_data !== ''); // '' not possible
- if (!opt_data) {
- if (msgStart === -1) {
- opt_data = parser.buffer_ + input.substring(streamStart, i); // message began in an earlier chunk or at stream start
- } else {
- opt_data = input.substring(msgStart, i);
- }
- }
- if (parser.deliverMessageAsRawString_) {
- parser.result_.push(opt_data);
- } else {
- parser.result_.push(
- goog.asserts.assertInstanceof(goog.json.parse(opt_data), Object)); // decoding is delegated to goog.json.parse
- }
- msgStart = i; // the next message can start no earlier than here
- }
- };
- }); // goog.scope
|