// Copyright 2006 The Closure Library Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS-IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** * @fileoverview Date/Time parsing library with locale support. */ /** * Namespace for locale date/time parsing functions */ goog.provide('goog.i18n.DateTimeParse'); goog.require('goog.asserts'); goog.require('goog.date'); goog.require('goog.i18n.DateTimeFormat'); goog.require('goog.i18n.DateTimeSymbols'); /** * DateTimeParse is for parsing date in a locale-sensitive manner. It allows * user to use any customized patterns to parse date-time string under certain * locale. Things varies across locales like month name, weekname, field * order, etc. * * This module is the counter-part of DateTimeFormat. They use the same * date/time pattern specification, which is borrowed from ICU/JDK. * * This implementation could parse partial date/time. * * Time Format Syntax: To specify the time format use a time pattern string. * In this pattern, following letters are reserved as pattern letters, which * are defined as the following: * *
* Symbol Meaning Presentation Example * ------ ------- ------------ ------- * G era designator (Text) AD * y# year (Number) 1996 * M month in year (Text & Number) July & 07 * d day in month (Number) 10 * h hour in am/pm (1~12) (Number) 12 * H hour in day (0~23) (Number) 0 * m minute in hour (Number) 30 * s second in minute (Number) 55 * S fractional second (Number) 978 * E day of week (Text) Tuesday * D day in year (Number) 189 * a am/pm marker (Text) PM * k hour in day (1~24) (Number) 24 * K hour in am/pm (0~11) (Number) 0 * z time zone (Text) Pacific Standard Time * Z time zone (RFC 822) (Number) -0800 * v time zone (generic) (Text) Pacific Time * ' escape for text (Delimiter) 'Date=' * '' single quote (Literal) 'o''clock' ** * The count of pattern letters determine the format.
* (Text): 4 or more pattern letters--use full form, * less than 4--use short or abbreviated form if one exists. * In parsing, we will always try long format, then short.
* (Number): the minimum number of digits.
* (Text & Number): 3 or over, use text, otherwise use number.
* Any characters that are not pattern letters will be treated as quoted text. For * instance, characters like ':', '.', ' ', '#' and '@' will appear in the * resulting time text even if they are not enclosed within single quotes. Not * all letters are currently used as pattern letters, but using the unused * letters as literal text without quoting them is strongly discouraged, * because they may become pattern letters in the future.
* * Examples Using the US Locale: * * Format Pattern Result * -------------- ------- * "yyyy.MM.dd G 'at' HH:mm:ss vvvv" ->> 1996.07.10 AD at 15:08:56 Pacific Time * "EEE, MMM d, ''yy" ->> Wed, July 10, '96 * "h:mm a" ->> 12:08 PM * "hh 'o''clock' a, zzzz" ->> 12 o'clock PM, Pacific Daylight Time * "K:mm a, vvv" ->> 0:00 PM, PT * "yyyyy.MMMMM.dd GGG hh:mm aaa" ->> 01996.July.10 AD 12:08 PM * *
When parsing a date string using the abbreviated year pattern ("yy"), * DateTimeParse must interpret the abbreviated year relative to some * century. It does this by adjusting dates to be within 80 years before and 20 * years after the time the parse function is called. For example, using a * pattern of "MM/dd/yy" and a DateTimeParse instance created on Jan 1, 1997, * the string "01/11/12" would be interpreted as Jan 11, 2012 while the string * "05/04/64" would be interpreted as May 4, 1964. During parsing, only * strings consisting of exactly two digits, as defined by {@link * java.lang.Character#isDigit(char)}, will be parsed into the default * century. Any other numeric string, such as a one digit string, a three or * more digit string will be interpreted as its face value. * *
If the year pattern does not have exactly two 'y' characters, the year is * interpreted literally, regardless of the number of digits. So using the * pattern "MM/dd/yyyy", "01/11/12" parses to Jan 11, 12 A.D. * *
When numeric fields abut one another directly, with no intervening * delimiter characters, they constitute a run of abutting numeric fields. Such * runs are parsed specially. For example, the format "HHmmss" parses the input * text "123456" to 12:34:56, parses the input text "12345" to 1:23:45, and * fails to parse "1234". In other words, the leftmost field of the run is * flexible, while the others keep a fixed width. If the parse fails anywhere in * the run, then the leftmost field is shortened by one character, and the * entire run is parsed again. This is repeated until either the parse succeeds * or the leftmost field is one character in length. If the parse still fails at * that point, the parse of the run fails. * *
Currently, time zone parsing only supports GMT:hhmm, GMT:+hhmm, and GMT:-hhmm.
*/
/**
 * Creates a parser for one date/time pattern. The pattern is compiled once,
 * at construction time, using either the supplied symbols or the global
 * goog.i18n.DateTimeSymbols.
 * @param {string|number} pattern pattern specification or pattern type.
 * @param {!Object=} opt_dateTimeSymbols Optional symbols to use for this
 *     instance rather than the global symbols.
 * @constructor
 * @final
 */
goog.i18n.DateTimeParse = function(pattern, opt_dateTimeSymbols) {
  var symbolsAvailable =
      goog.isDef(opt_dateTimeSymbols) || goog.isDef(goog.i18n.DateTimeSymbols);
  goog.asserts.assert(
      symbolsAvailable,
      'goog.i18n.DateTimeSymbols or explicit symbols must be defined');

  // Compiled pattern parts; filled in by applyPattern_.
  this.patternParts_ = [];

  /**
   * Locale data used by this instance; explicit symbols take precedence over
   * the global ones.
   * @const @private {!goog.i18n.DateTimeSymbolsType}
   */
  this.dateTimeSymbols_ = /** @type {!goog.i18n.DateTimeSymbolsType} */ (
      opt_dateTimeSymbols || goog.i18n.DateTimeSymbols);

  // A string is a literal pattern; a number selects a predefined pattern.
  if (typeof pattern != 'number') {
    this.applyPattern_(pattern);
  } else {
    this.applyStandardPattern_(pattern);
  }
};
/**
 * Number of years prior to now at which the century used to disambiguate
 * two-digit years begins. With the value 80, the file overview describes the
 * resulting window as 80 years before to 20 years after the time of the
 * parse call.
 *
 * @type {number}
 */
goog.i18n.DateTimeParse.ambiguousYearCenturyStart = 80;
/**
 * Compiles a pattern string into this.patternParts_ and then marks the
 * starts of abutting numeric runs.
 *
 * Each compiled part is {text, count, abutStart}: pattern fields carry the
 * pattern letter and its repeat count, literal parts carry the literal text
 * with a count of 0.
 *
 * Note: this method is somewhat similar to the pattern parsing method in
 * datetimeformat. If you see something wrong here, you might want
 * to check the other.
 * @param {string} pattern It describes the format of date string that need to
 *     be parsed.
 * @private
 */
goog.i18n.DateTimeParse.prototype.applyPattern_ = function(pattern) {
  var parts = this.patternParts_;
  var length = pattern.length;
  var literal = '';
  var quoted = false;

  // Emits the pending literal text, if any, as its own part.
  var flushLiteral = function() {
    if (literal.length > 0) {
      parts.push({text: literal, count: 0, abutStart: false});
      literal = '';
    }
  };

  var pos = 0;
  while (pos < length) {
    var ch = pattern.charAt(pos);
    if (ch == ' ') {
      // Spaces are handled before anything else, even inside quotes: a run
      // of spaces collapses into a single space part.
      flushLiteral();
      parts.push({text: ' ', count: 0, abutStart: false});
      while (pos + 1 < length && pattern.charAt(pos + 1) == ' ') {
        pos++;
      }
    } else if (ch == '\'') {
      if (pos + 1 < length && pattern.charAt(pos + 1) == '\'') {
        // Two consecutive quotes are one literal quote, inside or outside of
        // a quoted section.
        literal += '\'';
        pos++;
      } else {
        // A lone quote opens or closes a quoted section.
        quoted = !quoted;
      }
    } else if (
        !quoted && goog.i18n.DateTimeParse.PATTERN_CHARS_.indexOf(ch) >= 0) {
      // Unquoted pattern letter: consume the whole run of that letter.
      flushLiteral();
      var repeat = this.getNextCharCount_(pattern, pos);
      parts.push({text: ch, count: repeat, abutStart: false});
      pos += repeat - 1;
    } else {
      // Everything else is literal text.
      literal += ch;
    }
    pos++;
  }

  flushLiteral();
  this.markAbutStart_();
};
/**
 * Apply a predefined pattern to this Parser.
 *
 * The format type selects an entry from the locale symbols: values below 4
 * index DATEFORMATS, values below 8 index TIMEFORMATS, and the remaining
 * values index DATETIMEFORMATS, whose '{1}' and '{0}' placeholders are
 * filled with the matching date and time patterns respectively.
 *
 * @param {number} formatType A constant used to identified the predefined
 *     pattern string stored in locale repository. Values outside
 *     [0, goog.i18n.DateTimeFormat.Format.SHORT_DATETIME] (including NaN)
 *     fall back to MEDIUM_DATETIME.
 * @private
 */
goog.i18n.DateTimeParse.prototype.applyStandardPattern_ = function(formatType) {
  var symbols = this.dateTimeSymbols_;

  // formatType constants are consecutive numbers, so they can be used to
  // index the symbol arrays. Anything out of range defaults to the medium
  // date/time format; the negated comparison also rejects NaN, which would
  // otherwise fall through every branch and index with NaN.
  if (!(formatType >= 0 &&
        formatType <= goog.i18n.DateTimeFormat.Format.SHORT_DATETIME)) {
    formatType = goog.i18n.DateTimeFormat.Format.MEDIUM_DATETIME;
  }

  var pattern;
  if (formatType < 4) {
    pattern = symbols.DATEFORMATS[formatType];
  } else if (formatType < 8) {
    pattern = symbols.TIMEFORMATS[formatType - 4];
  } else {
    var datePattern = symbols.DATEFORMATS[formatType - 8];
    var timePattern = symbols.TIMEFORMATS[formatType - 8];
    // Function replacers insert the sub-patterns verbatim. A plain string
    // replacement would interpret '$' sequences (e.g. the "$'" inside a
    // quoted dollar sign) as special replacement patterns.
    pattern = symbols.DATETIMEFORMATS[formatType - 8]
                  .replace('{1}', function() { return datePattern; })
                  .replace('{0}', function() { return timePattern; });
  }
  this.applyPattern_(pattern);
};
/**
 * Parses the given string and stores the result in the date object. This is
 * the lenient entry point: it delegates to internalParse_ with validation
 * switched off.
 * @param {string} text The string being parsed.
 * @param {goog.date.DateLike} date The Date object to hold the parsed date.
 * @param {number=} opt_start The position from where parse should begin.
 * @return {number} How many characters parser advanced.
 */
goog.i18n.DateTimeParse.prototype.parse = function(text, date, opt_start) {
  var validate = false;
  // A missing (or otherwise falsy) start position means "from the beginning".
  return this.internalParse_(text, date, opt_start || 0, validate);
};
/**
 * Parses the given string and stores the result in the date object. This is
 * the strict entry point: it delegates to internalParse_ with validation
 * switched on, so the input must be a valid date/time.
 * @param {string} text The string being parsed.
 * @param {goog.date.DateLike} date The Date object to hold the parsed date.
 * @param {number=} opt_start The position from where parse should begin.
 * @return {number} How many characters parser advanced.
 */
goog.i18n.DateTimeParse.prototype.strictParse = function(
    text, date, opt_start) {
  var validate = true;
  // A missing (or otherwise falsy) start position means "from the beginning".
  return this.internalParse_(text, date, opt_start || 0, validate);
};
/**
 * Parse the given string and fill info into date object.
 *
 * Walks the compiled pattern parts in order. Pattern fields (count > 0) are
 * handed to subParse_; literal parts must match the input text exactly,
 * except that a space part matches any non-empty run of white space. Parsed
 * values are collected in a MyDate_ instance and transferred to the date
 * object by calcDate_ at the end.
 *
 * @param {string} text The string being parsed.
 * @param {goog.date.DateLike} date The Date object to hold the parsed date.
 * @param {number} start The position from where parse should begin.
 * @param {boolean} validation If true, input string need to be a valid
 *     date/time string.
 * @return {number} How many characters parser advanced; 0 if any part fails
 *     to match or calcDate_ reports failure.
 * @private
 */
goog.i18n.DateTimeParse.prototype.internalParse_ = function(
    text, date, start, validation) {
  var cal = new goog.i18n.DateTimeParse.MyDate_();
  // Single-element array so that helpers can advance the position in place.
  var parsePos = [start];

  // For parsing abutting numeric fields. 'abutPat' is the offset into the
  // pattern parts of the first of 2 or more abutting numeric fields.
  // 'abutStart' is the offset into 'text' where parsing the fields begins.
  // 'abutPass' starts off as 0 and increments each time we try to parse the
  // fields.
  var abutPat = -1;  // If >=0, we are in a run of abutting numeric fields
  var abutStart = 0;
  var abutPass = 0;

  for (var i = 0; i < this.patternParts_.length; i++) {
    var part = this.patternParts_[i];

    if (part.count > 0) {
      if (abutPat < 0 && part.abutStart) {
        abutPat = i;
        // Remember where the run begins in the text, i.e. the current parse
        // position. Using the overall 'start' here would make a retry rewind
        // past everything parsed before the run.
        abutStart = parsePos[0];
        abutPass = 0;
      }

      // Handle fields within a run of abutting numeric fields. Take the
      // pattern "HHmmss" as an example. We will try to parse 2/2/2
      // characters of the input text, then if that fails, 1/2/2. We only
      // adjust the width of the leftmost field; the others remain fixed.
      // This allows "123456" => 12:34:56, but "12345" => 1:23:45. Likewise,
      // for the pattern "yyyyMMdd" we try 4/2/2, 3/2/2, 2/2/2, and finally
      // 1/2/2.
      if (abutPat >= 0) {
        var count = part.count;
        if (i == abutPat) {
          // Leftmost field of the run: one character narrower on each pass.
          count -= abutPass;
          abutPass++;
          if (count == 0) {
            // Tried all possible widths, fail now.
            return 0;
          }
        }

        if (!this.subParse_(text, parsePos, part, count, cal)) {
          // If the parse fails anywhere in the run, back up to the start of
          // the run and retry (the loop increment moves i back to abutPat).
          i = abutPat - 1;
          parsePos[0] = abutStart;
          continue;
        }
      } else {
        // Handle non-numeric fields and non-abutting numeric fields.
        abutPat = -1;
        if (!this.subParse_(text, parsePos, part, 0, cal)) {
          return 0;
        }
      }
    } else {
      // Handle literal pattern characters. These are any quoted characters
      // and non-alphabetic unquoted characters. A literal always ends a run
      // of abutting fields.
      abutPat = -1;

      if (part.text.charAt(0) == ' ') {
        // A run of white space in the pattern matches a run of white space
        // in the input text; at least one white space char must be present.
        var s = parsePos[0];
        this.skipSpace_(text, parsePos);
        if (parsePos[0] > s) {
          continue;
        }
      } else if (text.indexOf(part.text, parsePos[0]) == parsePos[0]) {
        // The literal text appears exactly at the current position.
        parsePos[0] += part.text.length;
        continue;
      }
      // We fall through to this point if the match fails.
      return 0;
    }
  }

  // Return progress, or 0 if the collected fields cannot be applied.
  return cal.calcDate_(date, validation) ? parsePos[0] - start : 0;
};
/**
 * Counts how many times the character at the given position repeats
 * consecutively, starting at that position.
 *
 * @param {string} pattern It describes the format of date string that need to
 *     be parsed.
 * @param {number} start The position of pattern character.
 *
 * @return {number} Repeat count.
 * @private
 */
goog.i18n.DateTimeParse.prototype.getNextCharCount_ = function(pattern, start) {
  var letter = pattern.charAt(start);
  // The character at 'start' always counts as the first occurrence.
  var run = 1;
  while (start + run < pattern.length &&
         pattern.charAt(start + run) == letter) {
    run++;
  }
  return run;
};
/**
 * All acceptable pattern characters. When compiling a pattern, an unquoted
 * occurrence of any of these letters starts a pattern field; every other
 * character is treated as literal text.
 * @private
 */
goog.i18n.DateTimeParse.PATTERN_CHARS_ = 'GyMdkHmsSEDahKzZvQL';
/**
 * Pattern characters that specify numerical field.
 * The order matters: isNumericField_ treats the character at index 0 ('M')
 * specially, as numeric only when it is repeated fewer than 3 times, so 'M'
 * must stay first.
 * @private
 */
goog.i18n.DateTimeParse.NUMERIC_FORMAT_CHARS_ = 'MydhHmsSDkK';
/**
 * Tells whether a compiled pattern part denotes a numeric field.
 *
 * @param {Object} part pattern part to be examined.
 *
 * @return {boolean} true if the pattern part is numeric field.
 * @private
 */
goog.i18n.DateTimeParse.prototype.isNumericField_ = function(part) {
  // Literal parts carry a count of 0 and are never numeric.
  if (part.count <= 0) {
    return false;
  }
  var position = goog.i18n.DateTimeParse.NUMERIC_FORMAT_CHARS_.indexOf(
      part.text.charAt(0));
  if (position == 0) {
    // First entry of the list ('M'): numeric only with fewer than 3 letters.
    return part.count < 3;
  }
  return position > 0;
};
/**
 * Flags the first field of every run of two or more directly adjacent
 * numeric fields by setting its abutStart property. Taking pattern "HHmmss"
 * as an example, the parser will try 2/2/2 characters of the input text,
 * then if that fails, 1/2/2: only the width of the flagged leftmost field is
 * reduced to accommodate the others, which keep a fixed width. This allows
 * "123456" => 12:34:56, but "12345" => 1:23:45. Likewise, for the pattern
 * "yyyyMMdd" the widths tried are 4/2/2, 3/2/2, 2/2/2, and finally 1/2/2.
 *
 * @private
 */
goog.i18n.DateTimeParse.prototype.markAbutStart_ = function() {
  var parts = this.patternParts_;
  // True while the scan is inside a run whose first field is already flagged.
  var insideRun = false;

  for (var idx = 0; idx < parts.length; idx++) {
    var current = parts[idx];

    if (!this.isNumericField_(current)) {
      // Any non-numeric part ends the current run.
      insideRun = false;
      continue;
    }
    if (insideRun) {
      continue;
    }
    // A run starts here only if the very next part is numeric as well.
    if (idx + 1 < parts.length && this.isNumericField_(parts[idx + 1])) {
      insideRun = true;
      current.abutStart = true;
    }
  }
};
/**
* Skip space in the string.
*
* @param {string} text input string.
* @param {Array