// Source repository: CocoRoboLabs/CocoRoboDesktop
							'use strict';

const generate = require('regjsgen').generate;
const parse = require('regjsparser').parse;
const regenerate = require('regenerate');
const unicodeMatchProperty = require('unicode-match-property-ecmascript');
const unicodeMatchPropertyValue = require('unicode-match-property-value-ecmascript');
const iuMappings = require('./data/iu-mappings.js');
const ESCAPE_SETS = require('./data/character-class-escape-sets.js');

/**
 * Maps every element of `array` through `callback` and flattens the results
 * by exactly one level.
 *
 * @param {Array} array - Elements to map.
 * @param {Function} callback - Called with each element; may return a single
 *   value or an array of values.
 * @returns {Array} A new array; array results are spliced in, any other
 *   result is appended as-is.
 */
function flatMap(array, callback) {
	return array.reduce((flattened, element) => {
		const mapped = callback(element);
		if (Array.isArray(mapped)) {
			flattened.push(...mapped);
		} else {
			flattened.push(mapped);
		}
		return flattened;
	}, []);
}

// Individual characters that carry special meaning in regular expression
// syntax. Used below to decide whether a property-of-strings entry needs a
// leading backslash (based on its first character).
const SPECIAL_CHARS = new Set('\\^$.*+?()[]{}|'.split(''));

// Prepare a Regenerate set containing all code points, used for negative
// character classes (if any).
// Note: this instance is shared; code that needs to modify it calls
// `.clone()` first.
const UNICODE_SET = regenerate().addRange(0x0, 0x10FFFF);

// Prepare a Regenerate set containing all code points that are supposed to be
// matched by `/./u`. https://mths.be/es6#sec-atom
const DOT_SET_UNICODE = UNICODE_SET.clone() // all Unicode code points
	.remove(
		// minus `LineTerminator`s (https://mths.be/es6#sec-line-terminators):
		0x000A, // Line Feed <LF>
		0x000D, // Carriage Return <CR>
		0x2028, // Line Separator <LS>
		0x2029  // Paragraph Separator <PS>
	);

/**
 * Looks up the precomputed set for a character class escape.
 *
 * @param {string} character - Key passed to the escape-set table's `get`.
 * @param {boolean} unicode - Select one of the Unicode tables.
 * @param {boolean} ignoreCase - With `unicode`, select the ignore-case table.
 * @returns {*} Whatever the selected `ESCAPE_SETS` table holds for the key.
 */
const getCharacterClassEscapeSet = (character, unicode, ignoreCase) => {
	let table;
	if (!unicode) {
		table = ESCAPE_SETS.REGULAR;
	} else if (ignoreCase) {
		table = ESCAPE_SETS.UNICODE_IGNORE_CASE;
	} else {
		table = ESCAPE_SETS.UNICODE;
	}
	return table.get(character);
};

/**
 * Picks the set that `.` should match in Unicode mode.
 *
 * @param {boolean} dotAll - Whether line terminators are matched as well.
 * @returns {*} The shared `UNICODE_SET` when `dotAll` is truthy, otherwise
 *   the shared `DOT_SET_UNICODE`. Neither is cloned.
 */
const getUnicodeDotSet = (dotAll) => {
	if (dotAll) {
		return UNICODE_SET;
	}
	return DOT_SET_UNICODE;
};

/**
 * Loads the data module for a Unicode property (and optional value) from
 * `regenerate-unicode-properties`.
 *
 * @param {string} property - Property name; used as the directory when a
 *   value is given, or as the file name under `Binary_Property` otherwise.
 * @param {string} [value] - Property value; when falsy the property is
 *   treated as a binary property.
 * @returns {*} The export of the resolved data module.
 * @throws {Error} When the module cannot be loaded for any reason.
 */
const getUnicodePropertyValueSet = (property, value) => {
	let path;
	if (value) {
		path = `${ property }/${ value }`;
	} else {
		path = `Binary_Property/${ property }`;
	}
	try {
		return require(`regenerate-unicode-properties/${ path }.js`);
	} catch (exception) {
		// Any load failure is reported uniformly as an unrecognised value.
		const message =
			`Failed to recognize value \`${ value }\` for property ` +
			`\`${ property }\`.`;
		throw new Error(message);
	}
};

/**
 * Resolves a property escape that has no `=` (e.g. the `X` in `\p{X}`).
 *
 * Interpretations are tried in order: a `General_Category` value, a property
 * of strings, and finally a binary property of single code points. Failures
 * of the first two are deliberately ignored so the next one can be tried;
 * failure of the last one propagates to the caller.
 *
 * @param {string} value - The lone name or value.
 * @returns {*} The data returned by `getUnicodePropertyValueSet`.
 */
const handleLoneUnicodePropertyNameOrValue = (value) => {
	const fallibleLookups = [
		// Note: `unicodeMatchPropertyValue` throws on invalid values.
		() => {
			const property = 'General_Category';
			return getUnicodePropertyValueSet(
				property,
				unicodeMatchPropertyValue(property, value)
			);
		},
		() => getUnicodePropertyValueSet('Property_of_Strings', value)
	];
	for (const lookup of fallibleLookups) {
		try {
			return lookup();
		} catch (exception) {
			// Not this kind of property; fall through to the next candidate.
		}
	}
	// Note: `unicodeMatchProperty` throws on invalid properties.
	return getUnicodePropertyValueSet(unicodeMatchProperty(value));
};

/**
 * Resolves the contents of a Unicode property escape into a pair of
 * `characters` (a regenerate set) and `strings` (a `Set` of pattern strings).
 *
 * @param {string} value - Either a lone name/value or `Property=Value`.
 * @param {boolean} isNegative - Whether the escape is negated.
 * @returns {{characters: *, strings: Set}} Freshly created containers; the
 *   loaded property data itself is not modified.
 * @throws {Error} When asked to negate a property that has `strings`.
 */
const getUnicodePropertyEscapeSet = (value, isNegative) => {
	const segments = value.split('=');
	let propertyData;
	if (segments.length === 1) {
		propertyData = handleLoneUnicodePropertyNameOrValue(segments[0]);
	} else {
		// The pattern consists of two parts, i.e. `Property=Value`.
		const canonicalProperty = unicodeMatchProperty(segments[0]);
		const canonicalValue = unicodeMatchPropertyValue(canonicalProperty, segments[1]);
		propertyData = getUnicodePropertyValueSet(canonicalProperty, canonicalValue);
	}

	if (!isNegative) {
		return {
			characters: propertyData.characters.clone(),
			strings: propertyData.strings
				// We need to escape strings like *️⃣ to make sure that they can
				// be safely used in unions.
				? new Set(propertyData.strings.map(str => SPECIAL_CHARS.has(str[0]) ? `\\${str}` : str))
				: new Set()
		};
	}

	if (propertyData.strings) {
		throw new Error('Cannot negate Unicode property of strings');
	}
	return {
		characters: UNICODE_SET.clone().remove(propertyData.characters),
		strings: new Set()
	};
};


/**
 * Wraps the result of `getUnicodePropertyEscapeSet` in a character class
 * data record (the shape produced by `getCharacterClassEmptyData`).
 *
 * @param {string} property - Contents of the property escape.
 * @param {boolean} isNegative - Whether the escape is negated.
 * @returns {Object} Data whose `singleChars` is the resolved character set;
 *   `longStrings` / `maybeIncludesStrings` are set only when the property
 *   contributes at least one string.
 */
const getUnicodePropertyEscapeCharacterClassData = (property, isNegative) => {
	const { characters, strings } = getUnicodePropertyEscapeSet(property, isNegative);
	const data = getCharacterClassEmptyData();
	data.singleChars = characters;
	if (strings.size > 0) {
		data.longStrings = strings;
		data.maybeIncludesStrings = true;
	}
	return data;
};


// Adds, for every code point in `min`..`max` (inclusive), its case-folded
// counterpart (if `caseFold` reports one) to this set. The code points of the
// range itself are not added. `min` is always processed, even if it is
// greater than `max`. Returns the set for chaining.
regenerate.prototype.iuAddRange = function(min, max) {
	let codePoint = min;
	do {
		const counterpart = caseFold(codePoint);
		if (counterpart) {
			this.add(counterpart);
		}
	} while (++codePoint <= max);
	return this;
};

// Removes, for every code point in `min`..`max` (inclusive), its case-folded
// counterpart (if `caseFold` reports one) from this set. `min` is always
// processed, even if it is greater than `max`. Returns the set for chaining.
regenerate.prototype.iuRemoveRange = function(min, max) {
	let codePoint = min;
	do {
		const counterpart = caseFold(codePoint);
		if (counterpart) {
			this.remove(counterpart);
		}
	} while (++codePoint <= max);
	return this;
};


/**
 * Replaces the contents of AST node `item` in place with the AST obtained by
 * parsing `pattern`.
 *
 * The pattern is parsed with the `u` flag only when `config.useUnicodeFlag`
 * is truthy. Unless the parsed root is a character class, a group or a single
 * value, it is first wrapped in a non-capturing group.
 *
 * @param {Object} item - Node to overwrite (mutated via `Object.assign`).
 * @param {string} pattern - Replacement pattern source.
 */
const update = (item, pattern) => {
	const parsed = parse(pattern, config.useUnicodeFlag ? 'u' : '');
	const selfContainedTypes = ['characterClass', 'group', 'value'];
	const replacement = selfContainedTypes.includes(parsed.type)
		? parsed // No wrapping needed.
		: wrap(parsed, pattern); // Wrap the pattern in a non-capturing group.
	Object.assign(item, replacement);
};


/**
 * Builds a non-capturing group node around `tree`.
 *
 * @param {Object} tree - Node that becomes the group's only body element.
 * @param {string} pattern - Source text of `tree`, used for the `raw` field.
 * @returns {Object} A new `group` node with `behavior: 'ignore'`.
 */
const wrap = (tree, pattern) => ({
	type: 'group',
	behavior: 'ignore',
	body: [tree],
	raw: `(?:${ pattern })`
});


/**
 * Looks up the mapping for `codePoint` in `iuMappings`.
 *
 * @param {number} codePoint - Code point to look up.
 * @returns {*} The mapped value, or `false` when there is no mapping (or the
 *   mapped value is falsy).
 */
const caseFold = (codePoint) => {
	const mapped = iuMappings.get(codePoint);
	return mapped ? mapped : false;
};


/**
 * Builds the callbacks that fold one operand of a character class into the
 * accumulated class data (`data`, as created by `getCharacterClassEmptyData`)
 * for a given set operation.
 *
 * Every handler exposes the same five callbacks:
 *   - `single(data, cp)`            a single code point
 *   - `regSet(data, set)`           a regenerate set
 *   - `range(data, start, end)`     an inclusive code point range
 *   - `iuRange(data, start, end)`   the case-folded counterparts of a range
 *   - `nested(data, nestedData)`    the computed data of a nested operand
 *
 * For 'intersection' and 'subtraction', `data.first` tells the callback
 * whether it is seeing the first operand (which seeds the accumulator) or a
 * later one (which narrows it).
 *
 * @param {string} action - 'union', 'union-negative', 'intersection' or
 *   'subtraction'.
 * @returns {Object} The handler for `action`.
 * @throws {Error} If `action` is not one of the known actions.
 */
const buildHandler = (action) => {
	switch (action) {
		case 'union':
			return {
				single: (data, cp) => {
					data.singleChars.add(cp);
				},
				regSet: (data, set2) => {
					data.singleChars.add(set2);
				},
				range: (data, start, end) => {
					data.singleChars.addRange(start, end);
				},
				iuRange: (data, start, end) => {
					data.singleChars.iuAddRange(start, end);
				},
				nested: (data, nestedData) => {
					data.singleChars.add(nestedData.singleChars);
					for (const str of nestedData.longStrings) data.longStrings.add(str);
					if (nestedData.maybeIncludesStrings) data.maybeIncludesStrings = true;
				}
			};
		case 'union-negative': {
			// Each operand is complemented against the full code point set and
			// then united with what has been accumulated so far.
			const regSet = (data, set2) => {
				data.singleChars = UNICODE_SET.clone().remove(set2).add(data.singleChars);
			};
			return {
				single: (data, cp) => {
					const unicode = UNICODE_SET.clone();
					data.singleChars = data.singleChars.contains(cp) ? unicode : unicode.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().removeRange(start, end).add(data.singleChars);
				},
				iuRange: (data, start, end) => {
					data.singleChars = UNICODE_SET.clone().iuRemoveRange(start, end).add(data.singleChars);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);
					if (nestedData.maybeIncludesStrings) throw new Error('ASSERTION ERROR');
				}
			};
		}
		case 'intersection': {
			const regSet = (data, set2) => {
				// Note: the first operand is adopted by reference, not copied.
				if (data.first) data.singleChars = set2;
				else data.singleChars.intersection(set2);
			};
			return {
				// Operands made only of single code points can never match a
				// multi-character string, so they clear the string state.
				single: (data, cp) => {
					data.singleChars = data.first || data.singleChars.contains(cp) ? regenerate(cp) : regenerate();
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				regSet: (data, set) => {
					regSet(data, set);
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.intersection(regenerate().addRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				iuRange: (data, start, end) => {
					if (data.first) data.singleChars.iuAddRange(start, end);
					else data.singleChars.intersection(regenerate().iuAddRange(start, end));
					data.longStrings.clear();
					data.maybeIncludesStrings = false;
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Keep only the strings present in both operands.
						for (const str of data.longStrings) {
							if (!nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
						if (!nestedData.maybeIncludesStrings) data.maybeIncludesStrings = false;
					}
				}
			};
		}
		case 'subtraction': {
			const regSet = (data, set2) => {
				if (data.first) data.singleChars.add(set2);
				else data.singleChars.remove(set2);
			};
			return {
				single: (data, cp) => {
					if (data.first) data.singleChars.add(cp);
					else data.singleChars.remove(cp);
				},
				regSet: regSet,
				range: (data, start, end) => {
					if (data.first) data.singleChars.addRange(start, end);
					else data.singleChars.removeRange(start, end);
				},
				iuRange: (data, start, end) => {
					if (data.first) data.singleChars.iuAddRange(start, end);
					else data.singleChars.iuRemoveRange(start, end);
				},
				nested: (data, nestedData) => {
					regSet(data, nestedData.singleChars);

					if (data.first) {
						data.longStrings = nestedData.longStrings;
						data.maybeIncludesStrings = nestedData.maybeIncludesStrings;
					} else {
						// Drop the strings that the subtrahend also contains.
						for (const str of data.longStrings) {
							if (nestedData.longStrings.has(str)) data.longStrings.delete(str);
						}
					}
				}
			};
		}
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			// Report the offending `action`; no character class item is in
			// scope in this function.
			throw new Error(`Unknown set action: ${ action }`);
	}
};


/**
 * Creates a fresh, empty accumulator for character class computation.
 *
 * @returns {Object} A new record with an empty regenerate set, an empty
 *   string set, `first` set to `true`, and `transformed` seeded from
 *   `config.transform.unicodeFlag`.
 */
const getCharacterClassEmptyData = () => {
	const emptyData = {
		transformed: config.transform.unicodeFlag,
		singleChars: regenerate(),
		longStrings: new Set(),
		hasEmptyString: false,
		first: true,
		maybeIncludesStrings: false
	};
	return emptyData;
};


/**
 * Returns the code points that `codePoint` stands for.
 *
 * @param {number} codePoint - Code point taken from the pattern.
 * @returns {Array} `[codePoint, folded]` when both the `i` flag is set and the
 *   Unicode flag is being transformed and `caseFold` reports a counterpart;
 *   otherwise `[codePoint]`.
 */
const maybeFold = (codePoint) => {
	const foldingEnabled = config.flags.ignoreCase && config.transform.unicodeFlag;
	const counterpart = foldingEnabled ? caseFold(codePoint) : false;
	return counterpart ? [codePoint, counterpart] : [codePoint];
};


/**
 * Computes character class data for a `classStrings` node.
 *
 * Strings of exactly one character go into `singleChars` (together with
 * their case-folded counterpart where applicable). Every other string,
 * including an empty one, is stringified and stored in `longStrings`.
 *
 * @param {Object} classStrings - Node whose `strings` each have `characters`.
 * @param {Object} regenerateOptions - Options forwarded to regenerate's
 *   `toString`.
 * @returns {Object} The populated character class data.
 */
const computeClassStrings = (classStrings, regenerateOptions) => {
	const data = getCharacterClassEmptyData();

	for (const { characters } of classStrings.strings) {
		if (characters.length === 1) {
			for (const cp of maybeFold(characters[0].codePoint)) {
				data.singleChars.add(cp);
			}
			continue;
		}

		let pieceSource;
		if (config.flags.ignoreCase && config.transform.unicodeFlag) {
			// Emit each character as a tiny set holding the character and its
			// case-folded counterpart (if any).
			pieceSource = characters.reduce((source, ch) => {
				const alternatives = regenerate(ch.codePoint);
				const counterpart = caseFold(ch.codePoint);
				if (counterpart) {
					alternatives.add(counterpart);
				}
				return source + alternatives.toString(regenerateOptions);
			}, '');
		} else {
			pieceSource = characters.map(ch => generate(ch)).join('');
		}

		data.longStrings.add(pieceSource);
		data.maybeIncludesStrings = true;
	}

	return data;
};


/**
 * Computes the set of characters and strings matched by a character class
 * node, recursing into nested classes.
 *
 * The class `kind` selects a pair of handlers (see `buildHandler`): one for
 * ordinary operands and one for negated nested classes.
 *
 * @param {Object} characterClassItem - `characterClass` AST node.
 * @param {Object} regenerateOptions - Options forwarded to regenerate's
 *   `toString`.
 * @returns {Object} The accumulated character class data.
 * @throws {SyntaxError} If the class is negated but may contain strings.
 * @throws {Error} On an unknown class kind or operand type.
 */
const computeCharacterClass = (characterClassItem, regenerateOptions) => {
	const data = getCharacterClassEmptyData();

	let handlePositive;
	let handleNegative;
	const kind = characterClassItem.kind;
	if (kind === 'union') {
		handlePositive = buildHandler('union');
		handleNegative = buildHandler('union-negative');
	} else if (kind === 'intersection') {
		// Intersecting with a negated operand is a subtraction.
		handlePositive = buildHandler('intersection');
		handleNegative = buildHandler('subtraction');
	} else if (kind === 'subtraction') {
		// Subtracting a negated operand is an intersection.
		handlePositive = buildHandler('subtraction');
		handleNegative = buildHandler('intersection');
	} else {
		// This branch is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		throw new Error(`Unknown character class kind: ${ characterClassItem.kind }`);
	}

	for (const item of characterClassItem.body) {
		switch (item.type) {
			case 'value': {
				for (const cp of maybeFold(item.codePoint)) {
					handlePositive.single(data, cp);
				}
				break;
			}
			case 'characterClassRange': {
				const low = item.min.codePoint;
				const high = item.max.codePoint;
				handlePositive.range(data, low, high);
				if (config.flags.ignoreCase && config.transform.unicodeFlag) {
					handlePositive.iuRange(data, low, high);
				}
				break;
			}
			case 'characterClassEscape': {
				const escapeSet = getCharacterClassEscapeSet(
					item.value,
					config.flags.unicode,
					config.flags.ignoreCase
				);
				handlePositive.regSet(data, escapeSet);
				break;
			}
			case 'unicodePropertyEscape': {
				const propertyData = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative);
				handlePositive.nested(data, propertyData);
				data.transformed =
					data.transformed ||
					config.transform.unicodePropertyEscapes ||
					(config.transform.unicodeSetsFlag && propertyData.maybeIncludesStrings);
				break;
			}
			case 'characterClass': {
				const nestedHandler = item.negative ? handleNegative : handlePositive;
				const nestedResult = computeCharacterClass(item, regenerateOptions);
				nestedHandler.nested(data, nestedResult);
				data.transformed = true;
				break;
			}
			case 'classStrings': {
				handlePositive.nested(data, computeClassStrings(item, regenerateOptions));
				data.transformed = true;
				break;
			}
			// The `default` clause is only here as a safeguard; it should never be
			// reached. Code coverage tools should ignore it.
			/* istanbul ignore next */
			default:
				throw new Error(`Unknown term type: ${ item.type }`);
		}

		// Later operands narrow or extend the accumulator instead of seeding it.
		data.first = false;
	}

	if (characterClassItem.negative && data.maybeIncludesStrings) {
		throw new SyntaxError('Cannot negate set containing strings');
	}

	return data;
};


/**
 * Rewrites a character class node in place when its computed data says it
 * was transformed; otherwise leaves it untouched.
 *
 * @param {Object} characterClassItem - Node to rewrite.
 * @param {Object} regenerateOptions - Options forwarded to regenerate's
 *   `toString`.
 * @param {Object} [computed] - Precomputed class data; defaults to the
 *   result of `computeCharacterClass` for the node.
 * @returns {Object} The same `characterClassItem` object.
 */
const processCharacterClass = (
	characterClassItem,
	regenerateOptions,
	computed = computeCharacterClass(characterClassItem, regenerateOptions)
) => {
	const isNegated = characterClassItem.negative;
	if (!computed.transformed) {
		return characterClassItem;
	}

	const charsSource = computed.singleChars.toString(regenerateOptions);
	const strings = computed.longStrings;

	if (isNegated) {
		let replacement;
		if (config.useUnicodeFlag) {
			// Reuse the generated class body inside a negated class.
			replacement = `[^${charsSource[0] === '[' ? charsSource.slice(1, -1) : charsSource}]`;
		} else {
			// Without the `u` flag, emulate negation with a negative lookahead.
			replacement = `(?!${charsSource})[\\s\\S]`;
		}
		update(characterClassItem, replacement);
		return characterClassItem;
	}

	// Longest alternatives first; an empty string, if present, sorts last.
	const alternatives = [...strings].sort((a, b) => b.length - a.length);
	const includeChars = !(charsSource === '[]' && strings.size !== 0);
	if (includeChars) {
		// Keep the single-character set ahead of a trailing empty string.
		const insertAt = strings.has('') ? alternatives.length - 1 : alternatives.length;
		alternatives.splice(insertAt, 0, charsSource);
	}
	update(characterClassItem, alternatives.join('|'));
	return characterClassItem;
};


/**
 * Throws if any named reference was recorded as unmatched.
 *
 * @param {Object} groups - Group bookkeeping; only `unmatchedReferences`
 *   (keyed by group name) is read.
 * @throws {Error} Listing the unmatched names, comma-separated.
 */
const assertNoUnmatchedReferences = (groups) => {
	const pendingNames = Object.keys(groups.unmatchedReferences);
	if (pendingNames.length === 0) {
		return;
	}
	throw new Error(`Unknown group names: ${pendingNames}`);
};


/**
 * Recursively rewrites one AST node according to the active `config`.
 *
 * Most nodes are updated in place and returned; a named back-reference is
 * replaced by a newly created node instead, so callers must use the return
 * value.
 *
 * @param {Object} item - AST node to process.
 * @param {Object} regenerateOptions - Options forwarded to regenerate's
 *   `toString`.
 * @param {Object} groups - Mutable group bookkeeping: `lastIndex`, `names`,
 *   `namesConflicts`, `unmatchedReferences` and optional `onNamedGroup`.
 * @returns {Object} The processed (possibly replaced) node.
 * @throws {Error} On duplicate group names within one alternative, on
 *   properties of strings without the `v` flag, or on unknown node types.
 */
const processTerm = (item, regenerateOptions, groups) => {
	switch (item.type) {
		case 'dot':
			if (config.transform.unicodeFlag) {
				update(
					item,
					getUnicodeDotSet(config.flags.dotAll).toString(regenerateOptions)
				);
			} else if (config.transform.dotAllFlag) {
				// TODO: consider changing this at the regenerate level.
				update(item, '[\\s\\S]');
			}
			break;
		case 'characterClass':
			item = processCharacterClass(item, regenerateOptions);
			break;
		case 'unicodePropertyEscape': {
			const data = getUnicodePropertyEscapeCharacterClassData(item.value, item.negative);
			if (data.maybeIncludesStrings) {
				if (!config.flags.unicodeSets) {
					throw new Error(
						'Properties of strings are only supported when using the unicodeSets (v) flag.'
					);
				}
				if (config.transform.unicodeSetsFlag) {
					data.transformed = true;
					item = processCharacterClass(item, regenerateOptions, data);
				}
			} else if (config.transform.unicodePropertyEscapes) {
				update(
					item,
					data.singleChars.toString(regenerateOptions)
				);
			}
			break;
		}
		case 'characterClassEscape':
			if (config.transform.unicodeFlag) {
				update(
					item,
					getCharacterClassEscapeSet(
						item.value,
						/* config.transform.unicodeFlag implies config.flags.unicode */ true,
						config.flags.ignoreCase
					).toString(regenerateOptions)
				);
			}
			break;
		case 'group':
			// Only capturing groups consume a group index.
			if (item.behavior === 'normal') {
				groups.lastIndex++;
			}
			if (item.name && config.transform.namedGroups) {
				const name = item.name.value;

				if (groups.namesConflicts[name]) {
					throw new Error(
						`Group '${ name }' has already been defined in this context.`
					);
				}
				groups.namesConflicts[name] = true;

				const index = groups.lastIndex;
				// Dropping the name turns the group into a plain capturing group.
				delete item.name;

				if (!groups.names[name]) {
					groups.names[name] = [];
				}
				groups.names[name].push(index);

				if (groups.onNamedGroup) {
					groups.onNamedGroup.call(null, name, index);
				}

				if (groups.unmatchedReferences[name]) {
					delete groups.unmatchedReferences[name];
				}
			}
			/* falls through */
		case 'quantifier':
			item.body = item.body.map(term => {
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		case 'disjunction': {
			// Every alternative gets its own conflict scope inheriting from the
			// outer one, so the same name may be reused across alternatives.
			const outerNamesConflicts = groups.namesConflicts;
			item.body = item.body.map(term => {
				groups.namesConflicts = Object.create(outerNamesConflicts);
				return processTerm(term, regenerateOptions, groups);
			});
			break;
		}
		case 'alternative':
			item.body = flatMap(item.body, term => {
				const res = processTerm(term, regenerateOptions, groups);
				// Alternatives cannot contain alternatives; flatten them.
				return res.type === 'alternative' ? res.body : res;
			});
			break;
		case 'value': {
			const codePoint = item.codePoint;
			const set = regenerate(codePoint);
			if (config.flags.ignoreCase && config.transform.unicodeFlag) {
				const folded = caseFold(codePoint);
				if (folded) {
					set.add(folded);
				}
			}
			update(item, set.toString(regenerateOptions));
			break;
		}
		case 'reference':
			if (item.name) {
				const name = item.name.value;
				const indexes = groups.names[name];
				if (indexes) {
					// One numeric back-reference per group recorded under the name.
					const body = indexes.map(index => ({
						'type': 'reference',
						'matchIndex': index,
						'raw': '\\' + index,
					}));
					if (body.length === 1) {
						return body[0];
					}
					return {
						'type': 'alternative',
						'body': body,
						'raw': body.map(term => term.raw).join(''),
					};
				}

				// This named reference comes before the group where it’s defined,
				// so it’s always an empty match.
				groups.unmatchedReferences[name] = true;
				return {
					'type': 'group',
					'behavior': 'ignore',
					'body': [],
					'raw': '(?:)',
				};
			}
			break;
		case 'anchor':
		case 'empty':
			// Nothing to do here.
			break;
		// The `default` clause is only here as a safeguard; it should never be
		// reached. Code coverage tools should ignore it.
		/* istanbul ignore next */
		default:
			throw new Error(`Unknown term type: ${ item.type }`);
	}
	return item;
};


// Module-level state describing the current rewrite. `rewritePattern`
// overwrites every field of `flags` and `transform` on each call.
const config = {
	// Which RegExp flags the input pattern was given.
	flags: {
		ignoreCase: false,
		unicode: false,
		unicodeSets: false,
		dotAll: false,
	},
	// Which features are to be transformed away.
	transform: {
		dotAllFlag: false,
		unicodeFlag: false,
		unicodeSetsFlag: false,
		unicodePropertyEscapes: false,
		namedGroups: false,
	},
	// Whether the output still relies on Unicode mode: the input used `u` or
	// `v` and the Unicode flag is not being transformed.
	get useUnicodeFlag() {
		const { flags, transform } = this;
		const inputIsUnicode = flags.unicode || flags.unicodeSets;
		return inputIsUnicode && !transform.unicodeFlag;
	}
};


/**
 * Validates the options object accepted by `rewritePattern`.
 *
 * Keys are checked in enumeration order and the first invalid one throws.
 * `null` and `undefined` values are accepted for every known key.
 *
 * @param {Object} [options] - Options to validate; falsy means "no options".
 * @throws {Error} On an unknown key or a disallowed value.
 */
const validateOptions = (options) => {
	if (!options) {
		return;
	}

	const isOff = (value) => value == null || value === false;

	Object.keys(options).forEach((key) => {
		const value = options[key];

		if (
			key === 'dotAllFlag' ||
			key === 'unicodeFlag' ||
			key === 'unicodePropertyEscapes' ||
			key === 'namedGroups'
		) {
			if (!isOff(value) && value !== 'transform') {
				throw new Error(`.${key} must be false (default) or 'transform'.`);
			}
		} else if (key === 'unicodeSetsFlag') {
			if (!isOff(value) && value !== 'parse' && value !== 'transform') {
				throw new Error(`.${key} must be false (default), 'parse' or 'transform'.`);
			}
		} else if (key === 'onNamedGroup') {
			if (value != null && typeof value !== 'function') {
				throw new Error('.onNamedGroup must be a function.');
			}
		} else {
			throw new Error(`.${key} is not a valid regexpu-core option.`);
		}
	});
};


// Reports whether `flags` contains `flag`; a falsy `flags` (missing or empty)
// contains nothing.
const hasFlag = (flags, flag) => {
	if (!flags) {
		return false;
	}
	return flags.includes(flag);
};

// Reports whether option `name` is set to exactly 'transform'; a falsy
// `options` never requests a transform.
const transform = (options, name) => {
	if (!options) {
		return false;
	}
	return options[name] === 'transform';
};


/**
 * Rewrites a regular expression pattern so that the features selected in
 * `options` are transformed away.
 *
 * Note: this overwrites the module-level `config` on every call.
 *
 * @param {string} pattern - Pattern source.
 * @param {string} [flags] - RegExp flags the pattern is used with; only
 *   `u`, `v`, `i` and `s` are inspected here.
 * @param {Object} [options] - See `validateOptions` for the accepted keys.
 * @returns {string} The rewritten pattern source.
 * @throws {Error} On invalid options, or when a named reference has no
 *   matching group.
 */
const rewritePattern = (pattern, flags, options) => {
	validateOptions(options);

	// Record which flags the input pattern uses.
	const inputFlags = config.flags;
	inputFlags.unicode = hasFlag(flags, 'u');
	inputFlags.unicodeSets = hasFlag(flags, 'v');
	inputFlags.ignoreCase = hasFlag(flags, 'i');
	inputFlags.dotAll = hasFlag(flags, 's');

	// A transform is only active when its flag is actually present.
	const requested = config.transform;
	requested.dotAllFlag = inputFlags.dotAll && transform(options, 'dotAllFlag');
	requested.unicodeFlag = (inputFlags.unicode || inputFlags.unicodeSets) && transform(options, 'unicodeFlag');
	requested.unicodeSetsFlag = inputFlags.unicodeSets && transform(options, 'unicodeSetsFlag');
	// unicodeFlag: 'transform' implies unicodePropertyEscapes: 'transform'
	requested.unicodePropertyEscapes = inputFlags.unicode && (
		transform(options, 'unicodeFlag') || transform(options, 'unicodePropertyEscapes')
	);
	requested.namedGroups = transform(options, 'namedGroups');

	const regjsparserFeatures = {
		'unicodeSet': Boolean(options && options.unicodeSetsFlag),

		// Enable every stable RegExp feature by default
		'unicodePropertyEscape': true,
		'namedGroups': true,
		'lookbehind': true,
	};

	const regenerateOptions = {
		'hasUnicodeFlag': config.useUnicodeFlag,
		'bmpOnly': !inputFlags.unicode
	};

	const groups = {
		'onNamedGroup': options && options.onNamedGroup,
		'lastIndex': 0,
		'names': Object.create(null), // { [name]: Array<index> }
		'namesConflicts': Object.create(null), // { [name]: true }
		'unmatchedReferences': Object.create(null) // { [name]: true }
	};

	const ast = parse(pattern, flags, regjsparserFeatures);
	// Note: `processTerm` mutates `ast` and `groups`.
	processTerm(ast, regenerateOptions, groups);
	assertNoUnmatchedReferences(groups);
	return generate(ast);
};


module.exports = rewritePattern;