165 lines
5.1 KiB
TypeScript
165 lines
5.1 KiB
TypeScript
import {bodyRegExps, namedReferences} from './named-references.js';
|
|
import {numericUnicodeMap} from './numeric-unicode-map.js';
|
|
import {fromCodePoint, getCodePoint} from './surrogate-pairs.js';
|
|
|
|
/**
 * Named-reference tables addressable by every `Level` value: all tables
 * exported by `namedReferences`, plus an `all` key that points at the very
 * same table object as `html5`.
 */
const allNamedReferences = {
    ...namedReferences,
    all: namedReferences.html5
};
|
|
|
|
/**
 * Entity set to work with. In this module's lookup tables `all` resolves to
 * the same data as `html5`.
 */
export type Level =
    | 'xml'
    | 'html4'
    | 'html5'
    | 'all';

/** Options shared by the encoding and decoding entry points. */
interface CommonOptions {
    /** Entity set used for named references. */
    level?: Level;
}

/** Selects which expression in `encodeRegExps` picks the characters to encode. */
export type EncodeMode =
    | 'specialChars'
    | 'nonAscii'
    | 'nonAsciiPrintable'
    | 'nonAsciiPrintableOnly'
    | 'extensive';

/** Options accepted by `encode`. */
export interface EncodeOptions extends CommonOptions {
    /** Which characters get encoded. */
    mode?: EncodeMode;
    /** Notation of the numeric reference written when a character has no named reference. */
    numeric?: 'decimal' | 'hexadecimal';
}

/**
 * Context the text being decoded comes from:
 * - `strict`: only entities terminated by `;` are decoded;
 * - `attribute`: an entity directly followed by `=` is left untouched;
 * - `body`: matching is delegated to the per-level `bodyRegExps` expressions.
 */
export type DecodeScope =
    | 'strict'
    | 'body'
    | 'attribute';

/** Options accepted by `decode`. */
export interface DecodeOptions extends CommonOptions {
    /** Decoding context; see `DecodeScope`. */
    scope?: DecodeScope;
}
|
|
|
|
/**
 * One global expression per encode mode; each match is a single unit to be
 * replaced by a reference. In every mode except `specialChars`, a high
 * surrogate is matched together with the low surrogate that follows it (when
 * there is one), so an astral character is handled as one match.
 */
const encodeRegExps: Record<EncodeMode, RegExp> = {
    // Only the five markup-significant characters: < > ' " &
    specialChars: /[<>'"&]/g,

    // The five special characters plus everything from U+0080 upward.
    nonAscii: /[<>'"&\u0080-\uD7FF\uE000-\uFFFF\uDC00-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g,

    // As `nonAscii`, plus the listed control-character ranges and everything from \x7f upward.
    // NOTE(review): the ranges leave \x09-\x10 and \x16 unmatched — confirm that is intended.
    nonAsciiPrintable: /[<>'"&\x01-\x08\x11-\x15\x17-\x1F\x7f-\uD7FF\uE000-\uFFFF\uDC00-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g,

    // Same as `nonAsciiPrintable` but without the five special characters.
    nonAsciiPrintableOnly: /[\x01-\x08\x11-\x15\x17-\x1F\x7f-\uD7FF\uE000-\uFFFF\uDC00-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g,

    // Everything except \x00, \x0d, space, `-`, ASCII digits, ASCII letters and `~`.
    extensive: /[\x01-\x0c\x0e-\x1f\x21-\x2c\x2e-\x2f\x3a-\x40\x5b-\x60\x7b-\x7d\x7f-\uD7FF\uE000-\uFFFF\uDC00-\uDFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]?/g
};
|
|
|
|
/** Options object `encode` falls back to when the caller passes none at all. */
const defaultEncodeOptions: EncodeOptions = {
    level: 'all',
    mode: 'specialChars',
    numeric: 'decimal'
};
|
|
|
|
/**
 * Replaces characters of `text` with HTML/XML entity references.
 *
 * `mode` selects which characters are replaced, `level` selects the table of
 * named references, and `numeric` selects the notation of the numeric
 * reference written for a character that has no named reference.
 *
 * @param text - Text to encode; any falsy value produces an empty string.
 * @param options - Defaults: `mode: 'specialChars'`, `numeric: 'decimal'`, `level: 'all'`.
 * @returns The encoded text.
 */
export function encode(
    text: string | undefined | null,
    {mode = 'specialChars', numeric = 'decimal', level = 'all'}: EncodeOptions = defaultEncodeOptions
) {
    if (!text) {
        return '';
    }

    const pattern = encodeRegExps[mode];
    const namedByCharacter = allNamedReferences[level].characters;
    const useHex = numeric === 'hexadecimal';

    const toReference = (matched: string) => {
        const named = namedByCharacter[matched];
        if (named) {
            return named;
        }

        // A match longer than one code unit is a surrogate pair and needs its full code point.
        const codePoint = matched.length > 1 ? getCodePoint(matched, 0)! : matched.charCodeAt(0);
        const prefixed = useHex ? '&#x' + codePoint.toString(16) : '&#' + codePoint;
        return prefixed + ';';
    };

    // Called through the prototype so that a non-string truthy `text` is coerced rather than rejected.
    return String.prototype.replace.call(text, pattern, toReference);
}
|
|
|
|
/** Options object `decode` falls back to when the caller passes none at all. */
const defaultDecodeOptions: DecodeOptions = {
    level: 'all',
    scope: 'body'
};
|
|
|
|
/**
 * Matches a decimal, hexadecimal or alphanumeric-named entity that is
 * terminated by `;`.
 */
const strict = /&(?:#\d+|#[xX][\da-fA-F]+|[0-9a-zA-Z]+);/g;

/**
 * Same entity shapes as `strict`, but the terminator is optional and may be
 * either `;` or `=`; a trailing `=` is kept in the match so the caller can
 * recognise it.
 */
const attribute = /&(?:#\d+|#[xX][\da-fA-F]+|[0-9a-zA-Z]+)[;=]?/g;
|
|
|
|
/**
 * Builds the per-scope table for one level. The `strict` and `attribute`
 * expressions are shared by every level; only the `body` expression differs.
 */
const createScopeRegExps = (body: RegExp): Record<DecodeScope, RegExp> => ({
    strict,
    attribute,
    body
});

/** Decode expressions for every concrete level (that is, every level except `all`). */
const baseDecodeRegExps: Record<Exclude<Level, 'all'>, Record<DecodeScope, RegExp>> = {
    xml: createScopeRegExps(bodyRegExps.xml),
    html4: createScopeRegExps(bodyRegExps.html4),
    html5: createScopeRegExps(bodyRegExps.html5)
};
|
|
|
|
/**
 * Decode expressions for every `Level`; `all` reuses the very same table
 * object as `html5`.
 */
const decodeRegExps: Record<Level, Record<DecodeScope, RegExp>> = {
    xml: baseDecodeRegExps.xml,
    html4: baseDecodeRegExps.html4,
    html5: baseDecodeRegExps.html5,
    all: baseDecodeRegExps.html5
};
|
|
|
|
/** Local alias for `String.fromCharCode`. */
const fromCharCode = String.fromCharCode;

/** U+FFFD, the character substituted for a numeric reference that is out of range. */
const outOfBoundsChar = fromCharCode(0xfffd);
|
|
|
|
/** Options object `decodeEntity` falls back to when the caller passes none at all. */
const defaultDecodeEntityOptions: CommonOptions = {level: 'all'};
|
|
|
|
/**
 * Resolves one entity string to the text it stands for.
 *
 * The input is returned unchanged when it must not be decoded (see
 * `isAttribute` / `isStrict`) or when it is neither a known named reference
 * nor a well-formed numeric reference.
 *
 * @param entity - Entity text as matched, e.g. `&amp;`, `&#38;` or `&#x26;`.
 * @param references - Lookup table from entity text to its replacement.
 * @param isAttribute - When true, an entity ending with `=` is left as is.
 * @param isStrict - When true, an entity not ending with `;` is left as is.
 * @returns The decoded text, or `entity` itself when nothing applies.
 */
function getDecodedEntity(
    entity: string,
    references: Record<string, string>,
    isAttribute: boolean,
    isStrict: boolean
): string {
    const lastChar = entity[entity.length - 1];
    if ((isAttribute && lastChar === '=') || (isStrict && lastChar !== ';')) {
        return entity;
    }

    // Only own keys count: on a plain-object table, inputs such as `constructor` or
    // `toString` would otherwise resolve to members inherited from `Object.prototype`
    // and a non-string value would be returned.
    const named = Object.prototype.hasOwnProperty.call(references, entity) ? references[entity] : undefined;
    if (named) {
        return named;
    }

    if (entity[0] !== '&' || entity[1] !== '#') {
        return entity;
    }

    const radixMarker = entity[2];
    const isHex = radixMarker === 'x' || radixMarker === 'X';
    // `parseInt` stops at the first character that is not a digit of the given radix,
    // so the trailing `;` (if any) is ignored. The radix is always passed explicitly.
    const code = isHex ? parseInt(entity.slice(3), 16) : parseInt(entity.slice(2), 10);

    if (isNaN(code) || code < 0) {
        // No usable digits after `&#` / `&#x`: not a numeric reference, keep the text.
        return entity;
    }
    if (code > 0x10ffff) {
        // U+10FFFF is the last valid code point; only larger values are out of range.
        return outOfBoundsChar;
    }
    if (code > 0xffff) {
        return fromCodePoint(code);
    }
    // Some code units are remapped through `numericUnicodeMap` before conversion.
    return fromCharCode(numericUnicodeMap[code] || code);
}
|
|
|
|
/**
 * Decodes one entity, looked up in the named references of `level` and
 * otherwise treated as a numeric reference.
 *
 * @param entity - Entity text such as `&amp;`; any falsy value produces an empty string.
 * @param options - `level` defaults to `'all'`.
 * @returns The decoded text, as produced by `getDecodedEntity` with both the
 * attribute and the strict rule switched off.
 */
export function decodeEntity(
    entity: string | undefined | null,
    {level = 'all'}: CommonOptions = defaultDecodeEntityOptions
): string {
    return entity ? getDecodedEntity(entity, allNamedReferences[level].entities, false, false) : '';
}
|
|
|
|
/**
 * Decodes every entity found in `text`.
 *
 * @param text - Text to decode; any falsy value produces an empty string.
 * @param options - `level` defaults to `'all'`. `scope` defaults to `'strict'`
 * when `level` is `'xml'` and to `'body'` otherwise.
 * @returns The text with each matched entity replaced by the result of
 * `getDecodedEntity`.
 */
export function decode(
    text: string | undefined | null,
    {level = 'all', scope = level === 'xml' ? 'strict' : 'body'}: DecodeOptions = defaultDecodeOptions
): string {
    if (!text) {
        return '';
    }

    const decodeRegExp = decodeRegExps[level][scope];
    const references = allNamedReferences[level].entities;
    const isAttribute = scope === 'attribute';
    const isStrict = scope === 'strict';

    // Called through the prototype, exactly as `encode` does, so that a non-string
    // truthy `text` coming from untyped callers is coerced instead of throwing.
    return String.prototype.replace.call(text, decodeRegExp, (entity: string) =>
        getDecodedEntity(entity, references, isAttribute, isStrict)
    );
}
|