Initial commit
This commit is contained in:
370
node_modules/maplibre-gl/build/generate-unicode-data.ts
generated
vendored
Normal file
370
node_modules/maplibre-gl/build/generate-unicode-data.ts
generated
vendored
Normal file
@@ -0,0 +1,370 @@
|
||||
import * as fs from 'fs';
|
||||
import * as regenerate from 'regenerate';
|
||||
|
||||
/**
 * Version of the Unicode Standard that the heuristics in this script were
 * written against. Keep it identical to the version embedded in the name of
 * the `@unicode/unicode-*` package listed in package.json.
 *
 * Before bumping it, review the scripts, blocks, and characters introduced by
 * the newer version of the standard: some of them may call for different
 * script detection.
 */
const unicodeVersion = '17.0.0';
|
||||
|
||||
/**
 * Collects into a single `regenerate` set the code points of every named
 * Unicode block and script.
 *
 * Hyphens and spaces in each name are turned into underscores to form the
 * directory name inside the `@unicode/unicode-<version>` package, whose
 * `code-points.js` default export is then added to the set. Blocks are loaded
 * first, then scripts, one module at a time.
 *
 * @param blocks - Unicode block names, e.g. `'Kana Extended-A'`.
 * @param scripts - Unicode script names, e.g. `'Khitan Small Script'`.
 * @returns The set holding the union of all loaded code points.
 */
async function createSet(blocks: Array<string>, scripts: Array<string>): Promise<regenerate.regenerate> {
    const codePoints = regenerate.default();
    const sources: Array<[string, Array<string>]> = [
        ['Block', blocks],
        ['Script', scripts],
    ];

    for (const [category, names] of sources) {
        for (const name of names) {
            const slug = name.replace(/[- ]/g, '_');
            const data = await import(`@unicode/unicode-${unicodeVersion}/${category}/${slug}/code-points.js`);
            codePoints.add(data.default);
        }
    }

    return codePoints;
}
|
||||
|
||||
/**
 * Builds the regular expression source that matches every code point to be
 * rendered with the local ideograph font family.
 *
 * The set is the union of the blocks and scripts listed below plus every code
 * point carrying the `Ideographic` binary property.
 *
 * @returns The pattern string produced by the set's `toString()`.
 */
async function usesLocalIdeographFontFamily(): Promise<string> {
    // Local rendering is preferred for Unicode code blocks that represent
    // writing systems for which TinySDF produces optimal results and greatly
    // reduces bandwidth consumption. In general, TinySDF is best for any
    // writing system typically set in a monospaced font. With more than 99,000
    // codepoints accessed essentially at random, Hanzi/Kanji/Hanja (from the
    // CJK Unified Ideographs blocks) is the canonical example of wasteful
    // bandwidth consumption when rendered remotely. For visual consistency
    // within CJKV text, even relatively small CJKV and other siniform code
    // blocks prefer local rendering.
    const set = await createSet([
        'CJK Compatibility Forms',
        'CJK Compatibility',
        'CJK Radicals Supplement',
        'CJK Strokes',
        'CJK Unified Ideographs',
        'Enclosed CJK Letters And Months',
        'Enclosed Ideographic Supplement',
        'Halfwidth And Fullwidth Forms',
        'Hangul Syllables',
        'Hiragana',
        'Ideographic Symbols And Punctuation',
        'Kana Extended-A',
        'Kana Extended-B',
        'Kana Supplement',
        'Kangxi Radicals',
        'Katakana', // includes "ー"
        'Katakana Phonetic Extensions',
        // The symbol blocks below are not exhaustive; add others as needed.
        // ('Halfwidth And Fullwidth Forms' is already listed above, so it is
        // not repeated here.)
        'CJK Symbols And Punctuation', // 、。〃〄々〆〇〈〉《》「...
        'Small Kana Extension',
        'Vertical Forms',
    ], [
        'Bopomofo',
        'Han',
        'Hangul',
        'Hiragana',
        'Katakana',
        'Khitan Small Script',
        'Nushu',
        'Tangut',
        'Yi',
    ]);

    // Also cover anything flagged with the Ideographic binary property.
    set.add((await import(`@unicode/unicode-${unicodeVersion}/Binary_Property/Ideographic/code-points.js`)).default);

    return set.toString();
}
|
||||
|
||||
/**
 * Builds the regular expression source that matches every code point taking
 * part in ideographic line breaking.
 *
 * @returns The pattern string produced by the set's `toString()`.
 */
async function allowsIdeographicBreaking(): Promise<string> {
    // Although Unicode treats only CJKV as ideographic, a few other scripts
    // are mixed with CJKV text and so may break lines the same way.
    const blockNames = [
        'CJK Compatibility Forms',
        'CJK Compatibility',
        'CJK Radicals Supplement',
        'CJK Strokes',
        'CJK Symbols And Punctuation',
        'Enclosed CJK Letters And Months',
        'Enclosed Ideographic Supplement',
        'Halfwidth And Fullwidth Forms',
        'Ideographic Description Characters',
        'Ideographic Symbols And Punctuation',
        'Kana Extended-A',
        'Kana Extended-B',
        'Kana Supplement',
        'Kangxi Radicals',
        'Katakana Phonetic Extensions',
        'Small Kana Extension',
        'Vertical Forms',
    ];
    const scriptNames = [
        'Bopomofo',
        'Han',
        'Hiragana',
        'Katakana',
        'Khitan Small Script',
        'Nushu',
        'Tangut',
        'Yi',
    ];

    const codePoints = await createSet(blockNames, scriptNames);
    return codePoints.toString();
}
|
||||
|
||||
// The following logic comes from
|
||||
// <https://www.unicode.org/Public/17.0.0/ucd/VerticalOrientation.txt>.
|
||||
// Keep it synchronized with
|
||||
// <https://www.unicode.org/Public/UCD/latest/ucd/VerticalOrientation.txt>.
|
||||
// The data file denotes with “U” or “Tu” any codepoint that may be drawn
|
||||
// upright in vertical text but does not distinguish between upright and
|
||||
// “neutral” characters.
|
||||
|
||||
/**
 * Builds the regular expression source that matches every code point treated
 * as having upright vertical orientation.
 *
 * Whole blocks and scripts are loaded first; two tone marks are then added
 * and individual code points and ranges inside some of those blocks are
 * removed again.
 *
 * @returns The pattern string produced by the set's `toString()`.
 */
async function hasUprightVerticalOrientation(): Promise<string> {
    const uprightBlocks = [
        'Alchemical Symbols',
        'Anatolian Hieroglyphs',
        'Byzantine Musical Symbols',
        'Chess Symbols',
        'CJK Compatibility Forms',
        'CJK Compatibility',
        'CJK Strokes',
        'CJK Symbols And Punctuation',
        'Counting Rod Numerals',
        'Domino Tiles',
        'Emoticons',
        'Enclosed Alphanumeric Supplement',
        'Enclosed CJK Letters And Months',
        'Geometric Shapes Extended',
        'Halfwidth And Fullwidth Forms',
        'Ideographic Description Characters',
        'Kanbun',
        'Katakana',
        'Mahjong Tiles',
        'Mayan Numerals',
        'Meroitic Hieroglyphs',
        'Miscellaneous Symbols And Pictographs',
        'Miscellaneous Symbols Supplement',
        'Musical Symbols',
        'Ornamental Dingbats',
        'Playing Cards',
        'Siddham',
        'Small Form Variants',
        'Small Kana Extension',
        'Soyombo',
        'Supplemental Symbols And Pictographs',
        'Sutton SignWriting',
        'Symbols And Pictographs Extended-A',
        'Tai Xuan Jing Symbols',
        'Transport And Map Symbols',
        'Vertical Forms',
        'Yijing Hexagram Symbols',
        'Zanabazar Square',
        'Znamenny Musical Notation',
    ];
    const uprightScripts = [
        'Bopomofo',
        'Canadian Aboriginal',
        'Han',
        'Hangul',
        'Hiragana',
        'Katakana',
        'Khitan Small Script',
        'Nushu',
        'Tangut',
        'Yi',
    ];

    const upright = await createSet(uprightBlocks, uprightScripts);

    // Individual tone marks that are not covered by the lists above
    upright.add(0x02EA); // modifier letter yin departing tone mark
    upright.add(0x02EB); // modifier letter yang departing tone mark

    // Exceptions to CJK Compatibility Forms
    upright.removeRange(0xFE49, 0xFE4F); // dashed overline … wavy low line

    // Exceptions to CJK Symbols and Punctuation
    upright.removeRange(0x3008, 0x3011); // left angle bracket … right black lenticular bracket
    upright.removeRange(0x3014, 0x301F); // left tortoise shell bracket … low double prime quotation mark
    upright.remove(0x3030); // wavy dash

    // Exceptions to Katakana
    upright.remove(0x30FC); // katakana-hiragana prolonged sound mark

    // Exceptions to Halfwidth and Fullwidth Forms
    upright.remove(0xFF08); // fullwidth left parenthesis
    upright.remove(0xFF09); // fullwidth right parenthesis
    upright.remove(0xFF0D); // fullwidth hyphen-minus
    upright.removeRange(0xFF1A, 0xFF1E); // fullwidth colon … fullwidth greater-than sign
    upright.remove(0xFF3B); // fullwidth left square bracket
    upright.remove(0xFF3D); // fullwidth right square bracket
    upright.remove(0xFF3F); // fullwidth low line
    upright.removeRange(0xFF5B, 0xFFDF); // fullwidth left curly bracket … U+FFDF
    upright.remove(0xFFE3); // fullwidth macron
    upright.removeRange(0xFFE8, 0xFFEF); // halfwidth forms light vertical … U+FFEF

    // Exceptions to Small Form Variants
    upright.removeRange(0xFE58, 0xFE5E); // small em dash … small right tortoise shell bracket
    upright.removeRange(0xFE63, 0xFE66); // small hyphen-minus … small equals sign

    return upright.toString();
}
|
||||
|
||||
/**
 * Builds the regular expression source that matches every code point treated
 * as having neutral vertical orientation.
 *
 * Whole blocks are loaded first (no scripts are used); individual code points
 * and ranges from other blocks are then added, and a few members of the
 * loaded blocks are removed.
 *
 * @returns The pattern string produced by the set's `toString()`.
 */
async function hasNeutralVerticalOrientation(): Promise<string> {
    const neutralBlocks = [
        'CJK Compatibility Forms',
        'CJK Symbols And Punctuation',
        'Control Pictures',
        'Enclosed Alphanumerics',
        'Geometric Shapes',
        'Halfwidth And Fullwidth Forms',
        'Katakana',
        'Letterlike Symbols',
        'Miscellaneous Symbols',
        'Number Forms',
        'Optical Character Recognition',
        'Private Use Area',
        'Small Form Variants',
        'Supplementary Private Use Area-A',
        'Supplementary Private Use Area-B',
    ];

    const neutral = await createSet(neutralBlocks, []);

    // Latin-1 Supplement
    neutral.add(0x00A7); // section sign
    neutral.add(0x00A9); // copyright sign
    neutral.add(0x00AE); // registered sign
    neutral.add(0x00B1); // plus-minus sign
    neutral.add(0x00BC); // vulgar fraction one quarter
    neutral.add(0x00BD); // vulgar fraction one half
    neutral.add(0x00BE); // vulgar fraction three quarters
    neutral.add(0x00D7); // multiplication sign
    neutral.add(0x00F7); // division sign

    // General Punctuation
    neutral.add(0x2016); // double vertical line
    neutral.add(0x2020); // dagger
    neutral.add(0x2021); // double dagger
    neutral.add(0x2030); // per mille sign
    neutral.add(0x2031); // per ten thousand sign
    neutral.add(0x203B); // reference mark
    neutral.add(0x203C); // double exclamation mark
    neutral.add(0x2042); // asterism
    neutral.add(0x2047); // double question mark
    neutral.add(0x2048); // question exclamation mark
    neutral.add(0x2049); // exclamation question mark
    neutral.add(0x2051); // two asterisks aligned vertically

    // Miscellaneous Technical
    neutral.addRange(0x2300, 0x2307); // diameter sign … wavy line
    neutral.addRange(0x230C, 0x231F); // bottom right crop … bottom right corner
    neutral.addRange(0x2324, 0x2328); // up arrowhead between two horizontal bars … keyboard
    neutral.add(0x232B); // erase to the left
    neutral.addRange(0x237D, 0x239A); // shouldered open box … clear screen symbol
    neutral.addRange(0x23BE, 0x23CD); // dentistry symbol light vertical and top right … square foot
    neutral.add(0x23CF); // eject symbol
    neutral.addRange(0x23D1, 0x23DB); // metrical breve … fuse
    neutral.addRange(0x23E2, 0x23FF); // white trapezium … U+23FF

    // Exceptions to Control Pictures
    neutral.remove(0x2423); // open box

    // Exceptions to Miscellaneous Symbols
    neutral.removeRange(0x261A, 0x261F); // black left pointing index … white down pointing index

    // Miscellaneous Symbols and Arrows
    neutral.addRange(0x2B12, 0x2B2F); // square with top half black … white vertical ellipse
    neutral.addRange(0x2B50, 0x2B59); // white medium star … heavy circled saltire
    neutral.addRange(0x2BB8, 0x2BEB); // upwards white arrow from bar with horizontal bar … U+2BEB

    // Mathematical Operators
    neutral.add(0x221E); // infinity
    neutral.add(0x2234); // therefore
    neutral.add(0x2235); // because

    // Dingbats
    neutral.addRange(0x2700, 0x2767); // black safety scissors … rotated floral heart bullet
    neutral.addRange(0x2776, 0x2793); // dingbat negative circled digit one … dingbat negative circled sans-serif number ten

    // Specials
    neutral.add(0xFFFC); // object replacement character
    neutral.add(0xFFFD); // replacement character

    return neutral.toString();
}
|
||||
|
||||
/**
 * Builds the regular expression source that matches code points likely to
 * need complex text shaping.
 *
 * Only whole Unicode blocks are used; no scripts are passed to `createSet`.
 *
 * @returns The pattern string produced by the set's `toString()`.
 */
async function requiresComplexTextShaping(): Promise<string> {
    // Treat this as a rough heuristic only. Whether a script "can be
    // rendered" really depends on the font in use and on whether deviations
    // from the ideal rendering are considered semantically significant.
    //
    // The blocks listed here belong to common scripts that need complex text
    // shaping according to the Unicode script metadata at
    // https://www.unicode.org/repos/cldr/trunk/common/properties/scriptMetadata.txt
    // (entries with "Web Rank <= 32" and "Shaping Required = YES").
    const shapedBlocks = [
        'Bengali',
        'Devanagari',
        'Gujarati',
        'Gurmukhi',
        'Kannada',
        'Khmer',
        'Malayalam',
        'Myanmar',
        'Oriya',
        'Tamil',
        'Telugu',
        'Tibetan',
        'Sinhala',
    ];

    return (await createSet(shapedBlocks, [])).toString();
}
|
||||
|
||||
// Emit the generated module to 'src/util/unicode_properties.g.ts' (a relative
// path, so `fs` resolves it against the current working directory). Each
// exported predicate in the output embeds, between `/` and `/gim`, the pattern
// string returned by the matching generator function in this file and tests it
// against `String.fromCodePoint(codePoint)`. The `await` expressions are
// interpolated into the template literal, so they are evaluated in source
// order before anything is written. The template text is program output: any
// edit to it changes the generated file.
fs.writeFileSync('src/util/unicode_properties.g.ts',
|
||||
`// This file is generated. Edit build/generate-unicode-data.ts, then run \`npm run generate-unicode-data\`.
|
||||
|
||||
/**
|
||||
* Returns whether the fallback fonts specified by the
|
||||
* \`localIdeographFontFamily\` map option apply to the given codepoint.
|
||||
*/
|
||||
export function codePointUsesLocalIdeographFontFamily(codePoint: number): boolean {
|
||||
return /${await usesLocalIdeographFontFamily()}/gim.test(String.fromCodePoint(codePoint));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the given codepoint participates in ideographic line
|
||||
* breaking.
|
||||
*/
|
||||
export function codePointAllowsIdeographicBreaking(codePoint: number): boolean {
|
||||
return /${await allowsIdeographicBreaking()}/gim.test(String.fromCodePoint(codePoint));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given Unicode codepoint identifies a character with
|
||||
* upright orientation.
|
||||
*
|
||||
* A character has upright orientation if it is drawn upright (unrotated)
|
||||
* whether the line is oriented horizontally or vertically, even if both
|
||||
* adjacent characters can be rotated. For example, a Chinese character is
|
||||
* always drawn upright. An uprightly oriented character causes an adjacent
|
||||
* “neutral” character to be drawn upright as well.
|
||||
*/
|
||||
export function codePointHasUprightVerticalOrientation(codePoint: number): boolean {
|
||||
return /${await hasUprightVerticalOrientation()}/gim.test(String.fromCodePoint(codePoint));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the given Unicode codepoint identifies a character with
|
||||
* neutral orientation.
|
||||
*
|
||||
* A character has neutral orientation if it may be drawn rotated or unrotated
|
||||
* when the line is oriented vertically, depending on the orientation of the
|
||||
* adjacent characters. For example, along a vertically oriented line, the
|
||||
* vulgar fraction ½ is drawn upright among Chinese characters but rotated among
|
||||
* Latin letters. A neutrally oriented character does not influence whether an
|
||||
* adjacent character is drawn upright or rotated.
|
||||
*/
|
||||
export function codePointHasNeutralVerticalOrientation(codePoint: number): boolean {
|
||||
return /${await hasNeutralVerticalOrientation()}/gim.test(String.fromCodePoint(codePoint));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns whether the give codepoint is likely to require complex text shaping.
|
||||
*/
|
||||
export function codePointRequiresComplexTextShaping(codePoint: number): boolean {
|
||||
return /${await requiresComplexTextShaping()}/gim.test(String.fromCodePoint(codePoint));
|
||||
}
|
||||
`);
|
||||
Reference in New Issue
Block a user