Viewing File: /usr/local/cpanel/share/libraries/cjt2/src/util/idn.js
/*
# cjt/util/idn.js Copyright 2022 cPanel, L.L.C.
# All rights reserved.
# copyright@cpanel.net http://cpanel.net
# This code is subject to the cPanel license. Unauthorized copying is prohibited
*/
/**
* ----------------------------------------------------------------------
* idn.js - IDN validation per RFC 5891/4.2
*
* This provides part of the IDN validation algorithm. Specifically,
* this identifies:
*
* - DISALLOWED characters
*
* - improper hyphens
*
* - most contextual rule violations
*
* This currently does NOT identify:
*
* - UNASSIGNED characters
*
* - leading combining marks
*
* - certain contextual rules
*
* - violations of Bidi criteria (RFC 5893/2)
* ----------------------------------------------------------------------
*
* EXAMPLE USAGE:
*
* problemsArray = IDN.getLabelDefects( labelString )
*
* ----------------------------------------------------------------------
*/
define( [
"lodash",
"punycode",
"cjt/util/locale",
"cjt/util/idnDisallowed",
"cjt/util/unicode",
],
function(_, PUNYCODE, LOCALE, IDN_DISALLOWED, UNICODE) {
"use strict";
// Per-script code point tables, keyed by lowercase script name
// (greek, hebrew, hiragana, katakana, han). Each entry is either a
// single code point or a [first, last] pair.
// NOTE(review): the pairs are presumably inclusive ranges, as in
// Scripts.txt — confirm against UNICODE.augmentCodePointLookup.
//
// _getCodePointScript() feeds each list to
// UNICODE.augmentCodePointLookup() to build SCRIPT_LOOKUP, and the
// lists are also exported (for tests) under the module’s _lists.
//
// NB: many of the characters that fit these ranges are
// also on IDN’s DISALLOWED list. The ranges below that
// the DISALLOWED list fully excludes are commented out.
//
// We could edit the ranges that only partly overlap with the
// DISALLOWED list, but that would make it harder to compare
// our data with the upstream source. So partially-DISALLOWED
// ranges are left in place.
//
// The lists of code points below come from:
// https://www.unicode.org/Public/12.1.0/ucd/Scripts.txt
//
// The most recent version will be at:
// https://www.unicode.org/Public/UCD/latest/ucd/Scripts.txt
//
// It may be useful at a later point to take steps to keep
// these lists in sync with further revisions of that list.
// For now we just publish static lists and hope that upstream
// changes to these groups are rare.
var SCRIPT_DATA = {
greek: [
[0x370, 0x373],
0x375,
[0x376, 0x377],
// 0x37a, // DISALLOWED
[0x37b, 0x37d],
0x37f,
// 0x384, // DISALLOWED
// 0x386, // DISALLOWED
// [0x388, 0x38a], // DISALLOWED
// 0x38c, // DISALLOWED
[0x38e, 0x3a1],
[0x3a3, 0x3e1],
[0x3f0, 0x3f5],
// 0x3f6, // DISALLOWED
[0x3f7, 0x3ff],
[0x1d26, 0x1d2a],
// [0x1d5d, 0x1d61], // DISALLOWED
// [0x1d66, 0x1d6a], // DISALLOWED
// 0x1dbf, // DISALLOWED
[0x1f00, 0x1f15],
// [0x1f18, 0x1f1d], // DISALLOWED
[0x1f20, 0x1f45],
// [0x1f48, 0x1f4d], // DISALLOWED
[0x1f50, 0x1f57],
// 0x1f59, // DISALLOWED
// 0x1f5b, // DISALLOWED
// 0x1f5d, // DISALLOWED
[0x1f5f, 0x1f7d],
[0x1f80, 0x1fb4],
[0x1fb6, 0x1fbc],
// 0x1fbd, // DISALLOWED
// 0x1fbe, // DISALLOWED
// [0x1fbf, 0x1fc1], // DISALLOWED
// [0x1fc2, 0x1fc4], // DISALLOWED
[0x1fc6, 0x1fcc],
// [0x1fcd, 0x1fcf], // DISALLOWED
[0x1fd0, 0x1fd3],
[0x1fd6, 0x1fdb],
// [0x1fdd, 0x1fdf], // DISALLOWED
[0x1fe0, 0x1fec],
// [0x1fed, 0x1fef], // DISALLOWED
// [0x1ff2, 0x1ff4], // DISALLOWED
[0x1ff6, 0x1ffc],
// [0x1ffd, 0x1ffe], // DISALLOWED
// 0x2126, // DISALLOWED
0xab65,
[0x10140, 0x10174],
[0x10175, 0x10178],
[0x10179, 0x10189],
[0x1018a, 0x1018b],
[0x1018c, 0x1018e],
0x101a0,
// [0x1d200, 0x1d241], // DISALLOWED
// [0x1d242, 0x1d244], // DISALLOWED
// 0x1d245, // DISALLOWED
],
hebrew: [
[ 0x591, 0x5bd ],
// 0x5be, // DISALLOWED
0x5bf,
// 0x5c0, // DISALLOWED
[ 0x5c1, 0x5c2 ],
// 0x5c3, // DISALLOWED
[ 0x5c4, 0x5c5 ],
// 0x5c6, // DISALLOWED
0x5c7,
[ 0x5d0, 0x5ea ],
[ 0x5ef, 0x5f2 ],
[ 0x5f3, 0x5f4 ],
// 0xfb1d, // DISALLOWED
0xfb1e,
// [ 0xfb1f, 0xfb28 ], // DISALLOWED
// 0xfb29, // DISALLOWED
// [ 0xfb2a, 0xfb36 ], // DISALLOWED
// [ 0xfb38, 0xfb3c ], // DISALLOWED
// 0xfb3e, // DISALLOWED
// [ 0xfb40, 0xfb41 ], // DISALLOWED
// [ 0xfb43, 0xfb44 ], // DISALLOWED
// [ 0xfb46, 0xfb4f ], // DISALLOWED
],
hiragana: [
[ 0x3041, 0x3096 ],
[ 0x309d, 0x309e ],
// 0x309f, // DISALLOWED
[ 0x1b001, 0x1b11e ],
[ 0x1b150, 0x1b152 ],
// 0x1f200, // DISALLOWED
],
katakana: [
[0x30a1, 0x30fa],
[0x30fd, 0x30fe],
// 0x30ff, // DISALLOWED
[0x31f0, 0x31ff],
// [0x32d0, 0x32fe], // DISALLOWED
// [0x3300, 0x3357], // DISALLOWED
// [0xff66, 0xff6f], // DISALLOWED
// [0xff71, 0xff9d], // DISALLOWED
0x1b000,
[0x1b164, 0x1b167],
],
han: [
// [0x2e80, 0x2e99], // DISALLOWED
// [0x2e9b, 0x2ef3], // DISALLOWED
// [0x2f00, 0x2fd5], // DISALLOWED
0x3005,
0x3007,
// [0x3021, 0x3029], // DISALLOWED
// [0x3038, 0x303a], // DISALLOWED
// 0x303b, // DISALLOWED
[0x3400, 0x4db5],
[0x4e00, 0x9fef],
[0xf900, 0xfa6d],
// [0xfa70, 0xfad9], // DISALLOWED
[0x20000, 0x2a6d6],
[0x2a700, 0x2b734],
[0x2b740, 0x2b81d],
[0x2b820, 0x2cea1],
[0x2ceb0, 0x2ebe0],
// [0x2f800, 0x2fa1d], // DISALLOWED
],
};
// Code points that the U+200D (ZERO WIDTH JOINER) context check in
// _getContextDefectCPs() accepts as the preceding character; that check
// describes them as having the Virama canonical combining class.
// Also exported (for tests) as _lists.virama.
// NOTE(review): presumably taken from the Unicode character data
// (Canonical_Combining_Class = Virama) — confirm the source and version.
var VIRAMA_LIST = [
0x94d,
0x9cd,
0xa4d,
0xacd,
0xb4d,
0xbcd,
0xc4d,
0xccd,
0xd3b,
0xd3c,
0xd4d,
0xdca,
0xe3a,
0xeba,
0xf84,
0x1039,
0x103a,
0x1714,
0x1734,
0x17d2,
0x1a60,
0x1b44,
0x1baa,
0x1bab,
0x1bf2,
0x1bf3,
0x2d7f,
0xa806,
0xa8c4,
0xa953,
0xa9c0,
0xaaf6,
0xabed,
0x10a3f,
0x11046,
0x1107f,
0x110b9,
0x11133,
0x11134,
0x111c0,
0x11235,
0x112ea,
0x1134d,
0x11442,
0x114c2,
0x115bf,
0x1163f,
0x116b6,
0x1172b,
0x11839,
0x119e0,
0x11a34,
0x11a47,
0x11a99,
0x11c3f,
0x11d44,
0x11d45,
0x11d97,
];
// Lookup from code point to script name. Left undefined here;
// _getCodePointScript() builds it from SCRIPT_DATA on its first call.
var SCRIPT_LOOKUP;
// Script names that satisfy the U+30FB (Katakana middle dot) contextual
// rule: the label must contain at least one character whose script is
// listed here.
var KATAKANA_MIDDLE_DOT_OK = {
han: true,
katakana: true,
hiragana: true,
};
/**
 * Return the script name recorded for a code point.
 *
 * The lookup table is built on the first call by passing every list in
 * SCRIPT_DATA to UNICODE.augmentCodePointLookup(); later calls reuse it.
 *
 * @param cp Number The code point to look up.
 * @returns String|undefined Whatever SCRIPT_LOOKUP holds for the code point
 *                           (undefined when it has no entry).
 */
function _getCodePointScript(cp) {
    if (SCRIPT_LOOKUP) {
        return SCRIPT_LOOKUP[cp];
    }

    // First use: populate the shared lookup, one script at a time.
    var names = Object.keys(SCRIPT_DATA);
    SCRIPT_LOOKUP = {};
    for (var n = 0; n < names.length; n++) {
        UNICODE.augmentCodePointLookup(SCRIPT_DATA[names[n]], SCRIPT_LOOKUP, names[n]);
    }

    return SCRIPT_LOOKUP[cp];
}
/**
 * Turn a single code point into a string via PUNYCODE.ucs2.encode().
 *
 * @param cp Number The code point to encode.
 * @returns String The result of encoding the one-element code point list.
 */
function _encodeCP(cp) {
    var single = [cp];
    return PUNYCODE.ucs2.encode(single);
}
/**
 * Find the code points in a label that are used in violation of a
 * contextual rule.
 *
 * Implements various parts of
 * https://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml
 *
 * @param label String The label to inspect.
 * @returns Array The offending code points (numbers), de-duplicated
 *                with _.uniq().
 */
function _getContextDefectCPs(label) {
    var labelCPs = PUNYCODE.ucs2.decode(label);
    var offenders = [];

    // True when some code point of the label lies within low..high.
    function labelHasAnyIn(low, high) {
        return labelCPs.some( function(other) {
            return other >= low && other <= high;
        } );
    }

    // True when some code point of the label belongs to one of the
    // scripts listed in KATAKANA_MIDDLE_DOT_OK.
    function labelHasKanaOrHan() {
        return labelCPs.some( function(other) {
            return !!KATAKANA_MIDDLE_DOT_OK[ _getCodePointScript(other) ];
        } );
    }

    labelCPs.forEach( function(cp, at) {
        var before = labelCPs[at - 1];
        var after = labelCPs[at + 1];
        var isMisused = false;

        if (cp === 0x200c) {

            // TODO: We have the Virama logic but need the check
            // on joining type for this to be functional.
        } else if (cp === 0x200d) {

            // Previous character’s canonical combining class
            // must be Virama.
            isMisused = VIRAMA_LIST.indexOf(before) === -1;
        } else if (cp === 0xb7) {

            // Only valid when it sits between two “l” (U+006C).
            isMisused = !(before === 0x6c && after === 0x6c);
        } else if (cp === 0x375) {

            // The script of the following character MUST be Greek.
            isMisused = _getCodePointScript(after) !== "greek";
        } else if (cp === 0x5f3 || cp === 0x5f4) {

            // The script of the preceding character MUST be Hebrew.
            isMisused = _getCodePointScript(before) !== "hebrew";
        } else if (cp === 0x30fb) {

            // At least one character in the label must be of the
            // Hiragana, Katakana, or Han script.
            isMisused = !labelHasKanaOrHan();
        } else if (cp >= 0x660 && cp <= 0x669) {

            // Arabic-Indic digits can’t be with Extended Arabic-Indic
            isMisused = labelHasAnyIn(0x6f0, 0x6f9);
        } else if (cp >= 0x6f0 && cp <= 0x6f9) {

            // Extended Arabic-Indic digits can’t be with
            // (regular) Arabic-Indic
            isMisused = labelHasAnyIn(0x660, 0x669);
        }

        if (isMisused) {
            offenders.push(cp);
        }
    } );

    return _.uniq(offenders);
}
/**
 * Format code points in “U+” notation.
 *
 * @param cps Array Code points (numbers).
 * @returns Array One string per code point: "U+" followed by the
 *                uppercase hexadecimal value, left-padded with "0"
 *                to four digits via _.padStart().
 */
function _codePointsToUplus(cps) {
    function toUplus(codePoint) {
        var hex = codePoint.toString(16).toUpperCase();
        return "U+" + _.padStart(hex, 4, "0");
    }

    return cps.map( function(codePoint) {
        return toUplus(codePoint);
    } );
}
/**
 * @function getLabelDefects
 *
 * Collect human-readable descriptions of the IDN validation problems
 * found in one label. Checks, in this order:
 *
 * - DISALLOWED characters (via IDN_DISALLOWED.getDisallowedInLabel)
 * - contextual rule violations (via _getContextDefectCPs)
 * - “--” in the third and fourth character positions
 * - “-” at the start or end of the label
 *
 * @param label String The input to parse as an IDN label.
 * @returns Array Human-readable descriptions of the validation errors;
 *                empty when none of the checks above found a problem.
 */
function getLabelDefects(label) {
    var phrases = [];

    var disallowed = IDN_DISALLOWED.getDisallowedInLabel(label);
    if (disallowed.length) {
        var cps = PUNYCODE.ucs2.decode( disallowed.join("") );
        var upluses = _codePointsToUplus(cps);
        phrases.push( LOCALE.maketext("Domain names may not contain [list_or_quoted,_1] ([list_or,_2]).", disallowed, upluses) );
    }

    var badContextCPs = _getContextDefectCPs(label);
    if (badContextCPs.length) {
        var chars = badContextCPs.map( _encodeCP );
        var ctxUpluses = _codePointsToUplus(badContextCPs);
        phrases.push( LOCALE.maketext("You must use [list_and_quoted,_1] ([list_and,_2]) properly in domain names.", chars, ctxUpluses) );
    }

    // Positions are counted in characters (code points), not UTF-16
    // code units: a leading character outside the BMP occupies two
    // code units and would otherwise shift the “third position”.
    var labelCPs = PUNYCODE.ucs2.decode(label);
    if (labelCPs[2] === 0x2d && labelCPs[3] === 0x2d) {
        phrases.push( LOCALE.maketext("“[_1]” is forbidden at the third position of a domain label.", "--") );
    }

    if (/^-|-$/.test(label)) {
        phrases.push( LOCALE.maketext("“[_1]” is forbidden at the start or end of a domain label.", "-") );
    }

    return phrases;
}
return {
getLabelDefects: getLabelDefects,
// for testing only
_lists: _.assign(
{},
SCRIPT_DATA,
{ virama: VIRAMA_LIST }
),
};
});
Back to Directory
File Manager