Solution minimale pour la question posée
Cela devrait résoudre ce que vous avez demandé :
/**
 * Replaces Arabic-Indic (U+0660-U+0669) and Extended Arabic-Indic
 * (U+06F0-U+06F9) digits in a string with the ASCII digits 0-9.
 *
 * @param {string} arabic - Text that may contain Arabic-Indic digits.
 * @returns {string} The text with those digits converted; every other
 *     character is left untouched.
 */
function fromArabicToASCII(arabic) {
  var ASCII_ZERO = 48;

  var toAsciiDigit = function (digitChar) {
    // Both ranges start at a code point whose low nibble is 0, so the
    // low four bits of the code unit are the digit's numeric value.
    var value = digitChar.charCodeAt(0) & 0xF;
    return String.fromCharCode(value + ASCII_ZERO);
  };

  return arabic.replace(/[\u0660-\u0669\u06F0-\u06F9]/g, toAsciiDigit);
}
Fonction générale pour tous les chiffres connus de la classe Unicode Nd (Number, Decimal Digit)
La fonction ci-dessous prend en charge les chiffres suivants de la classe Unicode Nd (Number, Decimal Digit) : Adlam, Ahom, arabo-indien, balinais, bengali, Bhaiksuki, Brahmi, Chakma, Cham, Devanagari, arabo-indien étendu, pleine largeur, Gujarati, Gurmukhi, Javanais, Kannada, Kayah Li, Khmer, Khudawadi, Lao, Lepcha, Limbu, Malayalam, Mathematical Bold, Mathematical Double-struck, Mathematical Monospace, Mathematical Sans-serif Bold, Mathematical Sans-serif, Meetei Mayek, Modi, Mongol, Mro, Myanmar Shan, Myanmar Tai Laing, Myanmar, New Tai Lue, Newa, Nko, Ol Chiki, Oriya, Osmanya, Pahawh Hmong, Saurashtra, Sharada, Sinhala Lith, Sora Sompeng, Sundanese, Tai Tham Hora, Tai Tham Tham, Takri, Tamil, Telugu, Thai, Tibetan, Tirhuta, Vai, Warang Citi.
Les chiffres romains et les autres systèmes de numération non décimaux ne sont pas pris en charge, car ils n'appartiennent pas à la classe Nd des chiffres décimaux.
// This function takes a UTF-16 encoded string as input
// and returns it with all supported digits from Unicode
// class 'Nd' (Number, Decimal Digit) replaced with their
// equivalent ASCII digits.
// Source : http://stackoverflow.com/a/12171250/36866
// License: MIT
// Author : some@domain.name
// Note : If you are going to use this code I would appreciate to
// get an email to some@domain.name. You don't have to but
// it would make me happier!
/**
 * Replaces every supported Unicode 'Nd' (Number, Decimal Digit) character
 * in a string with the equivalent ASCII digit.
 *
 * @param {string} input - Text that may contain non-ASCII decimal digits.
 * @returns {string} A copy of the text with each supported digit replaced
 *     by the matching character '0'-'9'; other characters are unchanged.
 */
var digitsToASCII = (function () {
  var ASCII_ZERO = 48;

  // The pattern has three capture groups, one per decoding rule, so the
  // replacer can tell from the populated group how to decode a match and
  // needs no lookup table. Digits outside the BMP are written as a high
  // surrogate followed by a class of low surrogates.

  // Group 1: ranges whose zero digit sits at a code unit ending in 0x0.
  var zeroAtNibble0 = [
    '[' +
      '\u0030-\u0039\u0660-\u0669\u06F0-\u06F9\u07C0-\u07C9' +
      '\u0E50-\u0E59\u0ED0-\u0ED9\u0F20-\u0F29\u1040-\u1049' +
      '\u1090-\u1099\u17E0-\u17E9\u1810-\u1819\u19D0-\u19D9' +
      '\u1A80-\u1A89\u1A90-\u1A99\u1B50-\u1B59\u1BB0-\u1BB9' +
      '\u1C40-\u1C49\u1C50-\u1C59\uA620-\uA629\uA8D0-\uA8D9' +
      '\uA900-\uA909\uA9D0-\uA9D9\uA9F0-\uA9F9\uAA50-\uAA59' +
      '\uABF0-\uABF9\uFF10-\uFF19' +
    ']',
    '\uD801[\uDCA0-\uDCA9]',
    '\uD804[\uDCF0-\uDCF9\uDDD0-\uDDD9\uDEF0-\uDEF9]',
    '\uD805[' +
      '\uDC50-\uDC59\uDCD0-\uDCD9\uDE50-\uDE59' +
      '\uDEC0-\uDEC9\uDF30-\uDF39' +
    ']',
    '\uD806[\uDCE0-\uDCE9]',
    '\uD807[\uDC50-\uDC59]',
    '\uD81A[\uDE60-\uDE69]',
    '\uD81A[\uDF50-\uDF59]',
    '\uD83A[\uDD50-\uDD59]'
  ].join('|');

  // Group 2: ranges whose zero digit sits at a code unit ending in 0x6.
  var zeroAtNibble6 = [
    '[' +
      '\u0966-\u096F\u09E6-\u09EF\u0A66-\u0A6F\u0AE6-\u0AEF' +
      '\u0B66-\u0B6F\u0BE6-\u0BEF\u0C66-\u0C6F\u0CE6-\u0CEF' +
      '\u0D66-\u0D6F\u0DE6-\u0DEF\u1946-\u194F' +
    ']',
    '\uD804[\uDC66-\uDC6F\uDD36-\uDD3F]'
  ].join('|');

  // Group 3: the mathematical digits, where several digit styles occupy
  // consecutive code points starting at low surrogate 0xDFCE.
  var mathDigits = '\uD835[\uDFCE-\uDFFF]';

  var reDigit = new RegExp(
    '(' + zeroAtNibble0 + ')|(' + zeroAtNibble6 + ')|(' + mathDigits + ')',
    'g'
  );

  // Replacer for String.prototype.replace: exactly one of the three group
  // arguments is set for any match and selects the decoding rule.
  function toAsciiDigit(match, inNibble0, inNibble6, inMath) {
    // A match may be a surrogate pair (two code units); the final code
    // unit alone is enough to recover the digit value in every case.
    var lastUnit = match.charCodeAt(match.length - 1);
    var value;

    if (inNibble0) {
      // Low four bits are the digit.
      value = lastUnit & 0xF;
    } else if (inNibble6) {
      // Shift the range down by 6 first, then take the low four bits.
      value = (lastUnit - 6) & 0xF;
    } else if (inMath) {
      // Position inside the block (six bits), folded into 0-9.
      value = ((lastUnit - 0xCE) & 0x3F) % 10;
    } else {
      value = null;
    }

    return String.fromCharCode(ASCII_ZERO + value);
  }

  return function replaceDigits(input) {
    return input.replace(reDigit, toAsciiDigit);
  };
})();
Utilisation :
// Example: digitsToASCII returns a new string in which every supported
// digit found in myForeignVariable is replaced with its ASCII equivalent.
// myForeignVariable must be a string, since digitsToASCII calls .replace on it.
myAsciiVariable = digitsToASCII( myForeignVariable );
Module pour node.js
Une fonction similaire à celle-ci est maintenant disponible dans un module node.js appelé unicodedigits. Cette fonction permet de traduire n'importe quel chiffre pris en charge en ASCII ou dans l'une des plages prises en charge.
Vous pouvez l'installer avec npm install unicodedigits --save
ou le trouver sur github.com/somec/unicodedigits