JavaScript函数在全宽和半宽形式之间转换UTF8字符串

xpt*_*xpt 6 javascript encoding character converter utf-8

编辑:谢谢GOTO 0,我现在确切地知道我的问题是什么.

我需要一个JavaScript函数来从UTF-8全宽形式转换为半宽形式.

GOT*_*O 0 15

看起来,您希望把"半角及全角形式"(Halfwidth and Fullwidth Forms)区块中的字符转换为等效的基本拉丁字符.如果是这样,可以用正则表达式进行替换.类似下面的代码应该可行:

// Sample input: fullwidth "!abc", an ideographic space (U+3000), then fullwidth
// "ABC!". It is spelled with \u escapes so it survives lossy re-encoding.
var x = "\uff01\uff41\uff42\uff43\u3000\uff21\uff22\uff23\uff01";
// U+FF01..U+FF5E are the fullwidth variants of ASCII U+0021..U+007E. Each one
// sits exactly 0xFEE0 above its ASCII counterpart, so subtracting that offset
// maps it back. Characters outside the range (such as U+3000) are left alone.
var y = x.replace(
    /[\uff01-\uff5e]/g,
    function(ch) { return String.fromCharCode(ch.charCodeAt(0) - 0xfee0); }
    );
Run Code Online (Sandbox Code Playgroud)

其中x是输入字符串,y是输出.


7vu*_*0hy 7

2018年回答

许多年过去了,在网上仍然找不到能完成这种转换的现成函数,所以我自己写了一个.(为了做到这一点,我几乎把日语和韩语都学了一遍.)

简单版

仅支持拉丁字符范围.

// Returns a String.prototype.replace callback that shifts the matched
// character's UTF-16 code unit by `delta`.
var shiftCharCode = delta => c => String.fromCharCode(c.charCodeAt(0) + delta);
// ASCII "!".."~" (U+0021..U+007E) -> fullwidth forms U+FF01..U+FF5E.
var toFullWidth = str => str.replace(/[!-~]/g, shiftCharCode(0xFEE0));
// Fullwidth forms U+FF01..U+FF5E -> ASCII. The range is written with \u
// escapes so it cannot be damaged by a lossy re-encoding of this file.
var toHalfWidth = str => str.replace(/[\uFF01-\uFF5E]/g, shiftCharCode(-0xFEE0));
Run Code Online (Sandbox Code Playgroud)

完整版

如果我遗漏了任何字符,请告诉我.

// Installs two global functions, toFullWidth(str) and toHalfWidth(str), that
// convert between half-width and full-width character forms.
//
// Every non-ASCII character is written as a \u escape so the tables survive a
// lossy re-encoding of this file.
//
// Coverage (all in the Basic Multilingual Plane):
//   - ASCII U+0021..U+007E            <-> fullwidth U+FF01..U+FF5E
//   - SPACE U+0020                    <-> IDEOGRAPHIC SPACE U+3000
//   - cent, pound, not, macron, broken bar, yen, won
//                                     <-> fullwidth signs U+FFE0..U+FFE6
//   - halfwidth kana U+FF61..U+FF9F   <-> CJK punctuation / katakana
//   - halfwidth Hangul U+FFA1..U+FFBE, U+FFC2..U+FFDC (four runs)
//                                     <-> compatibility jamo U+3131..U+3163
//   - halfwidth symbols U+FFE8..U+FFEE <-> U+2502, U+2190..U+2193, U+25A0, U+25CB
//
// Each UTF-16 code unit is mapped independently in a single pass. A halfwidth
// kana followed by a halfwidth (semi-)voiced mark therefore becomes base kana
// plus U+309B/U+309C, not a precomposed voiced kana.
(function () {
    // Returns the string of all code units from `first` to `last`, inclusive.
    const range = (first, last) => {
        let out = "";
        for (let cp = first; cp <= last; cp++) {
            out += String.fromCharCode(cp);
        }
        return out;
    };

    // Wide counterparts of U+FF61..U+FF9F, in the same order (63 characters).
    const kanaFull =
        "\u3002\u300C\u300D\u3001\u30FB\u30F2" +                    // punctuation, WO
        "\u30A1\u30A3\u30A5\u30A7\u30A9\u30E3\u30E5\u30E7\u30C3" +  // small kana
        "\u30FC" +                                                  // prolonged sound mark
        "\u30A2\u30A4\u30A6\u30A8\u30AA" +                          // A I U E O
        "\u30AB\u30AD\u30AF\u30B1\u30B3" +                          // KA row
        "\u30B5\u30B7\u30B9\u30BB\u30BD" +                          // SA row
        "\u30BF\u30C1\u30C4\u30C6\u30C8" +                          // TA row
        "\u30CA\u30CB\u30CC\u30CD\u30CE" +                          // NA row
        "\u30CF\u30D2\u30D5\u30D8\u30DB" +                          // HA row
        "\u30DE\u30DF\u30E0\u30E1\u30E2" +                          // MA row
        "\u30E4\u30E6\u30E8" +                                      // YA YU YO
        "\u30E9\u30EA\u30EB\u30EC\u30ED" +                          // RA row
        "\u30EF\u30F3" +                                            // WA N
        "\u309B\u309C";                                             // voiced / semi-voiced marks

    // Halfwidth Hangul vowels occupy four separate runs with unassigned gaps
    // between them, so a single constant offset cannot describe the mapping.
    const hangulVowelsHalf =
        range(0xFFC2, 0xFFC7) + range(0xFFCA, 0xFFCF) +
        range(0xFFD2, 0xFFD7) + range(0xFFDA, 0xFFDC);

    // [halfForms, fullForms] pairs. Both strings of a pair have equal length
    // and are index-aligned.
    const pairs = [
        [range(0x21, 0x7E), range(0xFF01, 0xFF5E)],
        ["\u0020", "\u3000"],
        ["\u00A2\u00A3\u00AC\u00AF\u00A6\u00A5\u20A9", range(0xFFE0, 0xFFE6)],
        [range(0xFF61, 0xFF9F), kanaFull],
        [range(0xFFA1, 0xFFBE), range(0x3131, 0x314E)],
        [hangulVowelsHalf, range(0x314F, 0x3163)],
        // U+FFE8..U+FFEE are the HALFWIDTH forms; their wide counterparts are
        // the box-drawing vertical, the four arrows, black square and white circle.
        [range(0xFFE8, 0xFFEE), "\u2502\u2190\u2191\u2192\u2193\u25A0\u25CB"]
    ];

    // Build both lookup tables once instead of searching arrays per character.
    const halfToFull = new Map();
    const fullToHalf = new Map();
    for (const [half, full] of pairs) {
        for (let i = 0; i < half.length; i++) {
            halfToFull.set(half[i], full[i]);
            fullToHalf.set(full[i], half[i]);
        }
    }

    // Replaces every code unit that has an entry in `table`; all others
    // (including surrogate halves) are copied through unchanged.
    const convert = (str, table) =>
        str.replace(/[\s\S]/g, ch => (table.has(ch) ? table.get(ch) : ch));

    // `window` in browsers; the typeof guard lets the same code run where only
    // globalThis exists, and in old browsers where only `window` exists.
    const root = typeof globalThis !== "undefined" ? globalThis : window;

    // Both functions take only the string and ignore extra arguments, so they
    // can be passed directly to Array.prototype.map.
    root.toFullWidth = str0 => convert(str0, halfToFull);
    root.toHalfWidth = str0 => convert(str0, fullToHalf);
})();

/* Example starts here: */
// Demo driver: asks for comma-separated sample strings, converts each one to
// full-width and back, and appends the three stages to the first three <tr>
// elements of the document.
var set = (function () {
    // Default samples: Latin text, ASCII punctuation, currency/symbol forms,
    // halfwidth katakana and halfwidth Hangul. Non-Latin-1 characters are
    // written as \u escapes so they survive lossy re-encoding of this file.
    var defaults = [
        "aou\u00E4\u00F6\u00FC123",
        "'\"?:",
        "\u00A2\u00A3\u00A5\u20A9\uFFE9\uFFED",
        "\uFF7A\uFF9D\uFF86\uFF81\uFF8A",
        "\uFFA1\uFFC2\uFFA4\uFFDC"
    ].join();
    var answer = prompt("Enter a couple of comma-separated strings (half or full-width):",
        defaults);
    // prompt() returns null when the dialog is cancelled; use the defaults then.
    return (answer === null ? defaults : answer).split(",");
})();
var steps = [set, set.map(toFullWidth), set.map(toFullWidth).map(toHalfWidth)];
// The cell text comes from user input, so HTML-significant characters are
// replaced by numeric character references before being placed in markup.
var tdHTML = str => `<td>${str.replace(/[&<>"']/g, ch => `&#${ch.charCodeAt(0)};`)}</td>`;
var stepsHTML = steps.map(step => step.map(tdHTML).join(""));
var rows = document.getElementsByTagName("tr");
[...rows].forEach((row, i) => {
    // Only the first steps.length rows have data; leave any other <tr> untouched.
    if (i < stepsHTML.length) {
        row.insertAdjacentHTML("beforeend", stepsHTML[i]);
    }
});
Run Code Online (Sandbox Code Playgroud)
/* Every cell gets a thin light-grey border and a little inner spacing. */
th,
td {
    border: 1px solid lightgrey;
    padding: 0.2em;
}

/* Header cells are left-aligned. */
th {
    text-align: left;
}

/* Adjacent cell borders are merged into single lines. */
table {
    border-collapse: collapse;
}
Run Code Online (Sandbox Code Playgroud)
<!-- Result table with three rows: input, full-width result, half-width result.
     Each row starts with only its header cell. -->
<table>
    <tr><th scope="row">Input:</th></tr>
    <tr><th scope="row">Full-width:</th></tr>
    <tr><th scope="row">Half-width:</th></tr>
</table>
Run Code Online (Sandbox Code Playgroud)


Rez*_*ned 6

试试这个

/**
 * Converts fullwidth ASCII variants (U+FF01..U+FF5E) to their plain ASCII
 * counterparts (U+0021..U+007E). Everything else is copied through unchanged.
 *
 * @param {string} chars - Text to convert. Any indexable sequence of strings
 *     also works; only the first UTF-16 code unit of each element is read.
 * @returns {string} The converted text.
 */
function toASCII(chars) {
    let ascii = '';
    for (let i = 0, l = chars.length; i < l; i++) {
        let c = chars[i].charCodeAt(0);

        // Only U+FF01..U+FF5E are ASCII variants. The rest of the block
        // U+FF00..U+FFEF holds halfwidth katakana, halfwidth Hangul and
        // fullwidth currency signs, which have no ASCII equivalent and must
        // not be shifted.
        if (c >= 0xFF01 && c <= 0xFF5E) {
            c -= 0xFEE0;
        }

        ascii += String.fromCharCode(c);
    }

    return ascii;
}

// example: the argument is fullwidth "ABC" (U+FF21 U+FF22 U+FF23), written
// with \u escapes so it survives lossy re-encoding of this file
toASCII("\uFF21\uFF22\uFF23"); // returns 'ABC' ('A' is 0x41)
Run Code Online (Sandbox Code Playgroud)