xpt*_*xpt 6 javascript encoding character converter utf-8
编辑:谢谢GOTO 0,我现在确切地知道我的问题是什么.
我需要一个JavaScript函数,把字符串中的全角(全宽)形式字符转换为对应的半角(半宽)形式.
GOT*_*O 0 15
显然,您希望将半宽和全宽格式字符转换为等效的基本拉丁形式.如果这是正确的,您可以使用正则表达式进行替换.这样的事情应该有效:
// Sample input: the full-width text "!abc ABC!" (with an ideographic space),
// spelled with \u escapes so it survives lossy re-encoding of this file.
// NOTE(review): the literal arrived here as nine "?" characters; this value is
// a reconstruction of the lost sample - confirm against the original answer.
var x = "\uff01\uff41\uff42\uff43\u3000\uff21\uff22\uff23\uff01";
// Each character in U+FF01..U+FF5E is shifted down by 0xFEE0, which lands on
// "!".."~". Characters outside that range (including U+3000) are not matched
// by the pattern and are therefore left unchanged.
var y = x.replace(
    /[\uff01-\uff5e]/g,
    function (ch) { return String.fromCharCode(ch.charCodeAt(0) - 0xfee0); }
);
Run Code Online (Sandbox Code Playgroud)
其中x是输入字符串,y是输出.
许多年后——在互联网上仍然找不到能完成这件事的函数,所以我自己写了一个.(为了做到这一点,我几乎把日语和韩语都学了一遍.)
仅限拉丁字符范围:
/**
 * Builds a String.prototype.replace callback that moves the first UTF-16 code
 * unit of each match by `delta`.
 * @param {number} delta - signed offset added to the code unit.
 * @returns {function(string): string} replacer usable with String.prototype.replace.
 */
var shiftCharCode = delta => c => String.fromCharCode(c.charCodeAt(0) + delta);

/**
 * Converts printable ASCII ("!".."~") to the full-width forms U+FF01..U+FF5E.
 * @param {string} str - input text.
 * @returns {string} text with every "!".."~" character shifted up by 0xFEE0.
 */
var toFullWidth = str => str.replace(/[!-~]/g, shiftCharCode(0xFEE0));

/**
 * Converts the full-width forms U+FF01..U+FF5E back to printable ASCII.
 * The range is written with \u escapes so it cannot be damaged by re-encoding.
 * @param {string} str - input text.
 * @returns {string} text with every U+FF01..U+FF5E character shifted down by 0xFEE0.
 */
var toHalfWidth = str => str.replace(/[\uFF01-\uFF5E]/g, shiftCharCode(-0xFEE0));
Run Code Online (Sandbox Code Playgroud)
如果我错过任何角色,请告诉我.
/**
 * Installs two global helpers:
 *   toFullWidth(str) - replaces half-width characters with full-width ones,
 *   toHalfWidth(str) - the reverse direction.
 * Each helper runs the string through every charset below, in declaration
 * order. A charset is either "shifted" (non-zero `delta` added to the code
 * unit, with explicit `halfRE`/`fullRE` patterns) or "paired" (`delta` 0, with
 * `half` and `full` strings whose characters correspond by position).
 *
 * NOTE(review): every non-ASCII literal in this table had been replaced by "?"
 * by a lossy encoding step. The values below are reconstructed from what
 * survived (the deltas, the string lengths, the Latin-1 characters) and from
 * the Unicode Halfwidth and Fullwidth Forms block; they are written as \u
 * escapes so they cannot be damaged again. Verify against the original source.
 */
(function () {
    const charsets = {
        latin: {halfRE: /[!-~]/g, fullRE: /[\uFF01-\uFF5E]/g, delta: 0xFEE0},
        // NOTE(review): the full ranges of both Hangul sets are taken to be the
        // exact image of the half range under `delta` - confirm. The half range
        // of hangul2 is assumed to be U+FFC2..U+FFDC.
        hangul1: {halfRE: /[\uFFA1-\uFFBE]/g, fullRE: /[\u11A8-\u11C5]/g, delta: -0xEDF9},
        hangul2: {halfRE: /[\uFFC2-\uFFDC]/g, fullRE: /[\u1161-\u117B]/g, delta: -0xEE61},
        kana: {delta: 0,
            // U+FF61..U+FF9F, listed in code-point order (63 characters).
            half: Array.from({length: 0xFF9F - 0xFF61 + 1},
                (_, i) => String.fromCharCode(0xFF61 + i)).join(""),
            // Full-width counterpart of each character above, same order.
            // NOTE(review): the last two entries are assumed to be the spacing
            // sound marks U+309B/U+309C - confirm.
            full: "\u3002\u300C\u300D\u3001\u30FB\u30F2" +
                "\u30A1\u30A3\u30A5\u30A7\u30A9\u30E3\u30E5\u30E7\u30C3\u30FC" +
                "\u30A2\u30A4\u30A6\u30A8\u30AA" +
                "\u30AB\u30AD\u30AF\u30B1\u30B3" +
                "\u30B5\u30B7\u30B9\u30BB\u30BD" +
                "\u30BF\u30C1\u30C4\u30C6\u30C8" +
                "\u30CA\u30CB\u30CC\u30CD\u30CE" +
                "\u30CF\u30D2\u30D5\u30D8\u30DB" +
                "\u30DE\u30DF\u30E0\u30E1\u30E2" +
                "\u30E4\u30E6\u30E8" +
                "\u30E9\u30EA\u30EB\u30EC\u30ED" +
                "\u30EF\u30F3\u309B\u309C"},
        extras: {delta: 0,
            // cent, pound, not, macron, broken bar, yen, won, space, "|",
            // four arrows, black square, degree sign.
            // NOTE(review): the won sign, arrows and square were garbled; the
            // code points chosen here are assumptions - confirm.
            half: "\u00A2\u00A3\u00AC\u00AF\u00A6\u00A5\u20A9\u0020|" +
                "\uFFE9\uFFEA\uFFEB\uFFEC\uFFED\u00B0",
            full: "\uFFE0\uFFE1\uFFE2\uFFE3\uFFE4\uFFE5\uFFE6\u3000\u2502" +
                "\u2190\u2191\u2192\u2193\u25A0\u25CB"}
    };

    // Character-class regex for a literal list of characters; the few
    // characters that are special inside [...] are escaped first.
    const classRE = chars =>
        new RegExp("[" + chars.replace(/[\\\]^-]/g, "\\$&") + "]", "g");

    // Replacer for shifted sets: move the code unit by `delta`.
    const shift = delta => ch => String.fromCharCode(ch.charCodeAt(0) + delta);

    // Replacer for paired sets: positional lookup, built once per direction.
    const lookup = (from, to) => {
        const targets = [...to];
        const table = new Map([...from].map((ch, i) => [ch, targets[i]]));
        return ch => table.get(ch);
    };

    // Precompute patterns and replacers once instead of on every call.
    const converters = Object.keys(charsets).map(name => {
        const set = charsets[name];
        const isShifted = set.delta !== 0;
        return {
            halfRE: set.halfRE || classRE(set.half),
            fullRE: set.fullRE || classRE(set.full),
            toFull: isShifted ? shift(set.delta) : lookup(set.half, set.full),
            toHalf: isShifted ? shift(-set.delta) : lookup(set.full, set.half)
        };
    });

    // `window` only exists in browsers; prefer globalThis where available so
    // the helpers can also be installed in other JavaScript environments.
    const root = typeof globalThis !== "undefined" ? globalThis : window;

    root.toFullWidth = str0 =>
        converters.reduce((str, cv) => str.replace(cv.halfRE, cv.toFull), str0);
    root.toHalfWidth = str0 =>
        converters.reduce((str, cv) => str.replace(cv.fullRE, cv.toHalf), str0);
})();
/* Example starts here: */
// Default inputs offered in the prompt, written with \u escapes so they
// survive re-encoding.
// NOTE(review): the last three samples were partly garbled into "?"; the
// values below (won sign plus two half-width arrows, full-width katakana,
// half-width Hangul) are illustrative replacements of the same lengths.
var samples = [
    "aou\u00E4\u00F6\u00FC123",
    "'\"?:",
    "\u00A2\u00A3\u00A5\u20A9\uFFEA\uFFEB",
    "\u30B3\u30F3\u30CB\u30C1\u30CF",
    "\uFFA1\uFFA4\uFFC2\uFFC3"
].join();
var answer = prompt("Enter a couple of comma-separated strings (half or full-width):",
    samples);
// prompt() returns null when the dialog is dismissed; fall back to the samples
// instead of failing on null.split.
var set = (answer === null ? samples : answer).split(",");
var fullWidth = set.map(str => toFullWidth(str));
// Row order: raw input, full-width conversion, conversion back to half-width.
var steps = [set, fullWidth, fullWidth.map(str => toHalfWidth(str))];
var rows = document.getElementsByTagName("tr");
[...rows].forEach((row, i) => {
    // Rows beyond the three steps get no cells.
    (steps[i] || []).forEach(str => {
        // textContent keeps user-entered markup from being parsed as HTML.
        const cell = document.createElement("td");
        cell.textContent = str;
        row.appendChild(cell);
    });
});
/* Styling for the example result table. */
th, td {border: 1px solid lightgrey; padding: 0.2em;}
th {text-align: left;}
table {border-collapse: collapse;}
<!-- Result table: the example script appends one <td> per input string to
     each <tr>, taking the rows in document order. -->
<table>
<tr><th scope="row">Input:</th></tr>
<tr><th scope="row">Full-width:</th></tr>
<tr><th scope="row">Half-width:</th></tr>
</table>
试试这个
/**
 * Replaces full-width ASCII variants (U+FF01..U+FF5E) with the corresponding
 * ASCII characters "!".."~"; every other character is copied unchanged.
 *
 * Only that sub-range maps onto ASCII by a constant offset. The rest of the
 * U+FF00..U+FFEF block (half-width kana and Hangul, full-width currency signs)
 * has no ASCII counterpart, so it must not be shifted or masked.
 *
 * @param {string} chars - input text, processed one UTF-16 code unit at a time.
 * @returns {string} the converted text.
 */
function toASCII(chars) {
    let ascii = '';
    for (let i = 0, l = chars.length; i < l; i++) {
        let c = chars[i].charCodeAt(0);
        // only convert the full-width forms that have an ASCII equivalent
        if (c >= 0xFF01 && c <= 0xFF5E) {
            c -= 0xFEE0;
        }
        ascii += String.fromCharCode(c);
    }
    return ascii;
}
// example
toASCII("\uFF21\uFF22\uFF23"); // returns 'ABC' ('A' is 0x41)
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
13892 次 |
| 最近记录: |