Javascript:Unicode字符串到十六进制

Wes*_*ley 49 javascript unicode jquery hex utf-8

我正在尝试将unicode字符串转换为javascript中的十六进制表示形式.

这就是我所拥有的:

/**
 * Decodes a hex string by reading it two digits at a time and turning each
 * pair into one character via String.fromCharCode.
 *
 * @param {*} hex - Value whose toString() result is read as hex digits.
 * @returns {string} One character per two-digit group (a trailing single
 *   digit is parsed on its own).
 */
function convertFromHex(hex) {
    var digits = hex.toString(); // force conversion
    var chars = [];
    var offset = 0;
    while (offset < digits.length) {
        var pair = digits.slice(offset, offset + 2);
        chars.push(String.fromCharCode(parseInt(pair, 16)));
        offset += 2;
    }
    return chars.join('');
}

/**
 * Concatenates the base-16 form of every UTF-16 code unit in `str`.
 * No zero padding is applied, so each code unit contributes between one
 * and four digits.
 *
 * @param {string} str - Text to encode.
 * @returns {string} The unpadded hex digits, in order.
 */
function convertToHex(str) {
    var parts = [];
    for (var idx = 0; idx < str.length; idx += 1) {
        parts.push(str.charCodeAt(idx).toString(16));
    }
    return parts.join('');
}
Run Code Online (Sandbox Code Playgroud)

但是对于中文这样的 Unicode 字符,它会失败:

输入:漢字

输出:ªo"[W

有任何想法吗?这可以在javascript中完成吗?

McD*_*ell 86

请记住,JavaScript代码单元是16位宽.因此,十六进制字符串形式将是每个代码单元4位数.

用法:

// Encode the string to hex and decode it again, then show the result.
var str = "\u6f22\u5b57"; // "\u6f22\u5b57" === "漢字"
alert(str.hexEncode().hexDecode());
Run Code Online (Sandbox Code Playgroud)

字符串到十六进制形式

/**
 * Encodes this string as hex, emitting exactly four digits for every
 * UTF-16 code unit.
 *
 * @returns {string} Concatenated four-digit groups, one per code unit.
 */
String.prototype.hexEncode = function () {
    var groups = [];
    var pos = 0;

    while (pos < this.length) {
        var hex = this.charCodeAt(pos).toString(16);
        // Left-pad with zeros so every code unit occupies four digits.
        groups.push(("000"+hex).slice(-4));
        pos += 1;
    }

    return groups.join("");
};
Run Code Online (Sandbox Code Playgroud)

再转换回来:

/**
 * Decodes this string by splitting it into groups of up to four characters
 * and converting each group, read as base 16, into one UTF-16 code unit.
 *
 * @returns {string} The decoded text ("" when there is nothing to match).
 */
String.prototype.hexDecode = function () {
    // match() yields null when nothing matches; fall back to an empty list.
    var groups = this.match(/.{1,4}/g) || [];

    return groups.map(function (group) {
        return String.fromCharCode(parseInt(group, 16));
    }).join("");
};
Run Code Online (Sandbox Code Playgroud)

  • `"\u6f22\u5b57"` 是文字 `"漢字"` 的 Unicode 转义形式,就像 `\n` 是换行符一样。我倾向于使用它们来避免歧义并避免字符编码问题。有关详细信息,请参阅[规范](http://www.ecma-international.org/publications/standards/Ecma-262.htm)。要自己生成它们,请将上面的 `("000"+hex).slice(-4)` 更改为 `"\\u" + ("000"+hex).slice(-4)`。表达式 `"\u6f22\u5b57" === "漢字"` 的计算结果为 true,因为在代码解析之后它们是相同的。 (2认同)
  • 如果使用得票最高的答案中的算法,`"test"` 会被编码为 `"0074006500730074"`。这里没有 ASCII:JavaScript 字符串__始终__是 UTF-16。 (2认同)

Pav*_*nar 11

这取决于您使用的编码.如果要将utf-8编码的十六进制转换为字符串,请使用:

/**
 * Decodes UTF-8 bytes written as hex into a string.
 *
 * Each pair of characters is prefixed with "%" and the result is handed to
 * decodeURIComponent. If that throws, the failure is logged and the input
 * is returned unchanged.
 *
 * @param {string} hex - Hex digits of the UTF-8 byte sequence.
 * @param {*} [str] - Ignored; the incoming value is never read.
 * @returns {string} The decoded text, or `hex` itself when decoding fails.
 */
function fromHex(hex,str){
  try {
    var percentEncoded = hex.replace(/(..)/g,'%$1');
    return decodeURIComponent(percentEncoded);
  } catch (e) {
    console.log('invalid hex input: ' + hex);
    return hex;
  }
}
Run Code Online (Sandbox Code Playgroud)

对于其他方向使用此:

/**
 * Encodes a string as the hex digits of its UTF-8 bytes.
 *
 * Every byte is written as exactly two digits. Without the padding, bytes
 * below 0x10 (tab, newline, NUL, ...) would produce a single digit and the
 * output could no longer be split back into bytes.
 *
 * @param {string} str - Text to encode.
 * @param {*} [hex] - Ignored; the incoming value is never read.
 * @returns {string} Lowercase hex, two digits per UTF-8 byte. If encoding
 *   throws (for example encodeURIComponent rejecting a lone surrogate), the
 *   failure is logged and `str` is returned unchanged.
 */
function toHex(str,hex){
  try{
    // encodeURIComponent yields UTF-8 as %XX escapes; the legacy unescape()
    // turns each escape into a character whose code equals the byte value.
    hex = unescape(encodeURIComponent(str))
    .split('').map(function(v){
      // Zero-pad so each byte always occupies two digits.
      return ('0' + v.charCodeAt(0).toString(16)).slice(-2)
    }).join('')
  }
  catch(e){
    hex = str
    console.log('invalid text input: ' + str)
  }
  return hex
}
Run Code Online (Sandbox Code Playgroud)


Sim*_*han 11

一个更新的解决方案,用于编码:

// This is the same for all of the below, and
// you probably won't need it except for debugging
// in most cases.
/**
 * Formats a sequence of byte values as a lowercase hex string, two digits
 * per byte.
 *
 * @param {Iterable<number>|ArrayLike<number>} bytes - Byte values to format.
 * @returns {string} The concatenated two-digit hex pairs.
 */
function bytesToHex(bytes) {
  const hexPairs = Array.from(bytes).map((value) => {
    const digits = value.toString(16);
    return digits.padStart(2, "0");
  });
  return hexPairs.join("");
}

// You almost certainly want UTF-8, which is
// now natively supported:
/**
 * Encodes a string to its UTF-8 bytes using the built-in TextEncoder.
 *
 * @param {string} string - Text to encode.
 * @returns {Uint8Array} The UTF-8 byte sequence.
 */
function stringToUTF8Bytes(string) {
  const utf8Encoder = new TextEncoder();
  return utf8Encoder.encode(string);
}

// But you might want UTF-16 for some reason.
// .charCodeAt(index) will return the underlying
// UTF-16 code-units (not code-points!), so you
// just need to format them in whichever endian order you want.
/**
 * Encodes a string as UTF-16 bytes in the requested byte order.
 *
 * @param {string} string - Text to encode.
 * @param {boolean} [littleEndian] - Truthy for little-endian output;
 *   falsy or omitted for big-endian.
 * @returns {Uint8Array} Two bytes per UTF-16 code unit of `string`.
 */
function stringToUTF16Bytes(string, littleEndian) {
  const bytes = new Uint8Array(string.length * 2);
  // Using DataView is the only way to get a specific
  // endianness.
  const view = new DataView(bytes.buffer);
  for (let i = 0; i < string.length; i++) {
    // DataView offsets are in bytes, so code unit i starts at byte i * 2.
    view.setUint16(i * 2, string.charCodeAt(i), littleEndian);
  }
  return bytes;
}

// And you might want UTF-32 in even weirder cases.
// Fortunately, iterating a string gives the code
// points, which are identical to the UTF-32 encoding,
// though you still have the endianess issue.
/**
 * Encodes a string as UTF-32 bytes in the requested byte order.
 *
 * @param {string} string - Text to encode.
 * @param {boolean} [littleEndian] - Truthy for little-endian output;
 *   falsy or omitted for big-endian.
 * @returns {Uint8Array} Four bytes per code point of `string`.
 */
function stringToUTF32Bytes(string, littleEndian) {
  // Iterating a string yields whole code points, not UTF-16 code units.
  const codepoints = Array.from(string, c => c.codePointAt(0));
  const bytes = new Uint8Array(codepoints.length * 4);
  // Using DataView is the only way to get a specific
  // endianness.
  const view = new DataView(bytes.buffer);
  for (let i = 0; i < codepoints.length; i++) {
    // DataView offsets are in bytes, so code point i starts at byte i * 4.
    view.setUint32(i * 4, codepoints[i], littleEndian);
  }
  return bytes;
}
Run Code Online (Sandbox Code Playgroud)

例子:

// The sample text is ASCII "hello", the two CJK characters U+6F22 U+5B57,
// and U+1F44D, which lies outside the BMP and needs a surrogate pair in UTF-16.
bytesToHex(stringToUTF8Bytes("hello 漢字 👍"))
// "68656c6c6f20e6bca2e5ad9720f09f918d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", false))
// "00680065006c006c006f00206f225b570020d83ddc4d"
bytesToHex(stringToUTF16Bytes("hello 漢字 👍", true))
// "680065006c006c006f002000226f575b20003dd84ddc"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", false))
// "00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"
bytesToHex(stringToUTF32Bytes("hello 漢字 👍", true))
// "68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"
Run Code Online (Sandbox Code Playgroud)

对于解码,通常要简单得多,您只需要:

/**
 * Parses a hex string into bytes, two digits per byte.
 *
 * @param {string} hex - Hex digits to parse.
 * @returns {Uint8Array} One byte for each complete pair of digits.
 */
function hexToBytes(hex) {
    const bytes = new Uint8Array(hex.length / 2);
    let offset = 0;
    for (const index of bytes.keys()) {
        const pair = hex.slice(offset, offset + 2);
        bytes[index] = parseInt(pair, 16);
        offset += 2;
    }
    return bytes;
}
Run Code Online (Sandbox Code Playgroud)

然后使用 TextDecoder 的编码参数:

// UTF-8 is default
new TextDecoder().decode(hexToBytes("68656c6c6f20e6bca2e5ad9720f09f918d"));
// but you can also use:
new TextDecoder("UTF-16LE").decode(hexToBytes("680065006c006c006f002000226f575b20003dd84ddc"))
new TextDecoder("UTF-16BE").decode(hexToBytes("00680065006c006c006f00206f225b570020d83ddc4d"));
// Each of the three calls yields: "hello 漢字 👍"
Run Code Online (Sandbox Code Playgroud)

以下是允许的编码名称列表:https://www.w3.org/TR/encoding/#names-and-labels

您可能会注意到 UTF-32 不在该列表中,这很痛苦,因此:

/**
 * Decodes UTF-32 bytes into a string.
 *
 * @param {Uint8Array} bytes - UTF-32 encoded data. Only the window that
 *   `bytes` itself covers is read, so subarrays of a larger buffer decode
 *   correctly. Trailing bytes that do not fill a 4-byte unit are ignored.
 * @param {boolean} [littleEndian] - Truthy when the data is little-endian;
 *   falsy or omitted for big-endian.
 * @returns {string} The decoded text.
 * @throws {RangeError} If a 32-bit unit is not a valid code point
 *   (raised by String.fromCodePoint).
 */
function bytesToStringUTF32(bytes, littleEndian) {
  // Respect the view's own offset/length instead of reading the whole
  // underlying ArrayBuffer.
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  const count = Math.floor(view.byteLength / 4);
  // Append one code point at a time: spreading a large array into
  // String.fromCodePoint(...) can exceed the engine's argument limit.
  let result = "";
  for (let i = 0; i < count; i++) {
    result += String.fromCodePoint(view.getUint32(i * 4, littleEndian));
  }
  return result;
}
Run Code Online (Sandbox Code Playgroud)

然后:

bytesToStringUTF32(hexToBytes("00000068000000650000006c0000006c0000006f0000002000006f2200005b57000000200001f44d"), false)
bytesToStringUTF32(hexToBytes("68000000650000006c0000006c0000006f00000020000000226f0000575b0000200000004df40100"), true)
// Both calls yield: "hello 漢字 👍"
Run Code Online (Sandbox Code Playgroud)


red*_*off 9

以下是McDowell算法的一个调整,它没有填充结果:

  /**
   * Joins the base-16 form of each UTF-16 code unit of `str` without any
   * zero padding, so a code unit contributes between one and four digits.
   *
   * @param {string} str - Text to encode.
   * @returns {string} The unpadded hex digits, in order.
   */
  function toHex(str) {
    var pieces = [];
    var pos = 0;
    while (pos < str.length) {
      pieces.push(str.charCodeAt(pos).toString(16));
      pos += 1;
    }
    return pieces.join('');
  }
Run Code Online (Sandbox Code Playgroud)

  • 你为什么不想填充?这样一来十六进制输出就有歧义了 (6认同)

Mat*_*ens 8

如何在 JavaScript 中从 漢字 得到 "\u6f22\u5b57"?

这些是 JavaScript Unicode 转义序列,例如 \u12AB。要进行转换,您可以遍历字符串中的每个代码单元,对其调用 .toString(16),然后在此基础上继续处理。

不过,在可能的情况下,在输出中同时使用十六进制转义序列(例如 \xAA)会更高效。

另请注意,A、b 和 - 这样的 ASCII 符号可能不需要进行转义。

我写了一个小的JavaScript库来为你完成所有这些,称为jsesc.它有很多控制输出的选项.

以下是该工具的在线演示:http://mothereff.in/js-escapes#1%E6%BC%A2%E5%AD%97


您的问题被标记为utf-8.阅读其余的问题,UTF-8编码/解码似乎不是你想要的,但万一你需要它:使用utf8.js(在线演示).


Zib*_*bri 5

给你。:D

"??".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")
Run Code Online (Sandbox Code Playgroud)
"6f225b57"
Run Code Online (Sandbox Code Playgroud)

对于非 unicode

"hi".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")
Run Code Online (Sandbox Code Playgroud)
"6869"
Run Code Online (Sandbox Code Playgroud)

ASCII (utf-8) 二进制十六进制字符串到字符串

"68656c6c6f20776f726c6421".match(/.{1,2}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")
Run Code Online (Sandbox Code Playgroud)

字符串转 ASCII (utf-8) 二进制十六进制字符串

"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(2,"0"),"")
Run Code Online (Sandbox Code Playgroud)

--- 统一码 ---

字符串到 UNICODE (utf-16) 二进制十六进制字符串

"hello world!".split("").reduce((hex,c)=>hex+=c.charCodeAt(0).toString(16).padStart(4,"0"),"")
Run Code Online (Sandbox Code Playgroud)

UNICODE (utf-16) 二进制十六进制字符串到字符串

"00680065006c006c006f00200077006f0072006c00640021".match(/.{1,4}/g).reduce((acc,char)=>acc+String.fromCharCode(parseInt(char, 16)),"")
Run Code Online (Sandbox Code Playgroud)