Tgr*_*Tgr 19
// UTF-8 byte length of `text`: encodeURIComponent emits one %XX escape per
// UTF-8 byte of every character it escapes, and each escape is collapsed to a
// single character here, so the length of the result equals the byte count.
// NOTE: encodeURIComponent throws on strings containing invalid (unpaired)
// surrogates, so this expression is not safe for arbitrary input.
encodeURIComponent(text).replace(/%[A-F\d]{2}/g, 'U').length
Run Code Online (Sandbox Code Playgroud)
bro*_*ofa 17
结合各种答案,以下方法应该快速准确,并避免可能导致encodeURIComponent()错误的无效代理对的问题:
/**
 * Returns the number of bytes needed to encode a string as UTF-8.
 *
 * Walks the string one UTF-16 code unit at a time:
 *   - U+0000..U+007F   -> 1 byte
 *   - U+0080..U+07FF   -> 2 bytes
 *   - valid surrogate pair (one code point above U+FFFF) -> 4 bytes
 *   - everything else in the BMP, including U+FFFF -> 3 bytes
 *
 * Unpaired surrogates are counted as 3 bytes each, which is the size of the
 * U+FFFD replacement character that UTF-8 encoders substitute for them.
 *
 * @param {string} s - The string to measure.
 * @returns {number} The UTF-8 encoded length of `s` in bytes.
 */
function getUTF8Length(s) {
  var len = 0;
  for (var i = 0; i < s.length; i++) {
    var code = s.charCodeAt(i);
    if (code <= 0x7f) {
      len += 1;
    } else if (code <= 0x7ff) {
      len += 2;
    } else if (code >= 0xd800 && code <= 0xdbff) {
      // High surrogate: only treat it as a pair when a low surrogate really
      // follows. charCodeAt() yields NaN past the end, which fails both tests.
      var next = s.charCodeAt(i + 1);
      if (next >= 0xdc00 && next <= 0xdfff) {
        len += 4;
        i++; // the low half has been accounted for
      } else {
        len += 3; // unpaired high surrogate; do not swallow the next unit
      }
    } else {
      // Remaining BMP code units (U+0800..U+FFFF), including unpaired low
      // surrogates, all take 3 bytes.
      len += 3;
    }
  }
  return len;
}
Run Code Online (Sandbox Code Playgroud)
der*_*cki 14
编辑:正如didier-l所指出的,此函数不能正确计算代理字符.
broofa的答案应该能正确计算代理对(surrogate pairs),请参阅/sf/answers/854426261/.
我在这里测试了两个提出的版本以及一个天真的实现:
/**
 * Naive UTF-8 byte counter (object-literal method).
 *
 * Classifies every UTF-16 code unit independently as 1, 2 or 3 bytes.
 * Known limitation: surrogate code units fall into the 3-byte branch, so a
 * character outside the BMP (one surrogate pair) is counted as 6 bytes
 * instead of 4.
 *
 * @param {string} string - The string to measure.
 * @returns {number} The estimated UTF-8 length in bytes.
 */
getUTF8Length: function(string) {
var utf8length = 0;
for (var n = 0; n < string.length; n++) {
var c = string.charCodeAt(n);
// 7-bit ASCII: one byte.
if (c < 128) {
utf8length++;
}
// U+0080..U+07FF: two bytes.
else if((c > 127) && (c < 2048)) {
utf8length = utf8length+2;
}
// Every other code unit, surrogate halves included: three bytes.
else {
utf8length = utf8length+3;
}
}
return utf8length;
}
Run Code Online (Sandbox Code Playgroud)
结果是,我的版本在Firefox中稍快一些,而在Chrome中则比这里发布的其他版本快得多(约30倍).
fra*_*eff 14
如果你的字符串中有非bmp字符,那就更复杂了......
因为JavaScript使用UTF-16编码,一个"字符"是一个2字节(16位)的编码单元,所以需要两个编码单元才能表示的字符(即BMP之外的字符,在UTF-8中占4个字节)无法按单个"字符"处理:
<script type="text/javascript">
var nonBmpString = "foo€";
console.log( nonBmpString.length );
// Logs 4, not 5: despite the variable name, "€" (U+20AC) lies inside the BMP
// and occupies a single UTF-16 code unit. Only characters outside the BMP
// (stored as a surrogate pair) contribute 2 to `length`.
</script>
Run Code Online (Sandbox Code Playgroud)
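字符"€"(U+20AC)在UTF-8中占3个字节(24位),但它位于BMP之内,在JavaScript中只占1个16位编码单元,所以"foo€".length是4.只有BMP之外的字符(例如"😀",在UTF-8中占4个字节)才会被JavaScript当作2个"字符"(一个代理对),因为在JS中,一个字符是一个16位块.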
因此,要正确获取混合字符串的字节大小,我们必须编写自己的函数fixedCharCodeAt();
/**
 * Surrogate-aware replacement for String.prototype.charCodeAt().
 *
 * - At the high half of a valid surrogate pair, returns the full code point
 *   (>= 0x10000) that the pair encodes.
 * - At the low half of a valid pair, returns `false` so that a loop visiting
 *   every index can skip it (the pair was handled at the previous index).
 * - At an unpaired surrogate inside the string, returns that surrogate's own
 *   code unit instead of computing a bogus code point or dropping it.
 * - Otherwise returns the plain code unit (NaN when `idx` is out of range,
 *   exactly like charCodeAt()).
 *
 * @param {string} str - The string to read from.
 * @param {number} [idx=0] - Index of the UTF-16 code unit to inspect.
 * @returns {number|false} Code point / code unit, or `false` for the low half
 *   of a valid pair.
 * @throws {string} When a high surrogate is the last code unit of the string.
 *   The thrown value is a plain string (German for "No valid character or
 *   memory error!"); it is kept unchanged for backward compatibility.
 */
function fixedCharCodeAt(str, idx) {
  var index = idx || 0;
  var code = str.charCodeAt(index);

  // High surrogate (could change last hex to 0xDB7F to treat high private
  // surrogates as single characters).
  if (0xD800 <= code && code <= 0xDBFF) {
    var low = str.charCodeAt(index + 1);
    if (isNaN(low)) {
      // Nothing follows the high surrogate at all.
      throw 'Kein gültiges Schriftzeichen oder Speicherfehler!';
    }
    if (low < 0xDC00 || low > 0xDFFF) {
      // Followed by something that is not a low surrogate: report the lone
      // surrogate itself rather than combining unrelated code units.
      return code;
    }
    return ((code - 0xD800) * 0x400) + (low - 0xDC00) + 0x10000;
  }

  // Low surrogate.
  if (0xDC00 <= code && code <= 0xDFFF) {
    var prev = str.charCodeAt(index - 1); // NaN at index 0
    if (0xD800 <= prev && prev <= 0xDBFF) {
      // Second half of a pair already reported at the previous index.
      return false;
    }
    // Unpaired low surrogate: there is no previous iteration that covered it.
    return code;
  }

  return code;
}
Run Code Online (Sandbox Code Playgroud)
现在我们可以计算字节...
/**
 * Counts the bytes needed to encode a string as UTF-8.
 *
 * Each index is resolved through fixedCharCodeAt(); indexes for which it
 * returns a non-number (the low half of a surrogate pair) contribute nothing
 * because the pair was already counted at the preceding index.
 *
 * UTF-8 never needs more than 4 bytes per code point (the Unicode range ends
 * at U+10FFFF), so there are no 5- or 6-byte cases.
 *
 * @param {string} str - The string to measure.
 * @returns {number} The UTF-8 encoded length of `str` in bytes.
 * @throws Whatever fixedCharCodeAt() throws, e.g. for a high surrogate at the
 *   very end of the string.
 */
function countUtf8(str) {
  var result = 0;
  for (var n = 0; n < str.length; n++) {
    var charCode = fixedCharCodeAt(str, n);
    if (typeof charCode !== "number") {
      continue; // second half of a surrogate pair
    }
    if (charCode < 0x80) {
      result += 1;
    } else if (charCode < 0x800) {
      result += 2;
    } else if (charCode < 0x10000) {
      result += 3;
    } else {
      result += 4;
    }
  }
  return result;
}
Run Code Online (Sandbox Code Playgroud)
顺便说一下......你不应该使用encodeURI方法,因为它是一个原生的浏览器功能;)
更多东西:
干杯
frankneff.ch / @frank_neff
Run Code Online (Sandbox Code Playgroud)