tri*_*e84 182 javascript regex string split
我想将一个非常大的字符串(比方说,10,000个字符)拆分成若干个大小为N的块.
这样做的最佳方式是什么?
例如:
"1234567890"拆分为2将成为["12", "34", "56", "78", "90"].
是否可以使用String.prototype.match来实现这样的功能?如果可以,那么就性能而言,这是最佳的做法吗?
Viv*_*ath 412
你可以这样做:
"1234567890".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "90"]
Run Code Online (Sandbox Code Playgroud)
该方法仍然适用于大小不是块大小的精确倍数的字符串:
"123456789".match(/.{1,2}/g);
// Results in:
["12", "34", "56", "78", "9"]
Run Code Online (Sandbox Code Playgroud)
通常,对于要从中提取最多n个大小的子字符串的任何字符串,您可以执行以下操作:
str.match(/.{1,n}/g); // Replace n with the size of the substring
Run Code Online (Sandbox Code Playgroud)
如果您的字符串可以包含换行符或回车符,您可以:
str.match(/(.|[\r\n]){1,n}/g); // Replace n with the size of the substring
Run Code Online (Sandbox Code Playgroud)
就性能而言,我用大约10k个字符的字符串尝试了这个方法,在Chrome上花了一秒多一点的时间.具体结果可能因环境而异.
也可以把它封装成一个可重用的函数:
/**
 * Split a string into consecutive chunks of at most `length` UTF-16 code units.
 *
 * The last chunk is shorter when `str.length` is not a multiple of `length`.
 *
 * @param {string} str - Text to split.
 * @param {number} length - Maximum chunk size; interpolated into the regex quantifier.
 * @returns {string[]} The chunks in order; an empty array for an empty string.
 */
function chunkString(str, length) {
  // [\s\S] rather than "." so line terminators stay inside the chunks
  // instead of being silently skipped by the match.
  const pattern = new RegExp(`[\\s\\S]{1,${length}}`, 'g');
  // match() returns null when nothing matches (empty input); normalise to [].
  return str.match(pattern) || [];
}
Run Code Online (Sandbox Code Playgroud)
Jus*_*tin 29
我创建了几个更快的变种,你可以在jsPerf上看到.我最喜欢的是:
/**
 * Split a string into consecutive chunks of `size` UTF-16 code units,
 * pre-allocating the result array.
 *
 * @param {string} str - Text to split.
 * @param {number} size - Chunk size; must be a positive integer.
 * @returns {string[]} The chunks in order; the last one may be shorter.
 * @throws {RangeError} If `size` is not a positive integer.
 */
function chunkSubstr(str, size) {
  // Fail fast: a fractional or string size used to yield corrupted chunks,
  // and zero/negative sizes surfaced as an obscure "Invalid array length".
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError('chunk size must be a positive integer')
  }
  const numChunks = Math.ceil(str.length / size)
  const chunks = new Array(numChunks)
  for (let i = 0, o = 0; i < numChunks; ++i, o += size) {
    // slice() replaces the deprecated substr()
    chunks[i] = str.slice(o, o + size)
  }
  return chunks
}
Run Code Online (Sandbox Code Playgroud)
Tha*_*you 15
这是最快,最高性能的解决方案:
/**
 * Cut `str` into pieces of `len` characters each, left to right.
 *
 * The result array is allocated up front from the computed piece count;
 * the final piece holds whatever is left over and may be shorter.
 *
 * @param {string} str - Text to cut up.
 * @param {number} len - Number of characters per piece.
 * @returns {string[]} The pieces, in their original order.
 */
function chunkString (str, len) {
  const total = Math.ceil(str.length / len)
  const pieces = Array(total)
  let filled = 0
  let cursor = 0
  while (filled < total) {
    pieces[filled] = str.substr(cursor, len)
    filled += 1
    cursor += len
  }
  return pieces
}
// Demo: chunk a short string and print the pieces.
console.log(chunkString("helloworld", 3))
// => [ "hel", "low", "orl", "d" ]

// Timing run: build a 10,000-character string and measure a single call.
const bigString = "helloworld".repeat(1000)
// Only the chunkString call sits between time() and timeEnd(), so the
// string construction above and the logging below are not measured.
console.time("perf")
const result = chunkString(bigString, 3)
console.timeEnd("perf")
console.log(result)
// => perf: 0.385 ms   (figure reported by the author; it will vary by machine and engine)
// => [ "hel", "low", "orl", "dhe", "llo", "wor", ... ]
Run Code Online (Sandbox Code Playgroud)
将它与其他人比较 ; 我赢了 :)
小智 13
你绝对可以做类似的事情
// split() on a pattern with a capturing group keeps every captured 2-character
// chunk in the result, interleaved with empty separators. The filter keeps only
// the full-size chunks, dropping the separators and the short trailing " ".
const pieces = "1234567890 "
  .split(/(.{2})/)
  .filter((part) => part.length === 2);
Run Code Online (Sandbox Code Playgroud)
得到这个:
[ '12', '34', '56', '78', '90' ]
Run Code Online (Sandbox Code Playgroud)
如果你想动态输入/调整块大小以使块的大小为n,你可以这样做:
// Chunk size. Declared explicitly: a bare `n = 2` creates an implicit global
// and throws a ReferenceError in strict mode / ES modules.
const n = 2;
// The capturing group makes split() keep each n-character chunk; the filter
// then discards the empty separators and any shorter trailing remainder.
const pieces = "1234567890 "
  .split(new RegExp(`(.{${n}})`))
  .filter((part) => part.length === n);
Run Code Online (Sandbox Code Playgroud)
要查找原始字符串中所有可能的大小为 n 的块,请尝试以下操作:
// Collect every distinct substring of length n that occurs in str.
const n = 2;
const str = "1234567890 ";
const subs = new Set();
// Slide a window of width n over the string one position at a time.
// This visits the same substrings as re-splitting the string at each of the
// n possible offsets, but leaves `str` intact and inserts the chunks in
// left-to-right order.
for (let start = 0; start + n <= str.length; start++) {
  subs.add(str.slice(start, start + n));
}
Run Code Online (Sandbox Code Playgroud)
你最终应该得到:
[ '12', '23', '34', '45', '56', '67', '78', '89', '90', '0 ' ]
Run Code Online (Sandbox Code Playgroud)
Foz*_*ozi 10
惊喜!你可以直接用 split 来拆分.
// Splitting on a capturing group keeps the captured 2-character chunks in the
// output; filter(Boolean) then removes the empty strings left between them.
// The short trailing remainder (" ") is kept.
const parts = "1234567890 ".split(/(.{2})/).filter(Boolean);
Run Code Online (Sandbox Code Playgroud)
结果是 [ '12', '34', '56', '78', '90', ' ' ]
下面是带有预分配的左对齐和右对齐两个版本。对于小块,它与正则表达式实现一样快,而随着块大小的增大,它会更快。而且它的内存效率很高。
/**
 * Break a string into pieces of `size` characters, filling from the left,
 * so any shorter leftover piece ends up last.
 *
 * @param {*} str - Value to split; anything that is not a string is ignored.
 * @param {number} [size=3] - Characters per piece.
 * @returns {string[]|undefined} The pieces in order, or undefined when `str`
 *   is not a string.
 */
function chunkLeft (str, size = 3) {
  if (typeof str !== 'string') {
    return undefined
  }
  const total = str.length
  // Allocate the result once, sized to the number of pieces.
  const pieces = Array(Math.ceil(total / size))
  let slot = 0
  let start = 0
  while (start < total) {
    const end = start + size
    pieces[slot] = str.slice(start, end)
    slot += 1
    start = end
  }
  return pieces
}
/**
 * Break a string into pieces of `size` characters, aligned to the right:
 * when the length is not a multiple of `size`, the FIRST piece is the short one.
 *
 * @param {*} str - Value to split; anything that is not a string is ignored.
 * @param {number} [size=3] - Characters per piece.
 * @returns {string[]|undefined} The pieces in order, or undefined when `str`
 *   is not a string.
 */
function chunkRight (str, size = 3) {
  if (typeof str !== 'string') {
    return undefined
  }
  const total = str.length
  const pieces = Array(Math.ceil(total / size))
  if (total === 0) {
    return pieces
  }
  // The leading piece takes the remainder (or a full piece when the length
  // divides evenly), so every later piece is exactly `size` long.
  const headLength = total % size || size
  pieces[0] = str.slice(0, headLength)
  let slot = 1
  let start = pieces[0].length
  while (start < total) {
    const end = start + size
    pieces[slot] = str.slice(start, end)
    slot += 1
    start = end
  }
  return pieces
}
// Usage examples for chunkRight with the default size of 3.
// The trailing comment on each line is the value that gets logged.
// No argument: str is not a string, so the function returns nothing.
console.log(chunkRight()) // undefined
console.log(chunkRight('')) // []
console.log(chunkRight('1')) // ["1"]
console.log(chunkRight('123')) // ["123"]
// From here on the length is not always a multiple of 3: the short piece comes first.
console.log(chunkRight('1234')) // ["1", "234"]
console.log(chunkRight('12345')) // ["12", "345"]
console.log(chunkRight('123456')) // ["123", "456"]
console.log(chunkRight('1234567')) // ["1", "234", "567"]
Run Code Online (Sandbox Code Playgroud)
// Split str into pieces of chunkSize characters; the last piece keeps
// whatever is left over.
const str = "123456789";
const chunkSize = 2;
const chunks = [];
// Walk the string by index instead of repeatedly cutting the front off it:
// the input stays intact and the remainder is not re-copied on every pass.
for (let start = 0; start < str.length; start += chunkSize) {
  chunks.push(str.slice(start, start + chunkSize));
}
alert(chunks); // chunks == 12,34,56,78,9
Run Code Online (Sandbox Code Playgroud)
我写了一个扩展函数,所以块长度也可以是一个数字数组,比如[1,3]
/**
 * Split this string into chunks.
 *
 * `len` is either a single positive number (every chunk has that size) or a
 * non-empty array of sizes that is cycled through until the string is used up,
 * e.g. "1234567890123".chunkString([1, 3]) gives
 * ['1', '234', '5', '678', '9', '012', '3'].
 *
 * @param {number|number[]} len - Chunk size, or a repeating pattern of sizes.
 * @returns {string[]|undefined} The chunks in order; [] for an empty string;
 *   undefined when `len` is unusable (not a positive number, not a non-empty
 *   array, or an array with no positive entry).
 */
String.prototype.chunkString = function(len) {
  const text = String(this);
  const total = text.length;
  if (total < 1) {
    return [];
  }

  if (typeof len === 'number' && len > 0) {
    const count = Math.ceil(total / len);
    const chunks = new Array(count);
    for (let i = 0, offset = 0; i < count; i++, offset += len) {
      chunks[i] = text.substring(offset, offset + len);
    }
    return chunks;
  }

  // typeof null is 'object', so rule it out before touching len.length.
  if (len !== null && typeof len === 'object' && len.length) {
    const chunks = [];
    let position = 0;
    do {
      const before = position;
      len.forEach(function(entry) {
        const width = Number(entry);
        // Skip zero, negative and NaN widths: substring() swaps its arguments
        // when the end precedes the start, which would re-emit earlier text.
        if (!(width > 0)) {
          return;
        }
        const chunk = text.substring(position, position + width);
        if (chunk !== '') {
          chunks.push(chunk);
          position += chunk.length;
        }
      });
      if (position === before) {
        return undefined; // no entry made progress (e.g. len = [0]): avoid an endless loop
      }
    } while (position < total);
    return chunks;
  }

  return undefined;
};
Run Code Online (Sandbox Code Playgroud)
代码
"1234567890123".chunkString([1,3])
Run Code Online (Sandbox Code Playgroud)
将返回:
[ '1', '234', '5', '678', '9', '012', '3' ]
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
117629 次 |
| 最近记录: |