Ant*_*rin 22 html javascript html5 client-side filereader
你可以帮我解决以下问题.
目标
在客户端(通过JS和HTML5类在浏览器中)逐行读取文件,而不将整个文件加载到内存中.
脚本
我正在处理网页,它应解析客户端的文件.目前,我正在阅读本文中描述的文件.
HTML:
<input type="file" id="files" name="files[]" />
Run Code Online (Sandbox Code Playgroud)
JavaScript的:
$("#files").on('change', function(evt){
// creating FileReader
var reader = new FileReader();
// assigning handler
reader.onloadend = function(evt) {
lines = evt.target.result.split(/\r?\n/);
lines.forEach(function (line) {
parseLine(...);
});
};
// getting File instance
var file = evt.target.files[0];
// start reading
reader.readAsText(file);
}
Run Code Online (Sandbox Code Playgroud)
问题是FileReader立即读取整个文件,这会导致大文件崩溃的选项卡(大小> = 300 MB).使用reader.onprogress不能解决问题,因为它只会增加结果,直到它达到极限.
发明一个轮子
我已经在互联网上做了一些研究,并没有找到任何简单的方法来做到这一点(有很多文章描述了这个确切的功能,但在服务器端为node.js).
作为解决它的唯一方法,我只看到以下内容:
File.split(startByte, endByte)方法)但我最好使用现有的东西来避免熵增长.
Ant*_*rin 14
最终我创建了新的逐行阅读器,这与以前的阅读器完全不同.
特点是:
查看这个jsFiddle以获取示例.
用法:
// Initialization
var file; // HTML5 File object
var navigator = new FileNavigator(file);
// Read some amount of lines (best performance for sequential file reading)
navigator.readSomeLines(startingFromIndex, function (err, index, lines, eof, progress) { ... });
// Read exact amount of lines
navigator.readLines(startingFromIndex, count, function (err, index, lines, eof, progress) { ... });
// Find first from index
navigator.find(pattern, startingFromIndex, function (err, index, match) { ... });
// Find all matching lines
navigator.findAll(new RegExp(pattern), indexToStartWith, limitOfMatches, function (err, index, limitHit, results) { ... });
Run Code Online (Sandbox Code Playgroud)
性能与以前的解决方案相同.你可以测量它在jsFiddle中调用'Read'.
GitHub:https://github.com/anpur/client-line-navigator/wiki
更新:从我的第二个答案检查LineNavigator,那个读者更好.
我制作了自己的读者,满足了我的需求.
性能
由于问题仅与大文件有关,因此性能是最重要的部分.

如您所见,性能与直接读取几乎相同(如上所述).
目前我正在努力使它变得更好,因为更大的时间消费者是异步调用以避免调用堆栈限制命中,这对于执行问题不是必需的.性能问题解决了.
质量
以下案例进行了测试:
代码和用法
HTML:
<input type="file" id="file-test" name="files[]" />
<div id="output-test"></div>
Run Code Online (Sandbox Code Playgroud)
用法:
$("#file-test").on('change', function(evt) {
var startProcessing = new Date();
var index = 0;
var file = evt.target.files[0];
var reader = new FileLineStreamer();
$("#output-test").html("");
reader.open(file, function (lines, err) {
if (err != null) {
$("#output-test").append('<span style="color:red;">' + err + "</span><br />");
return;
}
if (lines == null) {
var milisecondsSpend = new Date() - startProcessing;
$("#output-test").append("<strong>" + index + " lines are processed</strong> Miliseconds spend: " + milisecondsSpend + "<br />");
return;
}
// output every line
lines.forEach(function (line) {
index++;
//$("#output-test").append(index + ": " + line + "<br />");
});
reader.getNextLine();
});
reader.getNextLine();
});
Run Code Online (Sandbox Code Playgroud)
码:
function FileLineStreamer() {
var loopholeReader = new FileReader();
var chunkReader = new FileReader();
var delimiter = "\n".charCodeAt(0);
var expectedChunkSize = 15000000; // Slice size to read
var loopholeSize = 200; // Slice size to search for line end
var file = null;
var fileSize;
var loopholeStart;
var loopholeEnd;
var chunkStart;
var chunkEnd;
var lines;
var thisForClosure = this;
var handler;
// Reading of loophole ended
loopholeReader.onloadend = function(evt) {
// Read error
if (evt.target.readyState != FileReader.DONE) {
handler(null, new Error("Not able to read loophole (start: )"));
return;
}
var view = new DataView(evt.target.result);
var realLoopholeSize = loopholeEnd - loopholeStart;
for(var i = realLoopholeSize - 1; i >= 0; i--) {
if (view.getInt8(i) == delimiter) {
chunkEnd = loopholeStart + i + 1;
var blob = file.slice(chunkStart, chunkEnd);
chunkReader.readAsText(blob);
return;
}
}
// No delimiter found, looking in the next loophole
loopholeStart = loopholeEnd;
loopholeEnd = Math.min(loopholeStart + loopholeSize, fileSize);
thisForClosure.getNextBatch();
};
// Reading of chunk ended
chunkReader.onloadend = function(evt) {
// Read error
if (evt.target.readyState != FileReader.DONE) {
handler(null, new Error("Not able to read loophole"));
return;
}
lines = evt.target.result.split(/\r?\n/);
// Remove last new line in the end of chunk
if (lines.length > 0 && lines[lines.length - 1] == "") {
lines.pop();
}
chunkStart = chunkEnd;
chunkEnd = Math.min(chunkStart + expectedChunkSize, fileSize);
loopholeStart = Math.min(chunkEnd, fileSize);
loopholeEnd = Math.min(loopholeStart + loopholeSize, fileSize);
thisForClosure.getNextBatch();
};
this.getProgress = function () {
if (file == null)
return 0;
if (chunkStart == fileSize)
return 100;
return Math.round(100 * (chunkStart / fileSize));
}
// Public: open file for reading
this.open = function (fileToOpen, linesProcessed) {
file = fileToOpen;
fileSize = file.size;
loopholeStart = Math.min(expectedChunkSize, fileSize);
loopholeEnd = Math.min(loopholeStart + loopholeSize, fileSize);
chunkStart = 0;
chunkEnd = 0;
lines = null;
handler = linesProcessed;
};
// Public: start getting new line async
this.getNextBatch = function() {
// File wasn't open
if (file == null) {
handler(null, new Error("You must open a file first"));
return;
}
// Some lines available
if (lines != null) {
var linesForClosure = lines;
setTimeout(function() { handler(linesForClosure, null) }, 0);
lines = null;
return;
}
// End of File
if (chunkStart == fileSize) {
handler(null, null);
return;
}
// File part bigger than expectedChunkSize is left
if (loopholeStart < fileSize) {
var blob = file.slice(loopholeStart, loopholeEnd);
loopholeReader.readAsArrayBuffer(blob);
}
// All file can be read at once
else {
chunkEnd = fileSize;
var blob = file.slice(chunkStart, fileSize);
chunkReader.readAsText(blob);
}
};
};
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
26039 次 |
| 最近记录: |