使用Node.js写入大文件

nab*_*nab 27 large-files node.js

我正在使用Node.js的可写流写入一个大文件:

var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', { flags: 'w' });

// Keep pulling batches from getLines() until it returns a falsy value and
// hand every line to the stream. The return value of write() is not checked.
var lines = getLines();
while (lines) {
    var i = 0;
    while (i < lines.length) {
        stream.write(lines[i]);
        i += 1;
    }
    lines = getLines();
}
Run Code Online (Sandbox Code Playgroud)

我想知道在不使用drain事件的情况下,这种方案是否安全?如果不安全(我认为是这样),那么将任意大的数据写入文件的正确模式是什么?

nab*_*nab 22

这就是我最终做到的.其背后的想法是创建实现ReadStream接口的可读流,然后使用pipe()方法将数据传输到可写流.

var fs = require('fs');

// Destination file stream and the custom readable source that feeds it.
var writeStream = fs.createWriteStream('someFile.txt', { flags : 'w' });
var readStream = new MyReadStream();

// Called once the destination stream emits 'close'.
function onWriteStreamClosed() {
    console.log('All done!');
}

writeStream.on('close', onWriteStreamClosed);

// Let pipe() move the data from the source into the file.
readStream.pipe(writeStream);
Run Code Online (Sandbox Code Playgroud)

MyReadStream类的示例可以从mongoose QueryStream中获取.

  • 当我们只想把数据写入文件时,为什么还需要ReadStream()? (13认同)

jco*_*and 11

drain事件背后的想法是,你应该在这里用它来做检测:

var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', {flags: 'w'});

// Fetch batches until getLines() returns a falsy value; each line is written
// without looking at what write() returns.
var lines;
for (;;) {
    lines = getLines();
    if (!lines) {
        break;
    }
    for (var i = 0; i < lines.length; i += 1) {
        stream.write(lines[i]); // <-- this is the place to test
    }
}
Run Code Online (Sandbox Code Playgroud)

而你并没有这样做.因此,你需要重新设计代码,使其"可重入".

var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', {flags: 'w'});

// Fetch batches until getLines() returns a falsy value, capturing the result
// of every write() so it can be tested.
var lines = getLines();
while (lines) {
    for (var i = 0; i < lines.length; i += 1) {
        var accepted = stream.write(lines[i]); // <-- this is the place to test
        if (accepted) {
            continue;
        }
        // Placeholder: something has to happen here to wait until it is safe
        // to write again. That means preparing a buffer and waiting until we
        // can come back to finish:
        //   lines[i] -> remainder
    }
    lines = getLines();
}
Run Code Online (Sandbox Code Playgroud)

但是,这是否意味着在等待期间你还需要继续缓冲getLines返回的数据?

var fs = require('fs');
var stream = fs.createWriteStream('someFile.txt', {flags: 'w'});

// Lines already fetched from getLines() that have not been handed to the
// stream yet.
var buffer = {
    remainingLines: []
};

/**
 * Writes the buffered lines to the stream, refilling the buffer from
 * getLines() whenever it runs empty.
 *
 * Stops when getLines() returns a falsy value (no more data), or when
 * stream.write() returns false. In the latter case the unwritten lines are
 * kept in buffer.remainingLines and this function re-arms itself on the
 * stream's next 'drain' event, which makes the loop re-entrant.
 */
function writePending() {
    var lines;
    for (;;) {
        if (buffer.remainingLines.length === 0) {
            lines = getLines();
            if (!lines) {
                return; // the source is exhausted, nothing left to write
            }
            buffer.remainingLines = lines;
        }

        for (var i = 0; i < buffer.remainingLines.length; i++) {
            var written = stream.write(buffer.remainingLines[i]); //<-- the place to test
            if (!written) {
                // write() has already queued line i even though it returned
                // false, so only the lines after it still need to be written.
                buffer.remainingLines = buffer.remainingLines.slice(i + 1);
                // Come back and finish once the stream has drained.
                stream.once('drain', writePending);
                return;
            }
        }

        // The whole batch was accepted; fetch the next one.
        buffer.remainingLines = [];
    }
}

writePending();
Run Code Online (Sandbox Code Playgroud)

  • 没有必要使用自己的缓冲区.Node.js已经为您完成了.读源文件nodejs-source/lib/fs.js#WriteStream.prototype.write (3认同)

Tyl*_*ler 6

处理这个问题最简洁的方法是把你的行生成器做成一个可读流——我们称之为lineReader。这样,下面的代码就会自动为您处理好缓冲和drain:

// Open the destination file, then let pipe() feed it from lineReader.
var destination = fs.createWriteStream('someFile.txt');
lineReader.pipe(destination);
Run Code Online (Sandbox Code Playgroud)

如果您不想创建可读流,可以检查write的返回值来判断缓冲区是否已满,并按如下方式处理:

var i = 0, n = lines.length;

/**
 * Writes lines[i] .. lines[n - 1] to stream, one after another.
 *
 * When stream.write() returns false and lines are still left, the function
 * stops and schedules itself to run again on the next 'drain' event. Once
 * every line has been handed to the stream it returns without registering
 * another listener.
 */
function write () {
  while (i < n) {
    if (!stream.write(lines[i++]) && i < n) {
      // Wait for 'drain' before writing the remaining lines.
      stream.once('drain', write);
      return;
    }
  }
  // A callback could go here to know when it's done.
}
write();  // Initial call.
Run Code Online (Sandbox Code Playgroud)

可以在此处找到这种情况的更长示例。