Node.js / Gzip:图像文件下载提前结束,但没有报错

Bon*_*nes 8 javascript node.js

我正在用Node.js编写一个下载图像文件的应用程序。但是,我遇到一个问题:如果下载速度很慢或连接中断,正在下载的图像就会被截断。如果程序能抛出一个错误,提醒我图像没有下载完整,情况还不算太糟;然而它并不会报错,而是像图像已成功下载一样继续解析。无论是在下载后检查响应状态码,还是探测(probe)图像,我都没有收到任何错误消息。

这是我用来下载图像的当前代码。

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Where the downloaded image is written on disk.
const filename = 'C:/Users/User/Desktop/myimage.jpg';
// Address of the image to fetch (presumably a placeholder in this snippet).
const imageUrl = 'www.linktomyimage.com/image.jpg';
// Start the HTTP request, then hand it to the downloader which streams it to `filename`.
const req = request(createRequestHeaders(imageUrl));
downloadImage(req, filename);

/**
 * Build the options object passed to `request` for a single image download.
 *
 * Reads `userid`, `phash` and `website` from the enclosing scope (they are not
 * defined in this snippet).
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {object} Options with `url`, `method`, `gzip` and `headers`
 *   (`Referer` and `Cookie`).
 */
function createRequestHeaders(url) {
    // The cookie is built first, then the referer, matching the original evaluation order.
    const sessionCookie = `userid=${userid}; phash=${phash};`;
    const referer = `https://${website}/`;
    const headers = { 'Referer': referer, 'Cookie': sessionCookie };

    return { 'url': url, 'method': 'GET', 'gzip': true, 'headers': headers };
}

function downloadImage(req, filename) {
    return new Promise((resolve, reject) => {
        var response = null;
        var bytes;
        var dirname = path.dirname(filename);
        if (!fs.existsSync(dirname)) fs.ensureDirSync(dirname);

        progress(req, { delay: 0 }).on('progress', function (state) {
            updateDownloadSpeed(state.speed);
        }).on('end', function () {
            if (response.statusCode == 200) {
                var input = require('fs').createReadStream(filename);
                probe(input).then(result => {
                    input.destroy();
                    if (result != null) {
                        resolve({bytes: bytes, width: result.width,
                                height: result.height,});
                    } else {
                        // The image size probe does not detect if the download was truncated
                        reject({ 'name': 'ImageMissingOrCorrupt');
                    }
                }).catch((error) => {
                    reject(error);
                });
            } else {
                // This is never triggered when the download stops and the image is truncated
                reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
            }
        }).on('response', function (resp) {
            response = resp;
            bytes = response.headers['content-length'];
        }).on('error', function (error) {
            // This does not detect when an image is truncated either
            reject(error);
        }).pipe(fs.createWriteStream(filename));
    });
}
Run Code Online (Sandbox Code Playgroud)

在这里询问了该问题的早期版本,但是在实施了答案的建议后,我的下载器仍然存在相同的问题。从那时起,我的下载程序的代码也已更改。

如何检测图像何时被截断,以便指示下载程序重新尝试下载图像?

编辑1

读完这篇文章后,我认为我的问题可能与我下载图像的网站有关:该网站要求我使用gzip,而当服务器停止响应时,客户端无法判断内容是否已经真正下载完毕。但是,我不确定该如何验证这种情况。

编辑2

这是我的下载程序成功连接到图像时收到的响应头。

cache-control:"public, max-age=31536000"
connection:"close"
content-disposition:"inline; filename=129.jpg"
content-length:"185756"
content-transfer-encoding:"binary"
content-type:"image/jpeg"
date:"Thu, 05 Sep 2019 00:15:11 GMT"
expires:"Fri, 04 Sep 2020 00:15:11 GMT"
server:"Apache"
Run Code Online (Sandbox Code Playgroud)

小智 5

我建议您检查 writeStream 的 bytesWritten 属性是否等于您收到的 content-length 响应头的值。

我试着运行了您的代码,发现请求的 end 事件在 writeStream 关闭之前就被触发了,所以我认为在该事件中您实际上无法检查这个属性。您应该改为在 writeStream 的 close 事件中进行检查。

试试这个示例代码并告诉我们它是如何进行的:

const fs = require('fs-extra');
const request = require('request');
var probe = require('probe-image-size');
var progress = require('request-progress');

// Where the downloaded image is written on disk.
const filename = 'C:/Users/User/Desktop/myimage.jpg';
// Write stream for the destination file; `downloadImage` refers to it by this
// exact name, so the identifier is kept as-is.
const writeSteam = fs.createWriteStream(filename);
// Address of the image to fetch (presumably a placeholder in this snippet).
const imageUrl = 'www.linktomyimage.com/image.jpg';
// Start the HTTP request, then hand it to the downloader.
const req = request(createRequestHeaders(imageUrl));
downloadImage(req, filename);

/**
 * Assemble the `request` options used to download one image.
 *
 * `userid`, `phash` and `website` come from the enclosing scope; this snippet
 * does not define them.
 *
 * @param {string} url - Address of the image to fetch.
 * @returns {object} Options carrying `url`, `method`, `gzip` and the
 *   `Referer`/`Cookie` headers.
 */
function createRequestHeaders(url) {
  // Cookie first, referer second: same evaluation order as before.
  const sessionCookie = `userid=${userid}; phash=${phash};`;
  const referer = `https://${website}/`;

  const headers = {
    'Referer': referer,
    'Cookie': sessionCookie
  };
  const options = { url, 'method': 'GET', 'gzip': true, headers };
  return options;
}

function downloadImage(req, filename) {
  return new Promise((resolve, reject) => {
    var response = null;
    var bytes;
    var dirname = path.dirname(filename);
    if (!fs.existsSync(dirname)) fs.ensureDirSync(dirname);

    // NEW CODE
    writeSteam.on('close', () => {
      if (bytes !== writeSteam.bytesWritten) { // NEW CODE
        // Here the write stream is closed, so we can compare the property bytesWritten with the bytes we expected to receive
        console.log('The size is not equal! Image is corrupt!')
        reject({ 'name': 'ImageCorrupt'})
      } else if (response.statusCode === 200) {
        var input = require('fs').createReadStream(filename);
        probe(input).then(result => {
          input.destroy();
          if (result != null) {
            resolve({bytes: bytes, width: result.width,
              height: result.height,});
          } else {
            // The image size probe does not detect if the download was truncated
            reject({ 'name': 'ImageMissingOrCorrupt'});
          }
        }).catch((error) => {
          reject(error);
        });
      } else {
        // This is never triggered when the download stops and the image is truncated
        reject({ 'name': 'StatusCodeError', 'message': response.statusCode });
      }
    })

    progress(req, { delay: 0 }).on('progress', function (state) {
      updateDownloadSpeed(state.speed);
    }).on('end', function () {
      console.log('Ended request!!') // NEW CODE
    }).on('response', function (resp) {
      response = resp;
      bytes = response.headers['content-length'];
    }).on('error', function (error) {
      // This does not detect when an image is truncated either
      reject(error);
    }).pipe(writeSteam);
  });
}
Run Code Online (Sandbox Code Playgroud)