如何使用 Node.js 和 Cloudflare Workers 在现有 HTML 响应中注入 JavaScript

Tod*_*odd 3 javascript v8 node.js cloudflare-workers

我有一个指向 GitBook 的虚荣 URL。GitBook 不支持插入任意 JavaScript 片段。目前 GitBook 只有 4 个“集成”。

我可以通过我自己的 VM 服务器来完成此操作,但我有 CloudFlare,并且我想尝试 Workers(在 CDN 边缘运行的 JavaScript)。

CloudFlare Workers 环境使标头注入变得非常容易,但没有明显的方法向 HTML 响应正文中注入 JavaScript。

Tod*_*odd 7

使用 TransformStream 进行处理非常重要,这样处理是异步的并且不需要内存缓冲(为了可扩展性并最大限度地减少 GC)——只有 5 毫秒的 CPU 时间预算。

概述

  • 要供自己使用,请更改字符串 forHeadStart、forHeadEnd 和 forBodyEnd
  • 这种 deferredInjection 方法是推荐的方法,可以最大限度地减少 Worker 的 CPU 时间。它更高效,因为它只需要解析 HTML 的开头。另一种方法中,headInjection 需要解析整个 head 部分,而如果您使用 bodyInjection,实际上需要解析整个 HTML 响应。
  • deferredInjection 方法的工作原理是将内容注入到 head 标签的开头,然后在运行时在客户端将 HTML 内容部署到所需的位置。
  • 如果需要,您可以使用 headInjection 和/或 bodyInjection 直接注入。取消相关代码的注释(包括 injectScripts 中的代码),并设置将要编码为 tagBytes 的字符串。
  • 该解决方案将仅解析 HTML 内容类型
  • 该解决方案直接作用于字节(而不是字符串)以提高效率。搜索结束标记字符串的字节。
  • 您可能会定位更多结束标签,但通常不需要定位超过这两个
  • 通过流式处理数据(整个 HTML 字符串不会缓存在内存中)。这降低了峰值内存使用量并加快了第一个字节的时间。
  • 处理罕见的边缘情况,即结束标记位于文本读取边界上。我相信每约 1000 字节可能会出现一个边界(每个 TCP 数据包 1000-1500 字节),并且这可能会因 gzip 压缩而有所不同。
  • 将注入解析代码分开,以便代码简单地转发其余部分以保持清晰。
  • 如果不需要,您可以通过注释掉第二个主体标签注入器来禁用它 - 这将加快处理速度。
  • 我自己测试了这个确切的代码并且它有效。可能还存在剩余的错误(取决于结束标记的位置,以及您的服务器是否使用部分 html 模板(仅正文)进行回复)。我今天可能已经修好了 2019-06-28

代码

// Worker entry point: every fetch event is answered with whatever
// handleRequest resolves to.
addEventListener('fetch', (fetchEvent) => {
  // Opt in to pass-through first, so an exception thrown by this script lets
  // the request fall through instead of failing it.
  fetchEvent.passThroughOnException();

  const pendingResponse = handleRequest(fetchEvent.request);
  fetchEvent.respondWith(pendingResponse);
});

/**
 * Fetches the requested resource and, when the response is an HTML document,
 * returns a copy whose body is streamed through injectScripts.
 *
 * Responses without a content-type header, with a content-type that does not
 * start with "text/html", or without a body are returned untouched.
 *
 * @param {Request} request - The incoming request to forward.
 * @returns {Promise<Response>} The original response, or a new response that
 *   reuses its status and headers but reads from the rewritten body stream.
 */
async function handleRequest(request) {
  const response = await fetch(request);

  // headers.get() yields null when the header is absent; guard before startsWith.
  const contentType = response.headers.get('content-type');
  if (contentType === null || !contentType.startsWith('text/html')) {
    return response; // Only parse html bodies
  }

  // Nothing to rewrite when there is no body stream.
  if (response.body === null) {
    return response;
  }

  const { readable, writable } = new TransformStream();

  // Deliberately not awaited: the injection keeps pumping bytes into `writable`
  // while the caller is already consuming `readable`.
  injectScripts(response.body, writable);

  return new Response(readable, response);
}

// Shared encoder used to turn the tags and payloads below into bytes once, at load time.
let encoder = new TextEncoder('utf-8');

/**
 * Injection job that places one helper payload right after the opening
 * `<head>` tag. The payload contains `forHeadStart` verbatim plus a small
 * script that, in the browser, appends `forHeadEnd` to document.head
 * immediately and `forBodyEnd` to document.body once the page has loaded.
 *
 * The two deferred snippets are embedded base64-encoded (btoa here, atob in
 * the browser) so that a `</script>` inside them cannot terminate the helper
 * script early. NOTE: btoa only accepts Latin-1 characters.
 *
 * Shape: { forInjection: Uint8Array, tagBytes: Uint8Array, insertAfterTag: boolean }
 */
let deferredInjection = function() {
    let forHeadStart = `<script>var test = 1; //Start of head section</script>`;
    let forHeadEnd = `<script>var test = 2; //End of head section</script>`;
    let forBodyEnd = `<script>var test = 3; //End of body section</script><button>click</button>`;

    // Client-side notes on the helper below:
    //  - <script> elements created through innerHTML are never executed, so the
    //    snippet is parsed with Range.createContextualFragment, whose scripts
    //    do run when the fragment is inserted.
    //  - The body snippet is attached with addEventListener('load', ...) rather
    //    than by assigning window.onload, so it neither overwrites the page's
    //    own handler nor gets overwritten by it.
    let helper = `
    ${forHeadStart}
    <script>
        function appendHtmlTo(element, htmlContent) {
            var fragment = document.createRange().createContextualFragment(htmlContent);
            element.appendChild(fragment);
        }

        let forHeadEnd = "${ btoa(forHeadEnd) }";
        let forBodyEnd = "${ btoa(forBodyEnd) }";

        if (forHeadEnd.length > 0) appendHtmlTo(document.head, atob(forHeadEnd));
        if (forBodyEnd.length > 0) window.addEventListener('load', function() {
            appendHtmlTo(document.body, atob(forBodyEnd));
        });

    </script>
    `;
    return {
        forInjection: encoder.encode(helper),
        tagBytes: encoder.encode("<head>"), //case sensitive, exact bytes
        insertAfterTag: true
    };

}();

// Optional direct-injection jobs, disabled by default. To use one, uncomment
// it here together with the matching parseForInjection call in injectScripts.
// Each job has the same three fields as deferredInjection:
//   forInjection   - bytes to insert
//   tagBytes       - bytes of the tag to look for (compared byte-for-byte)
//   insertAfterTag - which side of the tag the bytes are meant to go on
// let headInjection = {
    // forInjection: encoder.encode("<script>var test = 1;</script>"),
    // tagBytes: encoder.encode("</head>"), //case sensitive
    // insertAfterTag: false
// };
// let bodyInjection = {
    // forInjection: encoder.encode("<script>var test = 1;</script>"),
    // tagBytes: encoder.encode("</body>"), //case sensitive
    // insertAfterTag: false
// }

//console.log(bodyTagBytes);
// Every payload has been encoded by this point, so drop the encoder reference.
encoder = null;

/**
 * Streams `readable` into `writable`, running the configured injection job(s)
 * on the way and then forwarding whatever is left.
 *
 * @param {ReadableStream} readable - Body of the upstream response.
 * @param {WritableStream} writable - Writable side feeding the outgoing response.
 */
async function injectScripts(readable, writable) {
  const reader = readable.getReader();
  const writer = writable.getWriter();

  // One search-result object is reused for the whole request.
  const result = { charactersFound: 0, foundIndex: -1, afterHeadTag: -1 };

  const processingState = {
    readStream: readable,
    writeStream: writable,
    reader,
    writer,
    leftOvers: null, // data left over after a tag has been found
    inputDone: false,
    result,
  };

  // Jobs run in document order; each one resumes where the previous one stopped.
  await parseForInjection(processingState, deferredInjection);

  //await parseForInjection(processingState, headInjection);

  //await parseForInjection(processingState, bodyInjection);

  await forwardTheRest(processingState);
}



/**
 * Scans `chunk` from its start for the byte sequence `tagBytes` and reports
 * the outcome through `result` (nothing is returned).
 *
 * Outcomes written to `result`:
 *  - no match and no partial match at the end of the chunk:
 *      charactersFound 0, foundIndex -1, afterHeadTag -1
 *  - complete match:
 *      charactersFound === tagBytes.length, foundIndex = index of the first
 *      tag byte, afterHeadTag = index just past the last tag byte
 *  - partial match running into the end of the chunk:
 *      charactersFound < tagBytes.length, foundIndex = where it starts,
 *      afterHeadTag = chunk.length
 *
 * The result object is passed in so one object can be reused across calls.
 */
function searchByteArrayChunkForClosingTag(chunk, tagBytes, result)
{
    const tagLength = tagBytes.length;
    const chunkLength = chunk.length;
    const firstByte = tagBytes[0];

    let candidate = chunk.indexOf(firstByte, 0);
    while (candidate !== -1) {
        // Count how many tag bytes line up starting at this candidate.
        let matched = 1;
        while (matched < tagLength
            && candidate + matched < chunkLength
            && chunk[candidate + matched] === tagBytes[matched]) {
            matched++;
        }

        const isComplete = matched === tagLength;
        const ranIntoChunkEnd = candidate + matched === chunkLength;
        if (isComplete || ranIntoChunkEnd) {
            result.charactersFound = matched;
            result.foundIndex = candidate;
            result.afterHeadTag = candidate + matched;
            return;
        }

        // Mismatch inside the chunk: try the next occurrence of the first byte.
        candidate = chunk.indexOf(firstByte, candidate + 1);
    }

    // Definitely not found
    result.charactersFound = 0;
    result.foundIndex = -1;
    result.afterHeadTag = -1;
}

/**
 * Resumes a tag match that began at the end of the previous chunk.
 * `lastSplitResult.charactersFound` says how many leading tag bytes were
 * already matched; the remaining tag bytes are compared against the start of
 * `chunk`.
 *
 * Outcomes written to `result`:
 *  - foundIndex is always set to minus the carried-over count; a negative
 *    value marks a match that spans chunks.
 *  - all remaining bytes match: charactersFound === tagBytes.length and
 *    afterHeadTag = number of bytes consumed from this chunk.
 *  - chunk ends first: charactersFound = bytes matched so far, afterHeadTag =
 *    chunk.length; this is the only case in which `result` is returned.
 *  - mismatch: charactersFound 0, afterHeadTag -1.
 */
function continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result)
{
    // Read once up front; the caller may pass the same object for both arguments.
    const carriedOver = lastSplitResult.charactersFound;
    result.foundIndex = -1 * carriedOver;

    let consumed = 0;
    for (let tagPos = carriedOver; tagPos < tagBytes.length; tagPos++, consumed++)
    {
        if (consumed === chunk.length)
        {
            // Chunk is shorter than what is still needed: still a partial match.
            result.charactersFound = tagPos;
            result.afterHeadTag = consumed;
            return result;
        }
        if (chunk[consumed] !== tagBytes[tagPos])
        {
            result.charactersFound = 0;
            result.afterHeadTag = -1;
            return;
        }
    }

    result.charactersFound = carriedOver + consumed;
    result.afterHeadTag = consumed;
}

/**
 * Searches one chunk for `tagBytes`, first resuming a match that started at
 * the end of the previous chunk when `lastSplitResult` is given.
 *
 * - No `lastSplitResult`: plain search of the chunk.
 * - Continuation completes, or is still partial because the chunk ran out:
 *   that outcome is kept (result.foundIndex is negative in both cases).
 * - Continuation mismatches (charactersFound reset to 0): the chunk is
 *   searched again from its start.
 *
 * Limitation: after a failed continuation only the current chunk is searched,
 * so a match that begins inside the withheld bytes at an offset other than 0
 * is not detected. This cannot happen for tags such as `<head>` whose first
 * byte does not occur again inside the tag.
 *
 * @returns {object} The same `result` object that was passed in.
 */
function continueOrNewSearch(chunk, tagBytes, lastSplitResult, result)
{
  if (lastSplitResult == null)
  {
    searchByteArrayChunkForClosingTag(chunk, tagBytes, result);
    return result;
  }

  continueSearchByteArrayChunkForClosingTag(chunk, tagBytes, lastSplitResult, result);
  if (result.charactersFound > 0)
    return result; //Complete, or still partial: keep the progress made so far

  searchByteArrayChunkForClosingTag(chunk, tagBytes, result); //Keep searching onward
  return result;
}

/**
 * Copies input to output until `injectionJob.tagBytes` is found, then writes
 * the tag together with `injectionJob.forInjection` and stops. The bytes that
 * follow the tag in the current chunk are stored in
 * `processingState.leftOvers` for the next job (or for forwardTheRest).
 *
 * When `injectionJob.insertAfterTag` is truthy the payload is written after
 * the tag, otherwise before it.
 *
 * Bytes of a possible tag start at the end of a chunk are withheld (only
 * their count is remembered, since they equal the start of `tagBytes`) until
 * the following chunk(s) decide whether the tag is really there. If it is
 * not, or the input ends, the withheld bytes are written out unchanged.
 *
 * @param {object} processingState - Shared state: reader, writer, leftOvers,
 *   inputDone and the reusable search `result` object.
 * @param {object} injectionJob - { tagBytes, forInjection, insertAfterTag }.
 * @returns {Promise<true|undefined>} true when the input ended without the
 *   tag being found (the writer has then been closed); undefined otherwise.
 */
async function parseForInjection(processingState, injectionJob)
{
  if (processingState.inputDone) return; //An earlier job already consumed all input
  if (!injectionJob) return;
  if (!injectionJob.tagBytes) return;
  if (!injectionJob.forInjection) return;

  const reader = processingState.reader;
  const writer = processingState.writer;
  const result = processingState.result;
  const tagBytes = injectionJob.tagBytes;

  //Number of leading tag bytes matched at the end of earlier chunk(s) and not
  //yet written. Kept as a plain number because `result` is reused and
  //overwritten by every search.
  let pendingCount = 0;

  for (;;) {
    let chunk;
    if (processingState.leftOvers)
    {
      chunk = processingState.leftOvers;
      processingState.leftOvers = null;
    }
    else
    {
      const readerResult = await reader.read();
      chunk = readerResult.value;
      processingState.inputDone = readerResult.done;
    }

    if (processingState.inputDone) {
      //Tag never found: release any withheld bytes so the output stays complete
      if (pendingCount > 0)
        await writer.write(tagBytes.slice(0, pendingCount));
      await writer.close();
      return true;
    }

    const lastSplitResult = pendingCount > 0 ? { charactersFound: pendingCount } : null;
    continueOrNewSearch(chunk, tagBytes, lastSplitResult, result);

    //A negative foundIndex together with matched characters means the match
    //started in an earlier chunk
    const spansChunks = result.charactersFound > 0 && result.foundIndex < 0;

    if (pendingCount > 0 && !spansChunks)
    {
      //The continuation failed, so the withheld bytes (maybe `<` or `</`) were ordinary content
      await writer.write(tagBytes.slice(0, pendingCount));
    }

    if (result.charactersFound === tagBytes.length) //Complete match
    {
      if (result.foundIndex > 0)
        await writer.write(chunk.slice(0, result.foundIndex));

      console.log('injected');
      if (injectionJob.insertAfterTag)
      {
        await writer.write(injectionJob.tagBytes);
        await writer.write(injectionJob.forInjection);
      }
      else
      {
        await writer.write(injectionJob.forInjection);
        await writer.write(injectionJob.tagBytes);
      }

      //Everything after the tag, up to and including the last byte of the chunk
      processingState.leftOvers = chunk.slice(result.afterHeadTag);
      return;
    }

    if (result.charactersFound === 0)
    {
      pendingCount = 0;
      await writer.write(chunk);
      continue;
    }

    //Partial match running into the end of the chunk: write what precedes it
    //and withhold the matched bytes
    if (!spansChunks && result.foundIndex > 0)
      await writer.write(chunk.slice(0, result.foundIndex));
    pendingCount = result.charactersFound;
  }
}

/**
 * Sends everything that has not been written yet to the output: first the
 * bytes left over from the last injection, then the remainder of the input
 * stream via pipeTo.
 *
 * Does nothing when the input has already been fully consumed. Errors are
 * logged and not rethrown (best effort).
 *
 * @param {object} processingState - Shared state holding readStream,
 *   writeStream, reader, writer, leftOvers and inputDone.
 */
async function forwardTheRest(processingState)
{
  try
  {
    if (processingState.inputDone) return; //Input ended before the tag was found; nothing left to forward

    const leftOvers = processingState.leftOvers;
    if (leftOvers)
    {
      processingState.leftOvers = null;
      await processingState.writer.write(leftOvers);
    }

    //pipeTo needs both streams unlocked
    processingState.reader.releaseLock();
    processingState.writer.releaseLock();

    await processingState.readStream.pipeTo(processingState.writeStream);
  }
  catch (e)
  {
    console.log(e);
  }
}
Run Code Online (Sandbox Code Playgroud)

直接使用 (utf-8) 字节的进一步说明:

  • 仅适用于字节值。这至少可以通过搜索字符的第一个独特的 utf-8 字节(< 128 和 > 192)来实现。但在本例中,我们要搜索的</head>是由低于 128 字节组成的,非常容易使用。
  • 考虑到搜索 utf-8 的性质(这是最棘手的),这应该与 ['utf-8', 'utf8', 'iso-8859-1', 'us-ascii'] 一起使用。您将需要更改代码片段编码器以匹配。
  • 这还没有经过彻底测试。边界情况,没有触发我。理想情况下,我们应该有一个核心功能的测试平台
  • 感谢肯顿瓦尔达挑战我
  • 请告诉我 Cloudflare Workers 中是否有更好的方法在 forwardTheRest 函数中执行 pipeTo
  • 您可能会发现 continueOrNewSearch 及其两个子函数是跨块边界查找多字节序列的有趣方法。直到边界为止,我们只计算找到了多少字节。不需要保留这些字节(我们知道它们是什么)。然后在下一个块中,我们从上次中断的地方继续。我们总是剪切头部周围的数组缓冲区,并确保写入头部字节(使用 tagBytes)