使用 JavaScript 同时录制摄像头视频和屏幕,并将二者叠加显示

ami*_*ros 3 html javascript css html5-video

我想在录制网络摄像头的同时录制用户的屏幕,并将网络摄像头画面作为叠加层显示在屏幕画面之上,如下所示:

屏幕和网络摄像头覆盖

我假设在录制时我可以在两个单独的视频元素中显示多个流并用 CSS 覆盖它们。

但是,如何将结果保存为两个视频的叠加?

Bki*_*kiD 12

这可以在纯 JS 中实现,如下所示 -

  1. 通过 getUserMedia() 获取网络摄像头流
  2. 通过 getDisplayMedia() 获取屏幕共享流
  3. 使用一些数学和画布操作合并两个流
  4. 使用canvas.captureStream()生成复合视频流。
  5. 使用 AudioContext 合并音频剪辑(如果同时使用麦克风和系统音频,则尤其需要)。
  6. 使用 MediaStream 构造函数创建一个新流:视频轨道来自画布生成的新流,音频轨道来自 AudioContext 的目标节点(destination node),如下所示:

new MediaStream([...newStream.getVideoTracks(), ...audioDestination.stream.getTracks()]);

  7. 根据需要使用新生成的 MediaStream(例如在 RTCPeerConnection 中替换原有的流等)。
  8. 在此示例中,使用 MediaRecorder API 录制生成的合成/画中画视频。
  9. 单击“录制结果流”(Record Resulting Stream)按钮后开始录制;单击“停止录制并下载结果流”(Stop Recording and Download Resulting Stream)按钮即可下载最终结果。

PS:在此处运行该代码片段无法获取摄像头/屏幕共享权限,请使用此 CodePen 链接查看其实际效果。

// Source streams (webcam, screen share) and the composited picture-in-picture stream.
let localCamStream;
let localScreenStream;
let localOverlayStream;

// Handle of the pending composite-frame timer (set by makeComposite).
let rafId;

// <video> elements returned by attachToDOM for the webcam and the screen share.
let cam;
let screen;

// Recorder and Web Audio objects created when the streams are merged.
let mediaRecorder;
let audioContext;
let audioDestination;

// Container that receives the generated <video> elements.
let mediaWrapperDiv = document.getElementById("mediaWrapper");

// Control buttons.
let startWebcamBtn = document.getElementById("startWebcam");
let startScreenShareBtn = document.getElementById("startScreenShare");
let mergeStreamsBtn = document.getElementById("mergeStreams");
let startRecordingBtn = document.getElementById("startRecording");
let stopRecordingBtn = document.getElementById("stopRecording");
let stopAllStreamsBtn = document.getElementById("stopAllStreams");

// Off-screen canvas on which the screen and webcam frames are composited.
let canvasElement = document.createElement("canvas");
let canvasCtx = canvasElement.getContext("2d");

// Options handed to the MediaRecorder constructor.
let encoderOptions = { mimeType: "video/webm; codecs=vp9" };

// Blobs delivered by the recorder, and the audio source nodes feeding the mix.
let recordedChunks = [];
let audioTracks = [];

/**
 * Timer-based stand-in for window.requestAnimationFrame, used because the
 * browser stops canvas drawing driven by requestAnimationFrame while the
 * tab is inactive.
 *
 * @param {function(number): void} callback - called once with Date.now().
 * @returns {*} the handle returned by window.setTimeout (pass it to
 *   cancelVideoFrame to cancel the pending call).
 */
const requestVideoFrame = (callback) => {
  const frameIntervalMs = 1000 / 60; // 60 fps - just like requestAnimationFrame
  const onTick = () => {
    callback(Date.now());
  };
  return window.setTimeout(onTick, frameIntervalMs);
};

/**
 * Counterpart of requestVideoFrame, mirroring window.cancelAnimationFrame.
 *
 * @param {*} id - handle previously returned by requestVideoFrame.
 */
const cancelVideoFrame = (id) => {
  clearTimeout(id);
};

/**
 * Click handler: requests webcam video plus microphone audio and shows the
 * stream in a <video id="justWebcam"> element.
 *
 * The microphone is first requested with the exact deviceId "communications".
 * An `exact` constraint makes getUserMedia reject with OverconstrainedError
 * when no device matches, so in that case the request is retried with the
 * default microphone instead of failing outright.
 * NOTE(review): "communications" looks like a platform-specific virtual
 * device id - confirm which browsers/OSes actually expose it.
 *
 * Any other rejection (e.g. the user denying permission) is propagated
 * unchanged.
 */
async function startWebcamFn() {
  try {
    localCamStream = await navigator.mediaDevices.getUserMedia({
      video: true,
      audio: {
        deviceId: {
          exact: "communications"
        }
      }
    });
  } catch (err) {
    if (!err || err.name !== "OverconstrainedError") {
      throw err;
    }
    // No "communications" device on this machine: use the default microphone.
    localCamStream = await navigator.mediaDevices.getUserMedia({
      video: true,
      audio: true
    });
  }
  if (localCamStream) {
    cam = await attachToDOM("justWebcam", localCamStream);
  }
}

/**
 * Click handler: asks the user for a screen-share stream (video and audio)
 * and shows it in a <video id="justScreenShare"> element.
 */
async function startScreenShareFn() {
  const displayConstraints = { video: true, audio: true };
  localScreenStream = await navigator.mediaDevices.getDisplayMedia(
    displayConstraints
  );
  if (!localScreenStream) {
    return;
  }
  screen = await attachToDOM("justScreenShare", localScreenStream);
}

/**
 * Click handler: tears down everything the demo has started.
 *
 * - finalises a recording that is still in progress (its data is delivered
 *   through the recorder's dataavailable handler as usual),
 * - stops every track of the webcam, screen-share and overlay streams,
 * - cancels the pending composite frame,
 * - drops the references to the removed <video> elements,
 * - disconnects the audio source nodes and closes the AudioContext so the
 *   audio resources are released,
 * - clears the media wrapper and the recording-state label.
 *
 * Safe to call when nothing (or only part of the pipeline) is running.
 */
async function stopAllStreamsFn() {
  // Stop the recorder first so it can flush its data before the tracks end.
  if (mediaRecorder && mediaRecorder.state !== "inactive") {
    mediaRecorder.stop();
  }

  for (const stream of [localCamStream, localScreenStream, localOverlayStream]) {
    if (stream) {
      stream.getTracks().forEach((track) => track.stop());
    }
  }
  localCamStream = null;
  localScreenStream = null;
  localOverlayStream = null;

  cancelVideoFrame(rafId);

  // The <video> elements are removed below; do not keep drawing from them.
  cam = null;
  screen = null;
  mediaWrapperDiv.innerHTML = "";
  document.getElementById("recordingState").innerHTML = "";

  // Source nodes belong to the context being closed and must not be reused.
  audioTracks.forEach((sourceNode) => sourceNode.disconnect());
  audioTracks.length = 0;
  const contextToClose = audioContext;
  audioContext = null;
  audioDestination = null;
  if (contextToClose && contextToClose.state !== "closed") {
    await contextToClose.close();
  }
}

/**
 * Draws one composite frame (screen share with the webcam in the bottom-left
 * quarter-size corner) onto the shared canvas and schedules the next frame.
 *
 * Does nothing - and does not reschedule - unless both the webcam and the
 * screen-share <video> elements exist.
 *
 * While the screen video reports no dimensions yet (videoWidth/videoHeight
 * of 0) the frame is skipped but the loop keeps running: getImageData throws
 * for a zero-sized rectangle, which would otherwise end the loop for good.
 *
 * The canvas is resized only when the screen dimensions change, because
 * assigning the canvas width/height resets the whole canvas.
 */
async function makeComposite() {
  if (!cam || !screen) {
    return;
  }

  const width = screen.videoWidth;
  const height = screen.videoHeight;

  if (width > 0 && height > 0) {
    if (canvasElement.width !== width || canvasElement.height !== height) {
      canvasElement.width = width;
      canvasElement.height = height;
    }

    canvasCtx.clearRect(0, 0, width, height);
    canvasCtx.drawImage(screen, 0, 0, width, height);
    // Rough placement of the webcam picture: bottom-left, a quarter of the
    // screen's width and height.
    canvasCtx.drawImage(
      cam,
      0,
      Math.floor(height - height / 4),
      Math.floor(width / 4),
      Math.floor(height / 4)
    );
    // Reading the pixels back and writing them again is what makes the
    // captured stream work properly on Safari/WebKit browsers too.
    const imageData = canvasCtx.getImageData(0, 0, width, height);
    canvasCtx.putImageData(imageData, 0, 0);
  }

  rafId = requestVideoFrame(makeComposite);
}

/**
 * Click handler: builds the picture-in-picture stream and prepares the
 * recorder.
 *
 * 1. Starts the canvas compositing loop and captures the canvas as video.
 * 2. Mixes every audio track of the webcam and screen-share streams into a
 *    single Web Audio destination.
 * 3. Shows the video-only overlay in a <video id="pipOverlayStream"> element.
 * 4. Creates a MediaRecorder over canvas video + mixed audio.
 *
 * Requires both the webcam and the screen share to be running; otherwise it
 * logs a warning and returns. Calling it again replaces the previous overlay,
 * draw loop and AudioContext instead of stacking new ones on top.
 */
async function mergeStreamsFn() {
  if (!cam || !screen) {
    console.warn("Start both the webcam and the screen share before merging.");
    return;
  }

  document.getElementById("mutingStreams").style.display = "block";

  // Discard what an earlier merge left behind: its draw loop, overlay
  // element and captured tracks.
  cancelVideoFrame(rafId);
  const previousOverlay = document.getElementById("pipOverlayStream");
  if (previousOverlay) {
    previousOverlay.remove();
  }
  if (localOverlayStream) {
    localOverlayStream.getTracks().forEach((track) => track.stop());
  }

  await makeComposite();

  // Audio nodes cannot be connected across contexts, so the old source nodes
  // are dropped together with the old context.
  audioTracks.length = 0;
  if (audioContext && audioContext.state !== "closed") {
    await audioContext.close();
  }
  audioContext = new AudioContext();
  audioDestination = audioContext.createMediaStreamDestination();

  const sourceAudioTracks = [
    ...(localCamStream ? localCamStream.getAudioTracks() : []),
    ...(localScreenStream ? localScreenStream.getAudioTracks() : [])
  ];
  // One source node per track; with no audio tracks at all the destination
  // simply stays silent.
  for (const track of sourceAudioTracks) {
    const sourceNode = audioContext.createMediaStreamSource(
      new MediaStream([track])
    );
    sourceNode.connect(audioDestination);
    audioTracks.push(sourceNode);
  }

  const fullVideoStream = canvasElement.captureStream();
  // Preview stream: video only.
  localOverlayStream = new MediaStream([...fullVideoStream.getVideoTracks()]);
  // Recorded stream: canvas video plus the mixed audio.
  const fullOverlayStream = new MediaStream([
    ...fullVideoStream.getVideoTracks(),
    ...audioDestination.stream.getTracks()
  ]);

  const overlay = await attachToDOM("pipOverlayStream", localOverlayStream);
  mediaRecorder = new MediaRecorder(fullOverlayStream, encoderOptions);
  mediaRecorder.ondataavailable = handleDataAvailable;

  // Silence local playback so the user does not hear their own audio back.
  overlay.volume = 0;
  cam.volume = 0;
  screen.volume = 0;

  // Only the composited overlay stays visible.
  cam.style.display = "none";
  screen.style.display = "none";
}

/**
 * Click handler: starts recording the merged stream.
 *
 * Does nothing (apart from a warning) when the streams have not been merged
 * yet, and ignores the click when the recorder is already active, since
 * MediaRecorder.start() throws in any state other than "inactive".
 *
 * Chunks from an earlier recording are discarded first so that each download
 * contains exactly one recording.
 */
async function startRecordingFn() {
  if (!mediaRecorder) {
    console.warn("Merge the streams before starting a recording.");
    return;
  }
  if (mediaRecorder.state !== "inactive") {
    return;
  }

  recordedChunks.length = 0;
  mediaRecorder.start();

  // Red frame around the overlay as a visual "recording" indicator.
  document.getElementById("pipOverlayStream").style.border = "10px solid red";
  document.getElementById(
    "recordingState"
  ).textContent = `${mediaRecorder.state}...`;
}

/**
 * Creates an autoplaying, inline 640x360 <video> element that plays a copy of
 * the given stream (a new MediaStream built from the same tracks) and appends
 * it to the media wrapper.
 *
 * @param {string} id - id assigned to the new <video> element.
 * @param {MediaStream} stream - stream whose tracks are shown.
 * @returns {Promise<HTMLVideoElement>} the element that was appended.
 */
async function attachToDOM(id, stream) {
  const player = document.createElement("video");
  Object.assign(player, {
    id,
    width: 640,
    height: 360,
    autoplay: true
  });
  player.setAttribute("playsinline", true);

  const tracks = stream.getTracks();
  player.srcObject = new MediaStream(tracks);

  mediaWrapperDiv.appendChild(player);
  return player;
}

/**
 * MediaRecorder "dataavailable" handler: stores a non-empty chunk and then
 * triggers the download of everything recorded so far. Empty chunks are
 * ignored.
 *
 * @param {{data: {size: number}}} event - event carrying the recorded data.
 */
function handleDataAvailable(event) {
  console.log("data-available");
  const hasData = event.data.size > 0;
  if (!hasData) {
    return;
  }
  recordedChunks.push(event.data);
  console.log(recordedChunks);
  download();
}

/**
 * Offers the recorded chunks to the user as a "result.webm" download.
 *
 * A temporary, hidden <a download> element is clicked programmatically and
 * removed again right away so repeated downloads do not pile up anchors in
 * the document. The object URL is revoked shortly afterwards rather than
 * immediately, giving the browser time to start reading the blob.
 */
function download() {
  const blob = new Blob(recordedChunks, {
    type: "video/webm"
  });
  const url = URL.createObjectURL(blob);

  const anchor = document.createElement("a");
  anchor.style.display = "none";
  anchor.href = url;
  anchor.download = "result.webm";

  document.body.appendChild(anchor);
  anchor.click();
  anchor.remove();

  setTimeout(() => {
    URL.revokeObjectURL(url);
  }, 100);
}

/**
 * Click handler: stops the current recording. The recorded data then arrives
 * through the recorder's dataavailable handler.
 *
 * The click is ignored when there is no recorder yet or when it is not
 * recording, since MediaRecorder.stop() throws on an inactive recorder.
 */
function stopRecordingFn() {
  if (!mediaRecorder || mediaRecorder.state === "inactive") {
    return;
  }
  mediaRecorder.stop();
  document.getElementById(
    "recordingState"
  ).textContent = `${mediaRecorder.state}...`;
}

// Wire every control button to its click handler.
for (const [button, onClick] of [
  [startWebcamBtn, startWebcamFn],
  [startScreenShareBtn, startScreenShareFn],
  [mergeStreamsBtn, mergeStreamsFn],
  [stopAllStreamsBtn, stopAllStreamsFn],
  [startRecordingBtn, startRecordingFn],
  [stopRecordingBtn, stopRecordingFn]
]) {
  button.addEventListener("click", onClick);
}
Run Code Online (Sandbox Code Playgroud)
/* Lay out the media previews and the control buttons as single-line flex rows. */
div#mediaWrapper,
div#buttonWrapper {
  display: flex;
  flex: 1 1 100%;
  /* Direction and wrapping belong to flex-flow; flex-basis only accepts a size. */
  flex-flow: row nowrap;
}

/* Preview videos: thin border, at most a third of the wrapper's width. */
div#mediaWrapper video {
  border: 1px solid black;
  margin: 1px;
  max-width: 33%;
  height: auto;
}

/* The merged picture-in-picture video may use the full wrapper width. */
div#mediaWrapper video#pipOverlayStream {
  max-width: 100% !important;
}

/* Shared look of all control buttons; the text is not selectable. */
div#buttonWrapper button {
  border-radius: 0.25rem;
  color: #ffffff;
  display: inline-block;
  font-size: 1rem;
  font-weight: bold;
  line-height: 1.6;
  padding: 0.375rem 0.75rem;
  text-align: center;
  -webkit-user-select: none;
  -moz-user-select: none;
  -ms-user-select: none;
  user-select: none;
  vertical-align: middle;
  margin: 5px;
  cursor: pointer;
}

/* Per-button colours; border and background always match. */
div#buttonWrapper button#startWebcam {
  background-color: #007bff;
  border: 1px solid #007bff;
}

div#buttonWrapper button#startScreenShare {
  background-color: #17a2b8;
  border: 1px solid #17a2b8;
}

div#buttonWrapper button#mergeStreams {
  background-color: #28a745;
  border: 1px solid #28a745;
}

/* Same colour as the "Start Screen Share" button. */
div#buttonWrapper button#startRecording {
  background-color: #17a2b8;
  border: 1px solid #17a2b8;
}

div#buttonWrapper button#stopRecording {
  background-color: #000000;
  border: 1px solid #000000;
}

div#buttonWrapper button#stopAllStreams {
  background-color: #dc3545;
  border: 1px solid #dc3545;
}
Run Code Online (Sandbox Code Playgroud)
<main>
  <p>This demo is a proof-of-concept solution for this <a href="https://stackoverflow.com/questions/71557879" target="_blank" rel="noopener noreferrer">StackOverflow question</a> and <a href="https://stackoverflow.com/questions/37404860" target="_blank"
      rel="noopener noreferrer">also this one</a> - as long as you make the required changes<br>i.e. <b>mimeType: "video/mp4; codecs=h264"</b> instead of <b>mimeType: "video/webm; codecs=vp9"</b><br>AND<br><b>type: "video/mp4"</b> instead of <b>type: "video/webm"</b><br>AND<br><b>result.mp4</b>    instead of <b>result.webm</b></p>
  <h2>Click on "Start Webcam" to get started. </h2>
  <h3>
    Core Idea:<br>
    <ol>
      <li>Fetch Webcam Stream via getUserMedia()</li>
      <li>Fetch Screen Share Stream via getDisplayMedia()</li>
      <li>Merge both streams using some math &amp; canvas operations</li>
      <li>Use canvas.captureStream() to generate the composite video stream.</li>
      <li>Use AudioContext to merge audio clips (especially needed if using both microphone & system audio).</li>
      <li>Use MediaStream constructor to create a new stream using - the video from the new stream + audio from audioContext Destination Node as follows -<br><br>
        <code>new MediaStream([...newStream.getVideoTracks(), ...audioDestination.stream.getTracks()]);</code>
      </li><br>
      <li>Use newly generated MediaStream as required (i.e. replace in RTCPeerConnection, etc.).</li>
      <li>In this example - MediaRecorder API is used to record the resulting composite/picture-in-picture video. Recording begins when the "Record Resulting Stream" button is clicked. The final result can be downloaded upon clicking the "Stop Recording and
        Download Resulting Stream" button</li>
    </ol>
  </h3>
  <div id="mediaWrapper"></div>
  <div id="buttonWrapper">
    <button id="startWebcam" title="Start Webcam">Start Webcam</button>
    <button id="startScreenShare" title="Start Screen Share">Start Screen Share</button>
    <button id="mergeStreams" title="Merge Streams">Merge Streams</button>
    <button id="startRecording" title="Record Resulting Stream">Record Resulting Stream</button>
    <button id="stopRecording" title="Stop Recording and Download Resulting Stream">Stop Recording and Download Resulting Stream</button>
    <button id="stopAllStreams" title="Stop All Streams">Stop All Streams</button>
  </div>

  <div id="helpText">
    <h1 id="recordingState"></h1><br>
    <h2 id="mutingStreams">
      Note: In a WebRTC setting, you wouldn't be hearing your own voice or the screen share audio via the "video" tag. The same has been simulated here by ensuring that all video tags have a "volume = 0". Removing this will create a loopback hell which you
      do not want.<br><br> Another way to avoid this issue is to ensure that the video tags are created with ONLY video stream tracks using <em style="color: blue;">new MediaStream(source.getVideoTracks())</em> during the srcObject assignment.
    </h2>
    <h1>
      Remember to send the correct stream (with both audio and video) to the rest of the peers though.
    </h1>
  </div>
</main>
Run Code Online (Sandbox Code Playgroud)