使用 React Native 和 Tensorflow.js 对实时视频源进行预测

cag*_*glu 4 react-native tensorflow tensorflow.js

我已经设置了我的 React Native 应用程序,所有 unimodules 和包的安装和配置都按预期工作。没有依赖性等问题。

现在我想使用一个在 Google 的 Teachable Machine 上训练的 TensorFlow 模型,但我不知道如何把它和相机结合起来使用:我希望像 TensorFlow React Native API 文档所描述的那样实时处理视频帧。下面是我在网上找到的代码,我打算把其中的模型换成我自己的,但问题是它只在用户拍照时才进行预测。我希望相机能够像人脸检测、条形码扫描那样实时地运行模型。

Main.js

import React, {useRef, useEffect, useState} from 'react';
import {View, StyleSheet, Dimensions, Platform} from 'react-native';

import {
  getModel,
  convertBase64ToTensor,
  startPrediction,
} from '../../helpers/tensor-helper';

import {Camera} from 'expo-camera';

import * as tf from '@tensorflow/tfjs';
import '@tensorflow/tfjs-react-native';
import {
  cameraWithTensors,
  bundleResourceIO,
} from '@tensorflow/tfjs-react-native';

// Wrap expo-camera's Camera with cameraWithTensors from @tensorflow/tfjs-react-native.
// The resulting component is rendered below with the tensor-related props and an
// onReady callback that receives the camera frame tensors.
const TensorCamera = cameraWithTensors(Camera);

const Main = () => {
  const [model, setModel] = useState();
  const [prediction, setPredictions] = useState();
  const cameraRef = useRef(null);

  let requestAnimationFrameId = 0;
  let frameCount = 0;
  let makePredictionsEveryNFrame = 1;

  const modelJson = require('../../model/model.json');
  const modelWeights = require('../../model/weights.bin');

  const getModel = async () => {
    try {
      await tf.ready();
      const model = await tf.loadLayersModel(
        bundleResourceIO(modelJson, modelWeights),
      );
      return model;
    } catch (error) {
      console.log('Could not load model', error);
    }
  };
  useEffect(() => {
    setModel(getModel());
  }, []);

  useEffect(() => {
    return () => {
      cancelAnimationFrame(requestAnimationFrameId);
    };
  }, [requestAnimationFrameId]);

  const handleCameraStream = tensors => {
    if (!tensors) {
      console.log('Image not found!');
    }
    const loop = async () => {
      if (frameCount % makePredictionsEveryNFrame === 0) {
        const imageTensor = tensors.next().value;
        if (model) {
          const results = await startPrediction(model, imageTensor);
          setPredictions(results);
          console.log(`prediction: ${JSON.stringify(prediction)}`);
        }
        tf.dispose(tensors);
      }
      frameCount += 1;
      frameCount = frameCount % makePredictionsEveryNFrame;
      requestAnimationFrameId = requestAnimationFrame(loop);
    };
    console.log(`prediction: ${JSON.stringify(prediction)}`);

    loop();
    console.log(`prediction: ${JSON.stringify(prediction)}`);
  };

  let textureDims;
  if (Platform.OS === 'ios') {
    textureDims = {
      height: 1920,
      width: 1080,
    };
  } else {
    textureDims = {
      height: 1200,
      width: 1600,
    };
  }
  return (
    <View style={styles.container}>
      <TensorCamera
        ref={cameraRef}
        // Standard Camera props
        style={styles.camera}
        type={Camera.Constants.Type.back}
        flashMode={Camera.Constants.FlashMode.off}
        // Tensor related props
        cameraTextureHeight={textureDims.height}
        cameraTextureWidth={textureDims.width}
        resizeHeight={50}
        resizeWidth={50}
        resizeDepth={3}
        onReady={tensors => handleCameraStream(tensors)}
        autorender={true}
      />
    </View>
  );
};



export default Main;
Run Code Online (Sandbox Code Playgroud)

tensor-helper.js:

import * as tf from '@tensorflow/tfjs';
import {bundleResourceIO, decodeJpeg} from '@tensorflow/tfjs-react-native';
import * as tfc from '@tensorflow/tfjs-core';

import {Base64Binary} from '../utils/utils';
// Side length used for both the height and the width when a decoded image is
// reshaped into a 4D tensor in convertBase64ToTensor.
const BITMAP_DIMENSION = 224;

// Bundled model topology and weights, passed to bundleResourceIO when the model is loaded.
const modelJson = require('../model/model.json');
const modelWeights = require('../model/weights.bin');

// Channel count used as the last dimension of the reshaped image tensor.
// 0: channel from JPEG-encoded image
// 1: gray scale
// 3: RGB image
const TENSORFLOW_CHANNEL = 3;

/**
 * Waits for TensorFlow.js to be ready, then loads the layers model from the
 * bundled model.json / weights.bin resources.
 *
 * @returns {Promise<object|undefined>} The loaded model, or undefined when
 *   loading failed (the error is logged).
 */
export const getModel = async () => {
  let loadedModel;
  try {
    await tf.ready();
    const resourceHandler = bundleResourceIO(modelJson, modelWeights);
    loadedModel = await tf.loadLayersModel(resourceHandler);
  } catch (loadError) {
    console.log('Could not load model', loadError);
  }
  return loadedModel;
};

/**
 * Decodes a base64-encoded JPEG into a 4D image tensor of shape
 * [1, BITMAP_DIMENSION, BITMAP_DIMENSION, TENSORFLOW_CHANNEL].
 *
 * The image is reshaped, not resized: if the decoded image does not already
 * have BITMAP_DIMENSION x BITMAP_DIMENSION pixels, the reshape fails and the
 * function resolves to undefined.
 *
 * @param {string} base64 - Base64 string of a JPEG image.
 * @returns {Promise<object|undefined>} The 4D tensor (the caller owns it and
 *   must dispose it), or undefined when conversion failed (the error is logged).
 */
export const convertBase64ToTensor = async base64 => {
  let decodedImage;
  try {
    const uIntArray = Base64Binary.decode(base64);
    // Decode the JPEG bytes into a 3D tensor with TENSORFLOW_CHANNEL channels.
    decodedImage = decodeJpeg(uIntArray, TENSORFLOW_CHANNEL);
    // Reshape the 3D image into a 4D tensor with a leading batch dimension of 1.
    return decodedImage.reshape([
      1,
      BITMAP_DIMENSION,
      BITMAP_DIMENSION,
      TENSORFLOW_CHANNEL,
    ]);
  } catch (error) {
    console.log('Could not convert base64 string to tensor', error);
    return undefined;
  } finally {
    // The reshaped tensor is a separate handle; release the intermediate one
    // so repeated conversions do not leak tensors.
    if (decodedImage) {
      decodedImage.dispose();
    }
  }
};

/**
 * Runs the model on one image tensor and returns the raw output values.
 *
 * A rank-3 tensor (a single image) gets a batch dimension prepended before
 * prediction; tensors of any other rank are passed through unchanged. The
 * caller keeps ownership of `tensor`; only tensors created here are disposed.
 *
 * NOTE(review): assumes the model has a single output tensor. A model that
 * returns an array of tensors ends up in the catch branch.
 *
 * @param {object} model - Loaded model exposing `predict(tensor)`.
 * @param {object} tensor - Input image tensor.
 * @returns {Promise<object|undefined>} Typed array with the output values, or
 *   undefined when prediction failed (the error is logged).
 */
export const startPrediction = async (model, tensor) => {
  let batchedTensor;
  let output;
  try {
    batchedTensor = tensor.rank === 3 ? tensor.expandDims(0) : tensor;
    // Predict against the model and read the values of the output tensor
    // (previously an unrelated empty tensor was read, which always threw).
    output = model.predict(batchedTensor);
    return await output.data();
  } catch (error) {
    console.log('Error predicting from tensor image', error);
    return undefined;
  } finally {
    // Release what this function created so per-frame calls do not leak.
    if (output && typeof output.dispose === 'function') {
      output.dispose();
    }
    if (batchedTensor && batchedTensor !== tensor) {
      batchedTensor.dispose();
    }
  }
};

Run Code Online (Sandbox Code Playgroud)

我按照建议修改了文件,得到如下输出:

 LOG  prediction: undefined
 LOG  prediction: undefined
 WARN  Possible Unhandled Promise Rejection (id: 1):
Error: When using targetShape.depth=3, targetShape.width must be a multiple of 4. Alternatively do not call detectGLCapabilities()
fromTexture@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:267911:24
nextFrameGenerator$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268598:67  
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
loop$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126503:43
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26610:30
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26640:19
tryCallTwo@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31390:9
doResolve@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31554:25
Promise@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:31413:14
callInvokeWithMethodAndArg@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26639:33
enqueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26644:157
async@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26661:69
loop@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126494:42
handleCameraStream@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126535:11   
onReady@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:126572:34
onGLContextCreate$@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:268641:37   
tryCatch@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26537:23
invoke@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:26710:32   
__callImmediates@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3317:35       
http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3096:34
__guard@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3300:15
flushedQueue@http://localhost:8081/index.bundle?platform=android&dev=true&minify=false&app=com.AppName&modulesOnly=false&runModule=true:3095:21
flushedQueue@[native code]
invokeCallbackAndReturnFlushedQueue@[native code]
Run Code Online (Sandbox Code Playgroud)

yud*_*esh 5

我是不久前(去年)做过这件事的,所以可能忘了一些细节,不过你可以参考这里的代码:它使用 Expo 对实时视频源进行预测。请原谅其中写得很糟糕的代码(我现在写的代码好多了)。

不管怎样,下面简要说明你需要做的事情,主要涉及 handleCameraStream()。你需要运行两个不同的 useEffect 钩子:一个用于在初始时加载模型,另一个用于取消动画帧(在持续进行预测时需要用到动画帧)。

将模型保存到 state 中的 model,这样就可以在文件的任何位置访问它。我对 predictions 也做了同样的处理。

我还添加了每 N 帧进行一次预测的功能:把 makePredictionsEveryNFrame 设置为 1 时,来自 TensorCamera 的张量会在每一帧都被传给预测函数。完成预测后,还需要使用 tf.dispose() 释放张量。loop() 函数需要无限循环运行,才能对不断到来的帧持续进行预测。

/**
 * Component logic for predicting on every N-th live camera frame.
 *
 * Only the state, effects and frame handler are shown here; the rendered
 * output (the tensor camera whose onReady prop receives handleCameraStream)
 * is omitted from this snippet.
 */
const Main = () => {
  const [model, setModel] = useState();
  const [predictions, setPredictions] = useState();

  // The frame loop is created once, when the camera becomes ready, so the
  // values it reads and writes are kept in refs. Plain `let` locals are reset
  // on every render and the loop would only ever see the first render's model.
  const modelRef = useRef();
  const requestAnimationFrameId = useRef(0);
  const frameCount = useRef(0);
  const makePredictionsEveryNFrame = 1;

  // Load the model once. An effect callback cannot itself be async (and
  // `await` is a syntax error outside an async function), so the async work
  // is wrapped in an inner function.
  useEffect(() => {
    const loadModel = async () => {
      setModel(await getModel());
    };
    loadModel();
  }, []);

  // Keep the ref in sync with the loaded model.
  useEffect(() => {
    modelRef.current = model;
  }, [model]);

  // Cancel the pending animation frame when the component unmounts.
  useEffect(() => {
    return () => {
      cancelAnimationFrame(requestAnimationFrameId.current);
    };
  }, []);

  /**
   * Starts the prediction loop over the frame iterator handed to onReady.
   *
   * @param tensors Iterator whose next().value is the current camera frame tensor.
   */
  const handleCameraStream = (tensors) => {
    if (!tensors) {
      console.log("Image not found!");
      return;
    }
    const loop = async () => {
      if (frameCount.current % makePredictionsEveryNFrame === 0) {
        const imageTensor = tensors.next().value;
        if (imageTensor) {
          let batchedTensor;
          try {
            if (modelRef.current) {
              // The camera yields one 3D frame; add a batch dimension first.
              batchedTensor = imageTensor.expandDims(0);
              const results = await startPrediction(
                modelRef.current,
                batchedTensor
              );
              setPredictions(results);
            }
          } finally {
            // Dispose the frame tensors, not the iterator, to avoid leaking
            // memory on every frame.
            if (batchedTensor) {
              tf.dispose(batchedTensor);
            }
            tf.dispose(imageTensor);
          }
        }
      }
      frameCount.current =
        (frameCount.current + 1) % makePredictionsEveryNFrame;
      requestAnimationFrameId.current = requestAnimationFrame(loop);
    };
    loop();
  };
}
Run Code Online (Sandbox Code Playgroud)

我更新了 getModel(),使其在模型加载完成后返回模型,这样我们就可以将其保存到 state 中。

/**
 * Loads the bundled layers model once TensorFlow.js is ready.
 *
 * @returns {Promise<object|undefined>} The loaded model, or undefined when
 *   loading failed (the error is logged).
 */
export const getModel = async () => {
  let loadedModel;
  try {
    await tf.ready();
    const resourceHandler = bundleResourceIO(modelJson, modelWeights);
    loadedModel = await tf.loadLayersModel(resourceHandler);
  } catch (loadError) {
    console.log("Could not load model", loadError);
  }
  return loadedModel;
};
Run Code Online (Sandbox Code Playgroud)

所以你只需要访问predictions并渲染它们。

编辑1

回顾代码,startPrediction 函数存在一些问题:您实际上并没有返回模型的预测结果,并且需要以批次(batch)为单位对图像进行预测。

/**
 * Runs the model on an already-batched image tensor and returns the raw
 * output values.
 *
 * The caller keeps ownership of `tensor`; the model's output tensor is
 * disposed here once its values have been read.
 *
 * @param {object} model - Loaded model exposing `predict(tensor, options)`.
 * @param {object} tensor - Batched input tensor.
 * @returns {Promise<object|undefined>} Typed array with the output values, or
 *   undefined when prediction failed (the error is logged).
 */
export const startPrediction = async (model, tensor) => {
  let output;
  try {
    // Predict against the model, one image per batch.
    output = model.predict(tensor, {batchSize: 1});
    return await output.data();
  } catch (error) {
    console.log('Error predicting from tensor image', error);
    return undefined;
  } finally {
    // Without this, every predicted frame leaks its output tensor.
    if (output && typeof output.dispose === 'function') {
      output.dispose();
    }
  }
};
Run Code Online (Sandbox Code Playgroud)

编辑2

查看这里的模型输入形状,预期的输入形状是 (batch_size, 224, 224, 3),但你传入的图像形状是 (batch_size, 50, 50, 3)。因此,请尝试将参数 resizeWidth 和 resizeHeight 更新为 224。

<TensorCamera
        ref={cameraRef}
        // Standard Camera props
        style={styles.camera}
        type={Camera.Constants.Type.back}
        flashMode={Camera.Constants.FlashMode.off}
        // Tensor related props
        cameraTextureHeight={textureDims.height}
        cameraTextureWidth={textureDims.width}
        // Frames are resized to 224x224x3 to match the (batch_size, 224, 224, 3)
        // input shape the model is described as expecting. 224 also satisfies the
        // "width must be a multiple of 4" requirement reported for depth 3.
        resizeHeight={224}
        resizeWidth={224}
        resizeDepth={3}
        // handleCameraStream receives the frame tensors once the camera is ready.
        onReady={tensors => handleCameraStream(tensors)}
        autorender={true}
      />
Run Code Online (Sandbox Code Playgroud)

除此之外,在把张量传给模型进行预测之前,您还需要将 3D 张量转换为 4D 张量,也就是扩展出一个维度。请把 handleCameraStream 函数也更新为下面这样。张量原来的形状为 (224,224,3),扩展第一个维度后变为 (1,224,224,3)。

/**
 * Starts the prediction loop over the frame iterator handed to onReady.
 *
 * Every makePredictionsEveryNFrame-th frame is pulled from the iterator,
 * expanded from a 3D image to a 4D batch of one, and passed to the model.
 *
 * NOTE(review): `model`, `frameCount` and `requestAnimationFrameId` come from
 * the enclosing component. If this function is only invoked once, it keeps
 * the `model` value from that render — confirm the model is loaded by then.
 *
 * @param tensors Iterator whose next().value is the current camera frame tensor.
 */
const handleCameraStream = (tensors) => {
  if (!tensors) {
    console.log("Image not found!");
    return;
  }
  const loop = async () => {
    if (frameCount % makePredictionsEveryNFrame === 0) {
      const imageTensor = tensors.next().value;
      // Declared outside the `if (model)` block so cleanup below can see it.
      let imageTensorReshaped;
      try {
        if (model && imageTensor) {
          // expandDims takes the axis positionally; `axis=0` would assign to
          // an undeclared variable.
          imageTensorReshaped = imageTensor.expandDims(0);
          const results = await startPrediction(model, imageTensorReshaped);
          setPredictions(results);
        }
      } finally {
        // Release both the batched tensor and the original frame.
        if (imageTensorReshaped) {
          tf.dispose(imageTensorReshaped);
        }
        if (imageTensor) {
          tf.dispose(imageTensor);
        }
      }
    }
    frameCount += 1;
    frameCount = frameCount % makePredictionsEveryNFrame;
    requestAnimationFrameId = requestAnimationFrame(loop);
  };
  loop();
};
Run Code Online (Sandbox Code Playgroud)