Ali*_*oul · 5 · Tags: recorder, node.js, reactjs, openai-api, openai-whisper
I'm building a transcriber with the OpenAI Whisper API in Node.js and React. I want users to be able to record an audio file in the browser and transcribe their recording. I do this by saving the recorded audio blob's buffer data to an mp3 file, then feeding fs.createReadStream(recorded_audio_file.mp3) into the createTranscription() API call, which returns a 400 error. When I record an audio file with the Windows Voice Recorder and feed that file in instead, the API call works fine. Here is my React recorder component:
import React, { useState, useEffect, useRef } from "react";
import Microphone from "./Microphone/Microphone";

const TSST = () => {
  const BASE_URL = process.env.REACT_APP_SERVER_URL || "http://localhost:5000";
  const mediaRecorder = useRef(null);
  const [stream, setStream] = useState(null);
  const [audioChunks, setAudioChunks] = useState([]);
  const [audio, setAudio] = useState(null);
  const [audioFile, setAudioFile] = useState(null);
  const [transcription, setTranscription] = useState("");
  const [audioBlob, setAudioBlob] = useState("");
  const [audioBuffer, setAudioBuffer] = useState("");

  useEffect(() => {
    const initializeMediaRecorder = async () => {
      if ("MediaRecorder" in window) {
        try {
          const streamData = await navigator.mediaDevices.getUserMedia({ audio: true });
          setStream(streamData);
        } catch (err) {
          console.log(err.message);
        }
      } else {
        console.log("The MediaRecorder API is not supported in your browser.");
      }
    };
    initializeMediaRecorder();
  }, []);

  const handleStartRecording = () => {
    const media = new MediaRecorder(stream, { type: "audio/mp3" });
    mediaRecorder.current = media;
    mediaRecorder.current.start();
    let chunks = [];
    mediaRecorder.current.ondataavailable = (e) => {
      chunks.push(e.data);
    };
    setAudioChunks(chunks);
  };

  const handleStopRecording = () => {
    mediaRecorder.current.stop();
    mediaRecorder.current.onstop = () => {
      const audioBlob = new Blob(audioChunks, { type: "audio/mp3" });
      const audioUrl = URL.createObjectURL(audioBlob);
      setAudioBlob(audioBlob);
      setAudio(audioUrl);
      setAudioChunks([]);
      let file = new File([audioUrl], "recorded_audio.mp3", { type: "audio/mp3", lastModified: new Date().getTime() });
      let container = new DataTransfer();
      container.items.add(file);
      document.getElementById("audioFile").files = container.files;
      setAudioFile(container.files[0]);
      console.log(file);
    };
  };

  const handleSubmitRecording = async () => {
    try {
      // Convert the recorded audio blob to a base64 string
      const reader = new FileReader();
      reader.onloadend = async () => {
        const base64String = reader.result.split(",")[1]; // Extract base64 data from the result
        const res = await fetch(`${BASE_URL}/api/openai/transcriber`, {
          method: "POST",
          headers: {
            "Content-Type": "application/json",
          },
          body: JSON.stringify({ audioBuffer: base64String, lang: "en" }),
        });
        const data = await res.json();
        setTranscription(data);
      };
      reader.readAsDataURL(audioBlob);
    } catch (error) {
      console.log(error);
    }
  };

  return (
    <div className="h-[calc(100vh-73px)] flex justify-center items-center">
      <div className="w-[40%] flex justify-between items-center">
        <div className="flex flex-col">
          <Microphone startFunction={handleStartRecording} stopFunction={handleStopRecording} />
          <button onClick={handleStartRecording} className="w-fit my-10 p-5 bg-gray-200 rounded-lg">Start Recording</button>
          <button onClick={handleStopRecording} className="w-fit mb-10 p-5 bg-gray-200 rounded-lg">Stop Recording</button>
          <audio className="mb-10" src={audio && audio} controls></audio>
          <input id="audioFile" type="file" onChange={(e) => { setAudioFile(e.target.files[0]); }} />
        </div>
        <div>
          <button className="p-10 bg-yellow-500 rounded-xl" onClick={handleSubmitRecording}>Submit</button>
        </div>
      </div>
      <div className="w-[40%] flex justify-center items-center">
        <textarea value={transcription} readOnly className="w-[60%] aspect-square resize-none shadow-lg shadow-black"></textarea>
      </div>
    </div>
  );
};

export default TSST;
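A note on the recorder itself: browsers do not encode MP3 with MediaRecorder, and the constructor option is mimeType, not type, so the blob above is almost certainly WebM/Opus data carrying an .mp3 name (and new File([audioUrl], ...) wraps the object URL string rather than the blob). That mismatch alone can produce a 400 from Whisper. A minimal sketch of recording in a container the browser actually supports follows; the variable names are illustrative, and Whisper accepts .webm uploads:

// Sketch (not the original component): record in a container the browser supports
const mimeType = MediaRecorder.isTypeSupported("audio/webm") ? "audio/webm" : "";
const recorder = new MediaRecorder(stream, { mimeType }); // note: mimeType, not type
const chunks = [];
recorder.ondataavailable = (e) => chunks.push(e.data);
recorder.onstop = () => {
  const blob = new Blob(chunks, { type: recorder.mimeType });
  // Wrap the blob itself, not its object URL, and name the file to match the container
  const file = new File([blob], "recorded_audio.webm", { type: blob.type });
  console.log(file);
};
recorder.start();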
Here is the API:
export const transcribe = async (req, res) => {
  // const lang = JSON.parse(req.body.json).lang;
  // const audioBuffer = req.file;
  const { audioBuffer, lang } = req.body;
  const audioBufferBase64 = Buffer.from(audioBuffer, "base64");
  const fileName = "test.mp3";
  const folderName = `./audio/${fileName}`;
  const writableStream = fs.createWriteStream(folderName); // Replace with your desired file path and extension
  writableStream.write(audioBufferBase64);
  const readStream = fs.createReadStream(folderName);
  readStream.on("data", (data) => {
    console.log("Read stream data:", data);
  });
  try {
    const whisperRes = await openai.createTranscription(
      readStream,
      "whisper-1",
    );
    const chatResponse = whisperRes.data.text;
    console.log(chatResponse);
    res.status(200).json({ chatResponse: chatResponse });
  } catch (error) {
    // console.log(error);
    res.status(500).json({ message: error });
  }
};
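Separately from the container question, the handler above starts reading test.mp3 back before the write stream has flushed, so the read stream can hit a partial or empty file. A minimal sketch of a safer ordering, keeping the same path (an illustration, not the original code):

// Sketch: inside the async transcribe handler, finish the write before reading
import fs from "fs";

const audioBufferBase64 = Buffer.from(audioBuffer, "base64");
const folderName = "./audio/test.mp3";

// fs.promises.writeFile resolves only once every byte is on disk,
// unlike writableStream.write(), which returns before the flush completes
await fs.promises.writeFile(folderName, audioBufferBase64);

// Only now is it safe to stream the file to the transcription call
const readStream = fs.createReadStream(folderName);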
Here is the server:
import express from "express";
import cors from "cors";
import * as dotenv from "dotenv";
import mongoose from "mongoose";
import multer from "multer";
import { dalle, chatGPT, summarize, translate, transcribe } from "./api/openai.js";
import { getImages, postImage } from "./api/imageShowcase.js";
import { login, signup } from "./api/user.js";

dotenv.config();

const app = express();
const upload = multer();
const storage = multer.memoryStorage();
const uploadMiddleware = multer({ storage: storage });

app.use(cors());
app.use(express.json({ limit: "50mb" }));

const atlasURL = process.env.MONGODB_URL;
const PORT = process.env.PORT || 5000;

mongoose.connect(atlasURL)
  .then(() => app.listen(PORT, () => console.log(`Successfully connected to port ${PORT}`)))
  .catch((error) => console.log("There was an error: ", error));

app.get("/", async (req, res) => {
  res.send("Server is RUNNING");
});

app.post("/api/openai/transcriber", (req, res) => transcribe(req, res));
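As an aside, the server defines uploadMiddleware but never attaches it to a route. If the recording were posted as multipart/form-data instead of base64 JSON, multer could hand the file to the handler in memory. A sketch under that assumption (the field name "file" and the req.file logging are illustrative, not from the original code):

// Sketch: accept the recording as multipart/form-data instead of base64 JSON
app.post(
  "/api/openai/transcriber",
  uploadMiddleware.single("file"), // multer puts the uploaded file on req.file
  (req, res) => {
    // With memoryStorage, req.file.buffer holds the raw audio bytes
    console.log(req.file.originalname, req.file.mimetype, req.file.size);
    // ...write req.file.buffer to disk (or stream it) and transcribe as before
    res.status(501).json({ message: "sketch only" });
  }
);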
The saved mp3 file plays fine. The API key is correct. When I record an mp3 myself with the Windows Voice Recorder and use createReadStream on it, it works perfectly. The saved file data is a buffer of the following form.
I've tried changing how the file is saved, using different encodings for the buffer (binary, hex, base64). I've tried uploading the buffer directly to the Whisper API, posting straight to the API URL with axios, wrapping the mp3 save in a promise before calling createReadStream, reading directly from the buffer, and many other small changes. I've gone through all the answers to similar questions, but nothing has worked.
Just call the transcribeAudio function shown below inside the try/catch of your transcribe function.
Also, make sure the .mp3 file is actually being created locally, and try playing it: sometimes the audio file is not written correctly when the code runs, and that is what causes the problems.
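One quick way to act on that advice is to inspect the file's first bytes: genuine MP3 data starts with the ASCII tag "ID3" or an 0xFF frame-sync byte, while MediaRecorder's WebM output starts with the EBML signature 1A 45 DF A3. A small sketch of that check, with an illustrative path (this is an addition, not part of the original answer):

// Sketch: check whether ./audio/test.mp3 actually contains MP3 data
import fs from "fs";

const header = Buffer.alloc(4);
const fd = fs.openSync("./audio/test.mp3", "r");
fs.readSync(fd, header, 0, 4, 0);
fs.closeSync(fd);

if (header.toString("ascii", 0, 3) === "ID3" || (header[0] === 0xff && (header[1] & 0xe0) === 0xe0)) {
  console.log("Looks like MP3");
} else if (header.readUInt32BE(0) === 0x1a45dfa3) {
  console.log("Looks like WebM/Matroska - rename to .webm before uploading");
} else {
  console.log("Unknown container:", header);
}

With the file verified, the try/catch inside transcribe becomes: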
try {
  // Pass the saved file's path so transcribeAudio can open its own read stream
  const whisperRes = await transcribeAudio(folderName);
  const chatResponse = whisperRes.data.text;
  console.log(chatResponse);
  res.status(200).json({ chatResponse: chatResponse });
} catch (error) {
  // console.log(error);
  res.status(500).json({ message: error });
}
import FormData from "form-data";
import axios from "axios";
import fs from "fs";

// Posts the audio file at the given path directly to the Whisper endpoint
const transcribeAudio = async (file) => {
  let data = new FormData();
  data.append("file", fs.createReadStream(file)); // "file" must be a path on disk
  data.append("model", "whisper-1");
  data.append("language", "en");

  let config = {
    method: "post",
    maxBodyLength: Infinity,
    url: "https://api.openai.com/v1/audio/transcriptions",
    headers: {
      Authorization: `Bearer ${process.env.OPENAI_API_KEY}`,
      ...data.getHeaders(), // sets the multipart Content-Type with the correct boundary
    },
    data: data,
  };

  try {
    const response = await axios.request(config);
    return { data: response.data };
  } catch (error) {
    return {}; // note: callers will then fail on whisperRes.data.text
  }
};
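For completeness, a usage sketch, assuming the recording has already been written to disk at an illustrative path:

// Sketch: transcribe an existing file and guard against the empty-object fallback
const result = await transcribeAudio("./audio/test.mp3");
if (result.data) {
  console.log("Transcription:", result.data.text);
} else {
  console.log("Transcription request failed");
}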