Mirror of https://github.com/mudler/LocalAI.git, synced 2026-04-01 05:36:49 -04:00
* feat: Rename 'Whisper' model type to 'STT' in UI

  - Updated models.html: Changed 'Whisper' filter button to 'STT'
  - Updated talk.html: Changed 'Whisper Model' to 'STT Model'
  - Updated backends.html: Changed 'Whisper' to 'STT'
  - Updated talk.js: Renamed getWhisperModel() to getSTTModel(), sendAudioToWhisper() to sendAudioToSTT(), and whisperModelSelect to sttModelSelect

  This change makes the UI more consistent with the model category naming, where all speech-to-text models (including Whisper, Parakeet, Moonshine, WhisperX, etc.) are grouped under the 'STT' (Speech-to-Text) category.

  Fixes #8776

  Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>

* Rename whisperModelSelect to sttModelSelect in talk.html

  As requested by maintainer mudler in PR review, replacing all whisperModelSelect occurrences with sttModelSelect since the model type was renamed from Whisper to STT.

  Signed-off-by: LocalAI [bot] <localai-bot@users.noreply.github.com>

---------

Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Signed-off-by: LocalAI [bot] <localai-bot@users.noreply.github.com>
Co-authored-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Co-authored-by: LocalAI [bot] <localai-bot@users.noreply.github.com>
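The grouping described above is what feeds the talk page's dropdowns: talk.js only reads whatever is currently selected in sttModelSelect. As a rough illustration of how a category-tagged model list could populate that dropdown, here is a minimal sketch; populateSTTSelect and the category field are assumptions made for this example, not code from talk.html or models.html.

// Hypothetical sketch only: fill the STT dropdown from a list of
// { name, category } entries. Anything tagged "STT" (Whisper, Parakeet,
// Moonshine, WhisperX, ...) ends up selectable via getSTTModel().
function populateSTTSelect(models) {
    const select = document.getElementById('sttModelSelect');
    select.innerHTML = '';
    models
        .filter((m) => m.category === 'STT')
        .forEach((m) => {
            const option = document.createElement('option');
            option.value = m.name;
            option.textContent = m.name;
            select.appendChild(option);
        });
}

// Example usage with a made-up model list:
// populateSTTSelect([
//     { name: 'whisper-base', category: 'STT' },
//     { name: 'parakeet-tdt', category: 'STT' },
//     { name: 'piper-voice', category: 'TTS' },
// ]);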
// UI elements used by the talk page.
const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');

// Recording and conversation state.
let mediaRecorder;
let audioChunks = [];
let isRecording = false;
let conversationHistory = [];
let resetTimer; // Inactivity timer that clears the conversation history.
// Selected chat model.
function getModel() {
    return document.getElementById('modelSelect').value;
}

// Selected speech-to-text model.
function getSTTModel() {
    return document.getElementById('sttModelSelect').value;
}

// Selected text-to-speech model.
function getTTSModel() {
    return document.getElementById('ttsModelSelect').value;
}

// Drop the accumulated chat history and cancel any pending auto-reset.
function resetConversation() {
    conversationHistory = [];
    console.log("Conversation has been reset.");
    clearTimeout(resetTimer);
}

// (Re)start the inactivity timer that resets the conversation.
function setResetTimer() {
    clearTimeout(resetTimer);
    resetTimer = setTimeout(resetConversation, 300000); // Reset after 5 minutes
}
recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);

// First click starts recording, the next click stops it.
function toggleRecording() {
    if (!isRecording) {
        startRecording();
    } else {
        stopRecording();
    }
}
// Request microphone access and start capturing audio chunks.
async function startRecording() {
    document.getElementById("recording").style.display = "block";
    document.getElementById("resetButton").style.display = "none";
    if (!navigator.mediaDevices) {
        alert('MediaDevices API not supported!');
        return;
    }
    let stream;
    try {
        stream = await navigator.mediaDevices.getUserMedia({ audio: true });
    } catch (err) {
        // Microphone access denied or unavailable: restore the idle UI.
        alert('Could not access the microphone: ' + err.message);
        document.getElementById("recording").style.display = "none";
        document.getElementById("resetButton").style.display = "block";
        return;
    }
    mediaRecorder = new MediaRecorder(stream);
    audioChunks = [];
    mediaRecorder.ondataavailable = (event) => {
        audioChunks.push(event.data);
    };
    mediaRecorder.start();
    recordButton.textContent = 'Stop Recording';
    // Dim the record button while recording is in progress.
    recordButton.classList.add("bg-gray-500");

    isRecording = true;
}
// Stop the recorder, then run the voice pipeline:
// recorded audio -> transcription -> chat completion -> synthesized speech -> playback.
function stopRecording() {
    // Register the handler before stopping so the 'stop' event is never missed.
    mediaRecorder.onstop = async () => {
        document.getElementById("recording").style.display = "none";
        document.getElementById("recordButton").style.display = "none";

        document.getElementById("loader").style.display = "block";
        const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
        document.getElementById("statustext").textContent = "Processing audio...";
        const transcript = await sendAudioToSTT(audioBlob);
        console.log("Transcript:", transcript);
        document.getElementById("statustext").textContent = "Seems you said: " + transcript + ". Generating response...";
        const responseText = await sendTextToChatGPT(transcript);

        console.log("Response:", responseText);
        document.getElementById("statustext").textContent = "Response generated: '" + responseText + "'. Generating audio response...";

        const ttsAudio = await getTextToSpeechAudio(responseText);
        playAudioResponse(ttsAudio);

        recordButton.textContent = 'Record';
        // Restore the record button's normal appearance.
        recordButton.classList.remove("bg-gray-500");
        isRecording = false;
        document.getElementById("loader").style.display = "none";
        document.getElementById("recordButton").style.display = "block";
        document.getElementById("resetButton").style.display = "block";
        document.getElementById("statustext").textContent = "Press the record button to start recording.";
    };
    mediaRecorder.stop();
}
// Upload the recorded audio to the transcription endpoint and return the transcript text.
async function sendAudioToSTT(audioBlob) {
    const formData = new FormData();
    formData.append('file', audioBlob);
    formData.append('model', getSTTModel());

    const response = await fetch('v1/audio/transcriptions', {
        method: 'POST',
        body: formData
    });

    const result = await response.json();
    console.log("STT result:", result);
    return result.text;
}
// Append the user's message to the history and request a chat completion.
async function sendTextToChatGPT(text) {
    conversationHistory.push({ role: "user", content: text });

    const response = await fetch('v1/chat/completions', {
        method: 'POST',
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
            model: getModel(),
            messages: conversationHistory
        })
    });

    const result = await response.json();
    const responseText = result.choices[0].message.content;
    conversationHistory.push({ role: "assistant", content: responseText });

    setResetTimer();

    return responseText;
}
// Request synthesized speech for the response text and return it as an audio blob.
async function getTextToSpeechAudio(text) {
    const response = await fetch('v1/audio/speech', {
        method: 'POST',
        headers: {
            'Content-Type': 'application/json'
        },
        body: JSON.stringify({
            // "backend": "string",
            input: text,
            model: getTTSModel(),
            // "voice": "string"
        })
    });

    const audioBlob = await response.blob();
    return audioBlob; // Return the blob directly
}
// Play the synthesized response in the page's audio element.
function playAudioResponse(audioBlob) {
    const audioUrl = URL.createObjectURL(audioBlob);
    audioPlayback.src = audioUrl;
    audioPlayback.hidden = false;
    audioPlayback.play();
}