Files
LocalAI/core/http/static/talk.js
LocalAI [bot] 61c139fa7d feat: Rename 'Whisper' model type to 'STT' in UI (#8785)
* feat: Rename 'Whisper' model type to 'STT' in UI

- Updated models.html: Changed 'Whisper' filter button to 'STT'
- Updated talk.html: Changed 'Whisper Model' to 'STT Model'
- Updated backends.html: Changed 'Whisper' to 'STT'
- Updated talk.js: Renamed getWhisperModel() to getSTTModel(),
  sendAudioToWhisper() to sendAudioToSTT(), and whisperModelSelect to sttModelSelect

This change makes the UI more consistent with the model category naming,
where all speech-to-text models (including Whisper, Parakeet, Moonshine,
WhisperX, etc.) are grouped under the 'STT' (Speech-to-Text) category.

Fixes #8776

Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>

* Rename whisperModelSelect to sttModelSelect in talk.html

As requested by maintainer mudler in PR review, replacing all
whisperModelSelect occurrences with sttModelSelect since the
model type was renamed from Whisper to STT.

Signed-off-by: LocalAI [bot] <localai-bot@users.noreply.github.com>

---------

Signed-off-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Signed-off-by: LocalAI [bot] <localai-bot@users.noreply.github.com>
Co-authored-by: team-coding-agent-1 <team-coding-agent-1@localai.dev>
Co-authored-by: LocalAI [bot] <localai-bot@users.noreply.github.com>
2026-03-05 09:51:47 +01:00

160 lines
4.9 KiB
JavaScript

// UI elements for the voice-chat page (talk.html).
const recordButton = document.getElementById('recordButton');
const audioPlayback = document.getElementById('audioPlayback');
const resetButton = document.getElementById('resetButton');
// Shared recording/conversation state used by the handlers below.
let mediaRecorder; // active MediaRecorder while capturing audio
let audioChunks = []; // raw audio data collected during a recording
let isRecording = false; // toggled by the record button
let conversationHistory = []; // messages sent to v1/chat/completions
let resetTimer; // inactivity timer that wipes the conversation
// Read the chat model currently selected on the page.
function getModel() {
  const select = document.getElementById('modelSelect');
  return select.value;
}
// Read the speech-to-text model currently selected on the page.
function getSTTModel() {
  const select = document.getElementById('sttModelSelect');
  return select.value;
}
// Read the text-to-speech model currently selected on the page.
function getTTSModel() {
  const select = document.getElementById('ttsModelSelect');
  return select.value;
}
// Drop all chat context and cancel any pending inactivity reset.
function resetConversation() {
  clearTimeout(resetTimer);
  conversationHistory = [];
  console.log("Conversation has been reset.");
}
// (Re)arm the inactivity timer: after 5 minutes without a new chat
// exchange the conversation history is wiped.
function setResetTimer() {
  const FIVE_MINUTES_MS = 300000;
  clearTimeout(resetTimer);
  resetTimer = setTimeout(resetConversation, FIVE_MINUTES_MS);
}
// Wire up the two page buttons.
recordButton.addEventListener('click', toggleRecording);
resetButton.addEventListener('click', resetConversation);
// Flip between starting and stopping a recording session.
function toggleRecording() {
  if (isRecording) {
    stopRecording();
  } else {
    startRecording();
  }
}
/**
 * Begin capturing microphone audio with MediaRecorder.
 *
 * Fixes:
 *  - `getUserMedia` can reject (permission denied, no input device);
 *    previously that rejection was unhandled and the page stayed stuck
 *    showing the "recording" indicator. Errors now restore the idle UI.
 *  - The unsupported-API early return also restores the UI it had
 *    already switched into recording mode.
 */
async function startRecording() {
  document.getElementById("recording").style.display = "block";
  document.getElementById("resetButton").style.display = "none";
  if (!navigator.mediaDevices) {
    alert('MediaDevices API not supported!');
    // Put the UI back into its idle state before bailing out.
    document.getElementById("recording").style.display = "none";
    document.getElementById("resetButton").style.display = "block";
    return;
  }
  let stream;
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true });
  } catch (err) {
    console.error("Microphone access failed:", err);
    alert('Could not access the microphone: ' + err.message);
    document.getElementById("recording").style.display = "none";
    document.getElementById("resetButton").style.display = "block";
    return;
  }
  mediaRecorder = new MediaRecorder(stream);
  audioChunks = [];
  mediaRecorder.ondataavailable = (event) => {
    audioChunks.push(event.data);
  };
  mediaRecorder.start();
  recordButton.textContent = 'Stop Recording';
  // Grey the button out while recording (previous comment said
  // bg-red-500, but the class actually applied is bg-gray-500).
  recordButton.classList.add("bg-gray-500");
  isRecording = true;
}
/**
 * Stop the recorder and run the full voice pipeline:
 * transcribe (STT) -> chat completion -> speech synthesis (TTS) -> playback.
 *
 * Fixes:
 *  - `onstop` is attached BEFORE calling stop(), so the handler cannot
 *    be missed if the stop event fires promptly.
 *  - The microphone tracks are released, so the browser's mic-in-use
 *    indicator turns off between recordings.
 *  - The UI is restored in a finally block; a failed request no longer
 *    leaves the loader spinning forever.
 */
function stopRecording() {
  mediaRecorder.onstop = async () => {
    // Release the microphone capture stream.
    mediaRecorder.stream.getTracks().forEach((track) => track.stop());
    document.getElementById("recording").style.display = "none";
    document.getElementById("recordButton").style.display = "none";
    document.getElementById("loader").style.display = "block";
    try {
      const audioBlob = new Blob(audioChunks, { type: 'audio/webm' });
      document.getElementById("statustext").textContent = "Processing audio...";
      const transcript = await sendAudioToSTT(audioBlob);
      console.log("Transcript:", transcript);
      document.getElementById("statustext").textContent = "Seems you said: " + transcript + ". Generating response...";
      const responseText = await sendTextToChatGPT(transcript);
      console.log("Response:", responseText);
      document.getElementById("statustext").textContent = "Response generated: '" + responseText + "'. Generating audio response...";
      const ttsAudio = await getTextToSpeechAudio(responseText);
      playAudioResponse(ttsAudio);
      document.getElementById("statustext").textContent = "Press the record button to start recording.";
    } catch (err) {
      console.error("Voice pipeline failed:", err);
      document.getElementById("statustext").textContent = "Something went wrong: " + err.message;
    } finally {
      // Always return the controls to their idle state.
      recordButton.textContent = 'Record';
      recordButton.classList.remove("bg-gray-500");
      isRecording = false;
      document.getElementById("loader").style.display = "none";
      document.getElementById("recordButton").style.display = "block";
      document.getElementById("resetButton").style.display = "block";
    }
  };
  mediaRecorder.stop();
}
/**
 * POST the recorded audio blob to the transcription endpoint and return
 * the recognized text.
 *
 * Fix: non-2xx responses now throw instead of silently yielding
 * `undefined` from `result.text`; also adds the missing semicolon on the
 * log statement.
 *
 * @param {Blob} audioBlob - recorded audio (audio/webm).
 * @returns {Promise<string>} the transcript text.
 * @throws {Error} when the HTTP request fails.
 */
async function sendAudioToSTT(audioBlob) {
  const formData = new FormData();
  formData.append('file', audioBlob);
  formData.append('model', getSTTModel());
  const response = await fetch('v1/audio/transcriptions', {
    method: 'POST',
    body: formData
  });
  if (!response.ok) {
    throw new Error(`Transcription request failed with status ${response.status}`);
  }
  const result = await response.json();
  console.log("STT result:", result);
  return result.text;
}
/**
 * Send the user's utterance to the chat completion endpoint, keeping the
 * running conversation history, and return the assistant's reply.
 *
 * Fixes:
 *  - non-2xx responses now throw instead of crashing on
 *    `result.choices[0]` being undefined.
 *  - on failure the unanswered user turn is rolled back from the
 *    history, so a retry does not send duplicate/inconsistent context.
 *
 * @param {string} text - user message (transcript).
 * @returns {Promise<string>} the assistant's reply text.
 * @throws {Error} when the HTTP request fails.
 */
async function sendTextToChatGPT(text) {
  conversationHistory.push({ role: "user", content: text });
  try {
    const response = await fetch('v1/chat/completions', {
      method: 'POST',
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        model: getModel(),
        messages: conversationHistory
      })
    });
    if (!response.ok) {
      throw new Error(`Chat request failed with status ${response.status}`);
    }
    const result = await response.json();
    const responseText = result.choices[0].message.content;
    conversationHistory.push({ role: "assistant", content: responseText });
    setResetTimer();
    return responseText;
  } catch (err) {
    // Roll back the unanswered user turn so history stays consistent.
    conversationHistory.pop();
    throw err;
  }
}
/**
 * Request synthesized speech for `text` and return the audio as a Blob.
 *
 * Fix: non-2xx responses now throw instead of returning an error page as
 * a blob, which would silently fail to play.
 *
 * @param {string} text - text to synthesize.
 * @returns {Promise<Blob>} audio data ready for playback.
 * @throws {Error} when the HTTP request fails.
 */
async function getTextToSpeechAudio(text) {
  const response = await fetch('v1/audio/speech', {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      // "backend": "string",
      input: text,
      model: getTTSModel(),
      // "voice": "string"
    })
  });
  if (!response.ok) {
    throw new Error(`TTS request failed with status ${response.status}`);
  }
  const audioBlob = await response.blob();
  return audioBlob; // Return the blob directly
}
/**
 * Play the synthesized reply through the page's <audio> element.
 *
 * Fixes:
 *  - the blob URL from the previous reply is revoked before creating a
 *    new one, so repeated exchanges no longer leak object URLs for the
 *    lifetime of the page.
 *  - `play()` returns a promise that can reject (e.g. autoplay policy);
 *    the rejection is now logged instead of being unhandled.
 *
 * @param {Blob} audioBlob - audio data from the TTS endpoint.
 */
function playAudioResponse(audioBlob) {
  // Free the previous reply's object URL, if any.
  if (audioPlayback.src && audioPlayback.src.startsWith('blob:')) {
    URL.revokeObjectURL(audioPlayback.src);
  }
  const audioUrl = URL.createObjectURL(audioBlob);
  audioPlayback.src = audioUrl;
  audioPlayback.hidden = false;
  audioPlayback.play().catch((err) => console.error("Audio playback failed:", err));
}