voicebot demo
This commit is contained in:
235
frontend/assets/audio-playback-worklet.js
Normal file
235
frontend/assets/audio-playback-worklet.js
Normal file
@@ -0,0 +1,235 @@
|
||||
/**
|
||||
* @typedef {Object} AppendDeltaMessage
|
||||
* @property {"appendDelta"} type
|
||||
* @property {ResponseAudioDelta} delta
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} DeleteItemMessage
|
||||
* @property {"deleteItem"} type
|
||||
* @property {string} item_id
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} ClearMessage
|
||||
* @property {"clear"} type
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} SetSourceRateMessage
|
||||
* @property {"setSourceRate"} type
|
||||
* @property {number} hz
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} NowPlayingMessage
|
||||
* @property {"nowPlaying"} type
|
||||
* @property {string|null} item_id
|
||||
* @property {number} played_ms
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} MuteMessage
|
||||
* @property {"mute"} type
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} UnmuteMessage
|
||||
* @property {"unmute"} type
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {AppendDeltaMessage | DeleteItemMessage | ClearMessage | SetSourceRateMessage | MuteMessage | UnmuteMessage} PlaybackMessage
|
||||
*/
|
||||
|
||||
/**
|
||||
* @typedef {Object} Chunk
|
||||
* @property {string} item_id
|
||||
* @property {Int16Array} data
|
||||
* @property {number} off
|
||||
*/
|
||||
|
||||
/**
 * AudioWorklet processor that plays queued 16-bit PCM chunks.
 *
 * Chunks arrive over the message port ("appendDelta"), are consumed in FIFO
 * order and linearly interpolated from `srcRate` to the context's
 * `sampleRate`. The processor reports which item is currently audible and how
 * many milliseconds of it have been consumed via "nowPlaying" messages.
 */
class AudioPlaybackWorklet extends AudioWorkletProcessor {
    constructor() {
        super()
        // Source rate of the incoming PCM and rate of the audio context.
        /** @type {number} */ this.srcRate = 24000
        /** @type {number} */ this.dstRate = sampleRate
        // Source samples consumed per output frame.
        /** @type {number} */ this.step = this.srcRate / this.dstRate
        /** @type {Chunk[]} */ this.queue = []
        /** @type {Chunk|null} */ this.cur = null
        // Last sample handed out by _nextInt16 (0 while the queue is empty).
        /** @type {number} */ this.hold = 0
        // Fractional position between the two interpolation taps, in [0, 1).
        /** @type {number} */ this.phase = 0
        // Interpolation taps; `undefined` means "not primed yet".
        /** @type {number|undefined} */ this._x0 = undefined
        /** @type {number|undefined} */ this._x1 = undefined
        /** @type {string|null} */ this._x0ItemId = null
        /** @type {string|null} */ this._x1ItemId = null
        /** @type {string|null} */ this._nextItemId = null
        /** @type {string|null} */ this.nowItemId = null
        /** @type {number} */ this.nowItemSamples = 0
        /** @type {number} */ this._notifyFrames = 0
        /** @type {boolean} */ this.muted = false
        this.port.onmessage = (e) => this._onMessage(/** @type {PlaybackMessage} */ (e.data))
    }

    /**
     * Handles a control message from the main thread.
     * @param {PlaybackMessage} msg
     */
    _onMessage(msg) {
        if (!msg || !msg.type) return
        switch (msg.type) {
            case "appendDelta":
                if (msg.delta && msg.delta.pcmInt16 instanceof Int16Array) {
                    this.queue.push({ item_id: msg.delta.item_id, data: msg.delta.pcmInt16, off: 0 })
                }
                return
            case "deleteItem":
                this._deleteItem(msg.item_id)
                return
            case "clear":
                this._clear()
                return
            case "setSourceRate":
                if (Number.isFinite(msg.hz) && msg.hz > 0) {
                    // Math.floor instead of `| 0`: the bitwise form wraps above 2^31 and
                    // turns fractional rates below 1 Hz into 0, which would zero `step`
                    // and make `played_ms` NaN.
                    const hz = Math.floor(msg.hz)
                    if (hz >= 1) {
                        this.srcRate = hz
                        this.step = this.srcRate / this.dstRate
                    }
                }
                return
            case "mute":
                this.muted = true
                return
            case "unmute":
                this.muted = false
                return
            default:
                return
        }
    }

    /**
     * Removes every queued chunk of `id` and stops playing it immediately.
     * @param {string} id
     */
    _deleteItem(id) {
        this.queue = this.queue.filter((ch) => ch.item_id !== id)
        if (this.cur && this.cur.item_id === id) {
            this.cur = null
            this.hold = 0
        }
        // The interpolation taps may still carry samples of the deleted item. Without
        // clearing them the next advance would copy the stale id into `nowItemId` and
        // announce the deleted item as playing again.
        if (this._x0ItemId === id) {
            this._x0 = 0
            this._x0ItemId = null
        }
        if (this._x1ItemId === id) {
            this._x1 = 0
            this._x1ItemId = null
        }
        if (this._nextItemId === id) this._nextItemId = null
        if (this.nowItemId === id) {
            this.nowItemId = null
            this.nowItemSamples = 0
            this._postNowPlaying()
        }
    }

    /** Drops all queued audio and resets the resampler and progress state. */
    _clear() {
        this.queue.length = 0
        this.cur = null
        this.hold = 0
        this.phase = 0
        this._x0 = undefined
        this._x1 = undefined
        this._x0ItemId = null
        this._x1ItemId = null
        this._nextItemId = null
        this.nowItemId = null
        this.nowItemSamples = 0
        this._notifyFrames = 0
        this._postNowPlaying()
    }

    /**
     * Makes sure `cur` points at a chunk, pulling the next one from the queue.
     * @returns {boolean} false when there is nothing left to play
     */
    _ensureCurrent() {
        if (this.cur == null) {
            if (this.queue.length === 0) return false
            this.cur = this.queue.shift() || null
            if (this.cur == null) return false
        }
        return true
    }

    /**
     * Returns the next source sample and records its item id in `_nextItemId`.
     * Yields 0 (silence) with a null item id when the queue is drained.
     * @returns {number}
     */
    _nextInt16() {
        for (;;) {
            if (!this._ensureCurrent()) {
                this._nextItemId = null
                this.hold = 0
                return 0
            }
            const d = this.cur.data
            const o = this.cur.off | 0
            if (o < d.length) {
                const s = d[o]
                this.cur.off = o + 1
                this.hold = s
                this._nextItemId = this.cur.item_id
                return s
            }
            // Chunk exhausted: move on to the next queued chunk.
            this.cur = null
        }
    }

    /** Posts the current item id and its consumed duration to the main thread. */
    _postNowPlaying() {
        /** @type {NowPlayingMessage} */
        const m = {
            type: "nowPlaying",
            item_id: this.nowItemId,
            played_ms: Math.max(0, Math.floor((this.nowItemSamples * 1000) / this.srcRate)),
        }
        this.port.postMessage(m)
    }

    /**
     * Advances the resampler by one output frame, shifting the interpolation taps
     * for every whole source sample crossed and keeping the progress counters in sync.
     * Kept as a method so no closure is allocated per render quantum.
     */
    _advance() {
        this.phase += this.step
        while (this.phase >= 1) {
            this.phase -= 1
            this._x0 = this._x1
            this._x0ItemId = this._x1ItemId
            this._x1 = this._nextInt16()
            this._x1ItemId = this._nextItemId

            if (this.nowItemId !== this._x0ItemId) {
                this.nowItemId = this._x0ItemId
                this.nowItemSamples = 0
                this._postNowPlaying()
            }
            if (this.nowItemId) this.nowItemSamples += 1
        }
    }

    /**
     * Renders one block of audio into every channel of the first output.
     * @param {Float32Array[][]} _inputs
     * @param {Float32Array[][]} outputs
     * @param {Record<string, Float32Array>} _parameters
     * @returns {boolean} always true so the processor stays alive
     */
    process(_inputs, outputs, _parameters) {
        const out = outputs[0]
        if (!out || out.length === 0) return true
        const N = out[0].length

        // Prime both taps with the first available sample.
        if (this._x1 === undefined) {
            this._x1 = this._nextInt16()
            this._x1ItemId = this._nextItemId
            this._x0 = this._x1
            this._x0ItemId = this._x1ItemId
            this.phase = 0
            this.nowItemId = this._x0ItemId
            this.nowItemSamples = 0
            this._postNowPlaying()
        }

        for (let i = 0; i < N; i++) {
            let y = 0
            if (!this.muted) {
                const x0 = /** @type {number} */ (this._x0)
                const x1 = /** @type {number} */ (this._x1)
                const yI16 = x0 + this.phase * (x1 - x0)
                y = Math.max(-1, Math.min(1, yI16 / 32768))
            }
            for (let c = 0; c < out.length; c++) out[c][i] = y
            // Muted playback still consumes the queue so progress keeps advancing.
            this._advance()
        }

        // Periodic progress report, roughly every 50 ms of output.
        this._notifyFrames += N
        if (this._notifyFrames >= this.dstRate / 20) {
            this._postNowPlaying()
            this._notifyFrames = 0
        }
        return true
    }
}

registerProcessor("audio-playback-worklet", AudioPlaybackWorklet)
|
||||
72
frontend/assets/audio-processor-worklet.js
Normal file
72
frontend/assets/audio-processor-worklet.js
Normal file
@@ -0,0 +1,72 @@
|
||||
/**
 * AudioWorklet processor that converts the first input channel to 16-bit PCM
 * at `_outRate` using linear interpolation and posts each converted block to
 * the main thread as an Int16Array.
 */
class PCMAudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super()
        this._inRate = sampleRate // rate of the audio context, e.g. 48000
        this._pos = 0 // read position in input samples, relative to src[0] of the next block
        this._carry = null // last input sample of the previous block
        this._outRate = 24000
    }

    /**
     * @param {Float32Array[][]} inputs
     * @returns {boolean} always true so the processor stays alive
     */
    process(inputs) {
        const chs = inputs[0]
        if (!chs || chs.length === 0) return true

        const inF32 = chs[0] // mono Float32
        // A zero-length channel would store `undefined` as carry (NaN output on the
        // next block) and shift the phase, so skip it entirely.
        if (!inF32 || inF32.length === 0) return true

        const step = this._inRate / this._outRate

        // Prefix the block with the carried sample so that src[i + 1] always exists
        // when interpolating across the block boundary.
        let src = inF32
        if (this._carry !== null) {
            const tmp = new Float32Array(1 + inF32.length)
            tmp[0] = this._carry
            tmp.set(inF32, 1)
            src = tmp
        }

        // Number of output samples we can produce (interpolation needs i + 1 < src.length).
        const avail = src.length - 1 - this._pos
        const outLen = avail > 0 ? Math.ceil(avail / step) : 0

        const outI16 = new Int16Array(outLen)

        let pos = this._pos
        for (let k = 0; k < outLen; k++) {
            const i = Math.floor(pos)
            const frac = pos - i

            const x0 = src[i]
            const x1 = src[i + 1] // in range thanks to the -1 in `avail`

            // Linear interpolation + clamping
            let y = x0 + frac * (x1 - x0)
            if (y > 1) y = 1
            else if (y < -1) y = -1

            // Float32 -> int16 (round, saturate)
            const s = y <= -1 ? -0x8000 : Math.round(y * 0x7fff)
            outI16[k] = s

            pos += step
        }

        // Re-base the phase onto the next block's src, whose src[0] is the last
        // sample of the current input.
        this._pos = pos - (src.length - 1)
        if (this._pos < 0) this._pos = 0 // numerical safety

        // Keep the last real input sample as carry.
        this._carry = src[src.length - 1]

        // Zero-copy hand-over to the main thread (buffer transfer).
        try {
            this.port.postMessage(outI16, [outI16.buffer])
        } catch {
            // Fallback without a transfer list (e.g. older browsers).
            this.port.postMessage(outI16)
        }

        return true
    }
}

registerProcessor("audio-processor-worklet", PCMAudioProcessor)
|
||||
@@ -1,7 +1,10 @@
|
||||
<script lang="ts">
|
||||
import { onDestroy } from "svelte"
|
||||
import { mdiBookAccountOutline, mdiCreation, mdiFaceAgent, mdiHours24 } from "@mdi/js"
|
||||
import ProductCategoryFrame from "../widgets/ProductCategoryFrame.svelte"
|
||||
import CrinkledSection from "../CrinkledSection.svelte"
|
||||
import { base64ToUint8, createPlayer, createRecorder, SAMPLE_RATE, uint8ToBase64 } from "../voicebotDemo/helper"
|
||||
import { RealtimeServerEvent as RSE } from "../voicebotDemo/events"
|
||||
|
||||
const voiceProperties: Array<{ title: string; icon: string; color: string }> = [
|
||||
{
|
||||
@@ -25,6 +28,302 @@
|
||||
color: "#EB5757",
|
||||
},
|
||||
]
|
||||
|
||||
const browser = typeof window !== "undefined"
|
||||
const VOICE_WS_URL =
|
||||
browser && window.location.protocol === "http:"
|
||||
? "ws://2svoice-server.kontextwerk.info/api/v1/voicebot/ws"
|
||||
: "wss://2svoice-server.kontextwerk.info/api/v1/voicebot/ws"
|
||||
const CHUNK_DURATION_MS = 200
|
||||
const CHUNK_SIZE_BYTES = Math.round((SAMPLE_RATE * CHUNK_DURATION_MS) / 1000) * 2
|
||||
|
||||
type VoiceStatus = "idle" | "connecting" | "connected" | "error"
|
||||
|
||||
let status: VoiceStatus = "idle"
|
||||
let errorMessage = ""
|
||||
|
||||
let ws: WebSocket | null = null
|
||||
let recorder: ReturnType<typeof createRecorder> | null = null
|
||||
let player: ReturnType<typeof createPlayer> | null = null
|
||||
let outboundBuffer = new Uint8Array(0)
|
||||
let closing = false
|
||||
let cleanupPromise: Promise<void> | null = null
|
||||
let startPromise: Promise<void> | null = null
|
||||
|
||||
$: statusHint =
|
||||
status === "idle"
|
||||
? "Tippen, um die Voice-Demo zu starten"
|
||||
: status === "connecting"
|
||||
? "Verbindung wird aufgebaut …"
|
||||
: status === "connected"
|
||||
? "Live – sprechen Sie jetzt"
|
||||
: errorMessage || "Verbindung fehlgeschlagen"
|
||||
|
||||
/**
 * Click/keyboard entry point: starts the demo when it is not running, stops it
 * when it is connected, and ignores the request while a connection is being set up.
 */
const toggleVoiceDemo = async () => {
    switch (status) {
        case "connecting":
            return
        case "connected":
            await stopVoiceDemo()
            return
        default:
            await startVoiceDemo()
    }
}
|
||||
|
||||
/** Lets Enter and Space activate the button-like element the same way a click does. */
const handleKeydown = (event: KeyboardEvent) => {
    const isActivationKey = event.key === "Enter" || event.key === " "
    if (!isActivationKey) return
    event.preventDefault()
    void toggleVoiceDemo()
}
|
||||
|
||||
/**
 * Starts a voice session: tears down any previous session, sets up audio
 * playback and microphone capture, then opens the WebSocket.
 *
 * Concurrent calls are coalesced through `startPromise`. Failures are reported
 * through `handleConnectionError`, which also releases everything acquired so far.
 */
const startVoiceDemo = async () => {
    if (!browser) {
        status = "error"
        errorMessage = "Die Sprach-Demo steht nur im Browser zur Verfügung."
        return
    }
    if (startPromise || status === "connecting" || status === "connected") return

    startPromise = (async () => {
        await stopVoiceDemo({ resetStatus: false })
        status = "connecting"
        errorMessage = ""
        outboundBuffer = new Uint8Array(0)
        closing = false

        const handleChunk = (pcm: Int16Array) => {
            if (pcm.length === 0) return
            const bytes = new Uint8Array(pcm.byteLength)
            bytes.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength))
            appendToOutboundBuffer(bytes)
        }

        const newPlayer = createPlayer()
        const newRecorder = createRecorder(handleChunk)

        // Publish both before awaiting so that the error path and a concurrent
        // stopVoiceDemo() (e.g. component destroyed while the permission prompt is
        // open) can release the AudioContext and the microphone.
        player = newPlayer
        recorder = newRecorder
        // stopVoiceDemo() nulls `player`/`recorder`; that is how a teardown is detected.
        const aborted = () => player !== newPlayer || recorder !== newRecorder

        try {
            await newPlayer.init()
            if (!aborted()) await newRecorder.start()
        } catch (err) {
            if (!aborted()) {
                const message = extractErrorMessage(err, "Mikrofon konnte nicht gestartet werden.")
                handleConnectionError(message, err)
                return
            }
        }

        if (aborted()) {
            // Cleanup ran while we were waiting; release whatever was created afterwards.
            await Promise.allSettled([newRecorder.stop(), newPlayer.destroy()])
            return
        }

        let socket: WebSocket
        try {
            socket = new WebSocket(VOICE_WS_URL)
        } catch (err) {
            const message = extractErrorMessage(err, "WebSocket-Verbindung konnte nicht aufgebaut werden.")
            handleConnectionError(message, err)
            return
        }
        ws = socket

        // Events of a socket that has been replaced or torn down must not touch the UI state.
        const isCurrent = () => ws === socket

        socket.onopen = () => {
            if (!isCurrent()) return
            status = "connected"
            flushOutboundBuffer(true)
        }

        socket.onmessage = (event) => {
            if (isCurrent()) handleServerMessage(event)
        }

        socket.onerror = (event) => {
            if (!isCurrent()) return
            handleConnectionError("WebSocket-Fehler – bitte später erneut versuchen.", event)
        }

        socket.onclose = () => {
            if (!isCurrent() || closing || status !== "connected") return
            status = "idle"
            errorMessage = ""
            // The session ended remotely: stop the microphone and player as well,
            // otherwise captured audio keeps piling up in the outbound buffer.
            void stopVoiceDemo()
        }
    })()

    try {
        await startPromise
    } finally {
        startPromise = null
    }
}
|
||||
|
||||
/**
 * Tears the voice session down: flushes pending audio, stops the recorder and
 * the player, and closes the WebSocket. Concurrent calls share one cleanup run.
 *
 * @param resetStatus when true (default) the status returns to "idle" afterwards,
 *   unless an error is currently being displayed.
 */
const stopVoiceDemo = async ({ resetStatus = true }: { resetStatus?: boolean } = {}) => {
    const applyStatusReset = () => {
        if (resetStatus && status !== "error") {
            status = "idle"
            errorMessage = ""
        }
    }

    if (cleanupPromise) {
        await cleanupPromise
        applyStatusReset()
        return
    }

    closing = true

    cleanupPromise = (async () => {
        try {
            flushOutboundBuffer(true)
        } catch {
            /* ignore */
        }

        if (recorder) {
            try {
                await recorder.stop()
            } catch {
                /* ignore */
            }
        }
        recorder = null

        if (player) {
            try {
                player.stop()
                await player.destroy()
            } catch {
                /* ignore */
            }
        }
        player = null

        const socket = ws
        ws = null
        if (socket) {
            // Detach first so late events of this socket cannot change the status again.
            socket.onopen = null
            socket.onmessage = null
            socket.onerror = null
            socket.onclose = null
            // A socket that is still connecting has to be closed too; otherwise it
            // would finish the handshake later and stay open without an owner.
            if (socket.readyState === WebSocket.OPEN || socket.readyState === WebSocket.CONNECTING) {
                try {
                    socket.close(1000, "client-stop")
                } catch {
                    /* ignore */
                }
            }
        }
        outboundBuffer = new Uint8Array(0)
    })()

    try {
        await cleanupPromise
    } finally {
        cleanupPromise = null
        closing = false
        applyStatusReset()
    }
}
|
||||
|
||||
/**
 * Handles one JSON text frame from the voice server.
 *
 * - speech started: interrupts playback and tells the server how much of the
 *   current item was actually played.
 * - audio delta: decodes the base64 PCM16 payload and queues it for playback.
 * - error: logs the server-reported error.
 *
 * Malformed frames are ignored instead of throwing out of the socket handler.
 */
const handleServerMessage = (event: MessageEvent) => {
    if (!player) return
    if (typeof event.data !== "string") return

    let payload: unknown
    try {
        payload = JSON.parse(event.data)
    } catch (err) {
        console.warn("VoiceBot Preview: Konnte Nachricht nicht parsen.", err)
        return
    }
    // JSON.parse may yield null or a primitive; property access on those would throw.
    if (!payload || typeof payload !== "object") return

    const message = payload as Record<string, unknown>
    const type = typeof message.type === "string" ? message.type : "<unbekannt>"

    if (type === RSE.INPUT_AUDIO_BUFFER_SPEECH_STARTED) {
        const { item_id, played_ms } = player.getNowPlaying()
        if (item_id) {
            player.stop()
            ws?.send(
                JSON.stringify({
                    type: "last_item_played_ms.truncate",
                    details: { item_id, played_ms: played_ms || 0 },
                })
            )
        }
        return
    }

    if (type === RSE.RESPONSE_AUDIO_DELTA) {
        const delta = message.delta
        if (typeof delta !== "string") {
            console.warn("VoiceBot Preview: Audio-Delta ohne gültige Nutzdaten empfangen.", message)
            return
        }
        let bytes: Uint8Array
        try {
            bytes = base64ToUint8(delta)
        } catch (err) {
            // atob throws on invalid base64 input.
            console.warn("VoiceBot Preview: Audio-Delta konnte nicht dekodiert werden.", err)
            return
        }
        // PCM16 needs whole 2-byte samples; a trailing odd byte is dropped.
        const pcm = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2))
        if (pcm.length === 0) return
        player.play({
            response_id: message.response_id,
            item_id: message.item_id,
            delta,
            pcmInt16: pcm,
        })
        return
    }

    if (type === RSE.ERROR) {
        console.log("VoiceBot Preview: Server hat einen Fehler gemeldet.", message)
    }
}
|
||||
|
||||
/** Appends captured audio bytes to the pending buffer and sends every full chunk. */
const appendToOutboundBuffer = (chunk: Uint8Array) => {
    if (chunk.length === 0) return

    const pendingLength = outboundBuffer.length
    const merged = new Uint8Array(pendingLength + chunk.length)
    merged.set(outboundBuffer, 0)
    merged.set(chunk, pendingLength)

    outboundBuffer = merged
    flushOutboundBuffer()
}
|
||||
|
||||
/**
 * Sends the pending audio in chunks of CHUNK_SIZE_BYTES while the socket is open.
 *
 * @param force when true the trailing partial chunk is sent as well; otherwise
 *   it stays buffered until enough audio has accumulated.
 */
const flushOutboundBuffer = (force = false) => {
    if (!ws || ws.readyState !== WebSocket.OPEN || outboundBuffer.length === 0) {
        return
    }

    const pending = outboundBuffer
    outboundBuffer = new Uint8Array(0)

    // `pending` is non-empty here, so the chunk size is always positive.
    const chunkSize = CHUNK_SIZE_BYTES > 0 ? CHUNK_SIZE_BYTES : pending.length

    // Walk the buffer with an offset and zero-copy views instead of re-slicing the
    // remainder after every chunk.
    let offset = 0
    while (pending.length - offset >= chunkSize) {
        sendChunk(pending.subarray(offset, offset + chunkSize))
        offset += chunkSize
    }

    if (offset >= pending.length) return

    if (force) {
        sendChunk(pending.subarray(offset))
    } else {
        // Copy the remainder so the large source buffer can be released.
        outboundBuffer = pending.slice(offset)
    }
}
|
||||
|
||||
/**
 * Sends one audio chunk as a base64-encoded "input_audio_buffer.append" event.
 * Does nothing unless the socket is open; send failures are reported as a
 * connection error, except while the session is already being closed.
 */
const sendChunk = (chunk: Uint8Array) => {
    const socket = ws
    if (socket === null || socket.readyState !== WebSocket.OPEN) return

    try {
        const frame = { type: "input_audio_buffer.append", audio: uint8ToBase64(chunk) }
        socket.send(JSON.stringify(frame))
    } catch (err) {
        if (closing) return
        handleConnectionError("Senden des Audiostreams fehlgeschlagen.", err)
    }
}
|
||||
|
||||
/**
 * Switches the UI into the error state with `message`, logs the underlying
 * cause (or the message when there is none) and releases all session resources.
 */
const handleConnectionError = (message: string, err?: unknown) => {
    const logged = err ?? message
    console.error("VoiceBot Preview Fehler:", logged)

    errorMessage = message
    status = "error"

    // Keep the error visible: cleanup must not reset the status to "idle".
    void stopVoiceDemo({ resetStatus: false })
}
|
||||
|
||||
/**
 * Maps an unknown error to a user-facing message.
 *
 * Well-known media/permission DOMExceptions get a dedicated text; any other
 * Error contributes its own message; everything else yields `fallback`.
 */
const extractErrorMessage = (err: unknown, fallback: string) => {
    if (err instanceof DOMException) {
        switch (err.name) {
            case "NotAllowedError":
                return "Zugriff auf das Mikrofon wurde verweigert."
            case "NotFoundError":
                return "Kein Mikrofon gefunden oder verfügbar."
            case "NotReadableError":
                return "Auf das Mikrofon konnte nicht zugegriffen werden (ggf. bereits in Verwendung)."
            case "SecurityError":
                return "Der Browser blockiert den Zugriff – bitte die Seite über HTTPS öffnen."
            default:
                break
        }
    }
    return err instanceof Error && err.message ? err.message : fallback
}
|
||||
|
||||
onDestroy(() => {
|
||||
void stopVoiceDemo({ resetStatus: false })
|
||||
})
|
||||
</script>
|
||||
|
||||
<CrinkledSection
|
||||
@@ -40,12 +339,29 @@
|
||||
lowerDescription="Durch den Einsatz modernster KI-Technologien gewährleisten wir eine intelligente und effiziente Kommunikation, die den höchsten Datenschutzstandards entspricht."
|
||||
>
|
||||
{#snippet primaryContent()}
|
||||
<div class="img">
|
||||
<div
|
||||
class="img"
|
||||
class:connected={status === "connected"}
|
||||
class:errored={status === "error"}
|
||||
role="button"
|
||||
tabindex="0"
|
||||
aria-pressed={status === "connected"}
|
||||
aria-busy={status === "connecting"}
|
||||
aria-label="Voicebot Demo starten"
|
||||
on:click={() => void toggleVoiceDemo()}
|
||||
on:keydown={handleKeydown}
|
||||
>
|
||||
<img
|
||||
src="/media/iphone.png"
|
||||
alt="Kontextwerk is calling"
|
||||
/>
|
||||
<div class="shadow"></div>
|
||||
<div
|
||||
class="voice-overlay"
|
||||
data-status={status}
|
||||
aria-live="polite"
|
||||
>
|
||||
<span>{statusHint}</span>
|
||||
</div>
|
||||
</div>
|
||||
{/snippet}
|
||||
</ProductCategoryFrame>
|
||||
@@ -54,19 +370,78 @@
|
||||
|
||||
<style lang="less">
|
||||
.img {
|
||||
position: relative;
|
||||
width: 400px;
|
||||
min-width: 400px;
|
||||
max-height: 100%;
|
||||
height: 100%;
|
||||
display: flex;
|
||||
justify-content: center;
|
||||
align-items: center;
|
||||
cursor: pointer;
|
||||
border-radius: 1.6rem;
|
||||
transition:
|
||||
transform 0.2s ease,
|
||||
border-color 0.2s ease;
|
||||
outline: none;
|
||||
|
||||
img {
|
||||
width: 60% !important;
|
||||
height: 100%;
|
||||
object-fit: contain;
|
||||
pointer-events: none;
|
||||
user-select: none;
|
||||
}
|
||||
.shadow {
|
||||
display: none;
|
||||
|
||||
.voice-overlay {
|
||||
position: absolute;
|
||||
bottom: 1.2rem;
|
||||
left: 50%;
|
||||
transform: translateX(-50%);
|
||||
padding: 0.45rem 1.1rem;
|
||||
border-radius: 999px;
|
||||
background: rgba(13, 12, 12, 0.8);
|
||||
color: white;
|
||||
font-size: 0.8rem;
|
||||
font-weight: 500;
|
||||
letter-spacing: 0.01em;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.4rem;
|
||||
pointer-events: none;
|
||||
white-space: nowrap;
|
||||
transition:
|
||||
background 0.2s ease,
|
||||
color 0.2s ease;
|
||||
}
|
||||
|
||||
.voice-overlay[data-status="connected"] {
|
||||
background: rgba(76, 175, 80, 0.85);
|
||||
}
|
||||
|
||||
.voice-overlay[data-status="connecting"] {
|
||||
background: rgba(255, 152, 0, 0.85);
|
||||
}
|
||||
|
||||
.voice-overlay[data-status="error"] {
|
||||
background: rgba(235, 87, 87, 0.9);
|
||||
}
|
||||
|
||||
&:hover {
|
||||
transform: translateY(-4px);
|
||||
}
|
||||
|
||||
&.connected {
|
||||
border-color: rgba(76, 175, 80, 0.4);
|
||||
}
|
||||
|
||||
&.errored {
|
||||
border-color: rgba(235, 87, 87, 0.45);
|
||||
}
|
||||
|
||||
&:focus-visible {
|
||||
outline: 2px solid var(--primary-200);
|
||||
outline-offset: 4px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
|
||||
90
frontend/src/lib/components/voicebotDemo/events.ts
Normal file
90
frontend/src/lib/components/voicebotDemo/events.ts
Normal file
@@ -0,0 +1,90 @@
|
||||
/** -----------------------------
|
||||
* Client-Events (Client → Server)
|
||||
* ----------------------------- */
|
||||
/** Event type strings the client sends to the realtime server. */
export enum RealtimeClientEvent {
    // Session / configuration
    SESSION_UPDATE = "session.update",

    // Input audio buffer
    INPUT_AUDIO_BUFFER_APPEND = "input_audio_buffer.append",
    INPUT_AUDIO_BUFFER_COMMIT = "input_audio_buffer.commit",
    INPUT_AUDIO_BUFFER_CLEAR = "input_audio_buffer.clear",

    // WebRTC-specific output buffer
    OUTPUT_AUDIO_BUFFER_CLEAR = "output_audio_buffer.clear",

    // Conversation items
    CONVERSATION_ITEM_CREATE = "conversation.item.create",
    CONVERSATION_ITEM_DELETE = "conversation.item.delete",
    CONVERSATION_ITEM_RETRIEVE = "conversation.item.retrieve",
    CONVERSATION_ITEM_TRUNCATE = "conversation.item.truncate",

    // Inference control
    RESPONSE_CREATE = "response.create",
    RESPONSE_CANCEL = "response.cancel",
}
|
||||
|
||||
/** ----------------------------
|
||||
* Server-Events (Server → Client)
|
||||
* ---------------------------- */
|
||||
/** Event type strings the realtime server sends to the client. */
export enum RealtimeServerEvent {
    // Session / conversation
    SESSION_CREATED = "session.created",
    SESSION_UPDATED = "session.updated",
    CONVERSATION_CREATED = "conversation.created",

    // Conversation items
    CONVERSATION_ITEM_DONE = "conversation.item.done",
    CONVERSATION_ITEM_RETRIEVED = "conversation.item.retrieved",
    CONVERSATION_ITEM_DELETED = "conversation.item.deleted",
    CONVERSATION_ITEM_TRUNCATED = "conversation.item.truncated",
    CONVERSATION_ITEM_INPUT_AUDIO_TRANSCRIPTION_COMPLETED = "conversation.item.input_audio_transcription.completed",
    CONVERSATION_ITEM_INPUT_AUDIO_TRANSCRIPTION_FAILED = "conversation.item.input_audio_transcription.failed",

    // Input audio buffer
    INPUT_AUDIO_BUFFER_CLEARED = "input_audio_buffer.cleared",
    INPUT_AUDIO_BUFFER_COMMITTED = "input_audio_buffer.committed",
    INPUT_AUDIO_BUFFER_SPEECH_STARTED = "input_audio_buffer.speech_started",
    INPUT_AUDIO_BUFFER_SPEECH_STOPPED = "input_audio_buffer.speech_stopped",

    // WebRTC-specific output buffer
    OUTPUT_AUDIO_BUFFER_CLEARED = "output_audio_buffer.cleared",
    OUTPUT_AUDIO_BUFFER_STARTED = "output_audio_buffer.started",
    OUTPUT_AUDIO_BUFFER_STOPPED = "output_audio_buffer.stopped",

    // Rate limits
    RATE_LIMITS_UPDATED = "rate_limits.updated",

    // Response lifecycle
    RESPONSE_CREATED = "response.created",
    RESPONSE_OUTPUT_ITEM_ADDED = "response.output_item.added",
    RESPONSE_OUTPUT_ITEM_DONE = "response.output_item.done",
    RESPONSE_CONTENT_PART_ADDED = "response.content_part.added",
    RESPONSE_CONTENT_PART_DONE = "response.content_part.done",

    // Streaming deltas
    RESPONSE_TEXT_DELTA = "response.output_text.delta",
    RESPONSE_TEXT_DONE = "response.output_text.done",
    RESPONSE_AUDIO_DELTA = "response.output_audio.delta",
    RESPONSE_AUDIO_DONE = "response.output_audio.done",
    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.output_audio_transcript.delta",
    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.output_audio_transcript.done",
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta",
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done",

    // Completion / cancellation / errors
    RESPONSE_DONE = "response.done",
    RESPONSE_CANCELLED = "response.cancelled",
    ERROR = "error",

    // Graph synchronisation
    GRAPH_CURSOR = "graph_cursor",
}
|
||||
|
||||
/** Optional: gemeinsame Typen */
|
||||
export type AnyRealtimeEvent = RealtimeClientEvent | RealtimeServerEvent
|
||||
/**
 * Extracts the chat session id from a `/develop/chats/<id>` path.
 *
 * @returns the id segment, or null when the path does not match or the
 *   segment is the placeholder "new".
 */
export function extractSessionId(path: string): string | null {
    const found = /^\/develop\/chats\/([^\/]+)(?:\/|$)/.exec(path)
    if (found === null) return null

    const sessionId = found[1]
    return sessionId === "new" ? null : sessionId
}
|
||||
148
frontend/src/lib/components/voicebotDemo/helper.ts
Normal file
148
frontend/src/lib/components/voicebotDemo/helper.ts
Normal file
@@ -0,0 +1,148 @@
|
||||
const SAMPLE_RATE = 24_000
|
||||
const BUFFER_SIZE = 4_800
|
||||
|
||||
const AUDIO_PLAYBACK_WORKLET_URL = "/assets/audio-playback-worklet.js"
|
||||
const AUDIO_PROCESSOR_WORKLET_URL = "/assets/audio-processor-worklet.js"
|
||||
|
||||
/** Encodes raw bytes as a base64 string. */
const uint8ToBase64 = (u8: Uint8Array): string => {
    // btoa expects a "binary string": one character per byte.
    const chars: string[] = []
    u8.forEach((byte) => {
        chars.push(String.fromCharCode(byte))
    })
    return btoa(chars.join(""))
}
|
||||
|
||||
/** Decodes a base64 string into raw bytes; throws (via atob) on invalid input. */
const base64ToUint8 = (b64: string): Uint8Array => {
    // atob yields one character per byte, each with a char code in 0..255.
    const binary = atob(b64)
    return Uint8Array.from(binary, (ch) => ch.charCodeAt(0))
}
|
||||
|
||||
/**
 * Progress message posted by the playback worklet.
 * `item_id` is null while nothing is playing (queue empty or cleared).
 */
interface NowPlayingMessage {
    type: "nowPlaying"
    item_id: string | null
    played_ms: number
}

/** Last known playback position as exposed by `Player.getNowPlaying`. */
interface NowPlayingState {
    item_id: string | null
    played_ms: number
}
|
||||
|
||||
interface Player {
|
||||
init: (sampleRate?: number) => Promise<void>
|
||||
play: (delta) => void
|
||||
deleteItem: (item_id: string) => void
|
||||
stop: () => void
|
||||
setSourceRate: (hz: number) => void
|
||||
getNowPlaying: () => NowPlayingState
|
||||
destroy: () => Promise<void>
|
||||
mute: () => void
|
||||
unmute: () => void
|
||||
node?: AudioWorkletNode | null
|
||||
}
|
||||
|
||||
/**
 * Creates a player that streams PCM16 deltas into the playback worklet and
 * tracks which item is currently audible.
 *
 * @param defaultSampleRate sample rate of the AudioContext when `init` is
 *   called without an explicit rate.
 */
const createPlayer = (defaultSampleRate = 48000): Player => {
    let ctx: AudioContext | null = null
    let node: AudioWorkletNode | null = null
    let nowItemId: string | null = null
    let playedMs = 0

    const isNowPlayingMessage = (m: unknown): m is NowPlayingMessage => {
        if (!m || typeof m !== "object") return false
        const x = m as Record<string, unknown>
        return x["type"] === "nowPlaying" && "played_ms" in x
    }

    /** Releases the worklet node and closes the audio context. Safe to call repeatedly. */
    const destroy = async (): Promise<void> => {
        const context = ctx
        const workletNode = node
        ctx = null
        node = null
        nowItemId = null
        playedMs = 0

        if (workletNode) {
            workletNode.port.onmessage = null
            try {
                workletNode.disconnect()
            } catch {
                /* already disconnected */
            }
        }
        // close() rejects on a context that is already closed.
        if (context && context.state !== "closed") await context.close()
    }

    /**
     * Creates the AudioContext, loads the playback worklet and connects it to the output.
     * Throws when the worklet module cannot be loaded; no context is left open in that case.
     */
    const init = async (sampleRate = defaultSampleRate): Promise<void> => {
        // Re-initialising must not orphan a still-running context.
        try {
            await destroy()
        } catch {
            /* best effort: the old context is unusable either way */
        }

        const context = new AudioContext({ sampleRate })
        ctx = context
        try {
            await context.audioWorklet.addModule(AUDIO_PLAYBACK_WORKLET_URL)
            // destroy() ran while the module was loading: nothing left to set up.
            if (ctx !== context) return

            const workletNode = new AudioWorkletNode(context, "audio-playback-worklet")
            workletNode.port.onmessage = (e: MessageEvent) => {
                const m: unknown = e.data
                if (isNowPlayingMessage(m)) {
                    nowItemId = typeof m.item_id === "string" ? m.item_id : null
                    playedMs = m.played_ms | 0
                }
            }
            workletNode.connect(context.destination)
            node = workletNode
        } catch (err) {
            if (ctx === context) {
                ctx = null
                node = null
            }
            if (context.state !== "closed") {
                await context.close().catch(() => undefined)
            }
            throw err
        }
    }

    /** Queues one audio delta; its PCM buffer is transferred to the worklet, not copied. */
    const play = (delta: ResponseAudioDelta): void => {
        if (!node) return
        const buf = delta.pcmInt16.buffer
        node.port.postMessage({ type: "appendDelta", delta }, [buf])
    }

    const deleteItem = (item_id: string): void => {
        node?.port.postMessage({ type: "deleteItem", item_id })
    }

    /** Drops all queued audio. */
    const stop = (): void => {
        node?.port.postMessage({ type: "clear" })
    }

    const setSourceRate = (hz: number): void => {
        node?.port.postMessage({ type: "setSourceRate", hz })
    }

    const getNowPlaying = (): NowPlayingState => {
        return { item_id: nowItemId, played_ms: playedMs }
    }

    const mute = (): void => {
        node?.port.postMessage({ type: "mute" })
    }

    const unmute = (): void => {
        node?.port.postMessage({ type: "unmute" })
    }

    return { init, play, deleteItem, stop, setSourceRate, getNowPlaying, destroy, mute, unmute }
}
|
||||
|
||||
/**
 * Creates a microphone recorder that delivers PCM16 chunks from the
 * audio-processor worklet to `onChunk`.
 *
 * `stop()` may be called at any time, including while `start()` is still
 * waiting for the permission prompt or the worklet module; the microphone and
 * the audio context are released in every case.
 */
const createRecorder = (onChunk: (pcm: Int16Array) => void) => {
    let ctx: AudioContext | null = null
    let stream: MediaStream | null = null
    let source: MediaStreamAudioSourceNode | null = null
    let worklet: AudioWorkletNode | null = null
    // Bumped by every start()/stop(); lets an in-flight start() notice it was superseded.
    let generation = 0

    /** Releases the microphone tracks, the worklet handler and the audio context. */
    const release = async () => {
        const oldStream = stream
        const oldCtx = ctx
        const oldWorklet = worklet
        stream = null
        ctx = null
        source = null
        worklet = null

        oldStream?.getTracks().forEach((t) => t.stop())
        if (oldWorklet) oldWorklet.port.onmessage = null
        if (oldCtx && oldCtx.state !== "closed") await oldCtx.close()
    }

    const start = async () => {
        const run = ++generation
        const superseded = () => run !== generation

        // A repeated start() must not leave the previous stream or context running.
        await release()

        const micStream = await navigator.mediaDevices.getUserMedia({ audio: true })
        if (superseded()) {
            // stop() was called while the permission prompt was open.
            micStream.getTracks().forEach((t) => t.stop())
            return
        }
        stream = micStream

        try {
            const audioCtx: AudioContext = new (window.AudioContext || (window as any).webkitAudioContext)({
                sampleRate: SAMPLE_RATE,
            })
            ctx = audioCtx
            await audioCtx.audioWorklet.addModule(AUDIO_PROCESSOR_WORKLET_URL)
            // stop() already released the stream and context published above.
            if (superseded()) return

            const micSource = audioCtx.createMediaStreamSource(micStream)
            const processor = new AudioWorkletNode(audioCtx, "audio-processor-worklet")
            processor.port.onmessage = (ev: MessageEvent<Int16Array>) => onChunk(ev.data)

            micSource.connect(processor)
            processor.connect(audioCtx.destination)
            source = micSource
            worklet = processor
        } catch (err) {
            // Errors caused by a concurrent stop() are expected and not reported.
            if (superseded()) return
            try {
                await release()
            } catch {
                /* the original error is the one worth reporting */
            }
            throw err
        }
    }

    const stop = async () => {
        generation++
        await release()
    }

    return { start, stop }
}
|
||||
export { uint8ToBase64, base64ToUint8, createPlayer, createRecorder,SAMPLE_RATE }
|
||||
Reference in New Issue
Block a user