voicebot demo

2025-10-05 06:34:07 +00:00
parent c2be654e2f
commit 33cab69410
6 changed files with 930 additions and 4 deletions
--- a/frontend/assets/audio-playback-worklet.js
+++ b/frontend/assets/audio-playback-worklet.js
@@ -0,0 +1,235 @@
 /**
 * @typedef {Object} AppendDeltaMessage
 * @property {"appendDelta"} type
 * @property {ResponseAudioDelta} delta
 */
 /**
 * @typedef {Object} DeleteItemMessage
 * @property {"deleteItem"} type
 * @property {string} item_id
 */
 /**
 * @typedef {Object} ClearMessage
 * @property {"clear"} type
 */
 /**
 * @typedef {Object} SetSourceRateMessage
 * @property {"setSourceRate"} type
 * @property {number} hz
 */
 /**
 * @typedef {Object} NowPlayingMessage
 * @property {"nowPlaying"} type
 * @property {string|null} item_id
 * @property {number} played_ms
 */
 /**
 * @typedef {Object} MuteMessage
 * @property {"mute"} type
 */
 /**
 * @typedef {Object} UnmuteMessage
 * @property {"unmute"} type
 */
 /**
 * @typedef {AppendDeltaMessage | DeleteItemMessage | ClearMessage | SetSourceRateMessage | MuteMessage | UnmuteMessage} PlaybackMessage
 */
 /**
 * @typedef {Object} Chunk
 * @property {string} item_id
 * @property {Int16Array} data
 * @property {number} off
 */
 /**
 * AudioWorklet processor that plays queued 16-bit PCM chunks.
 *
 * Chunks arrive through the message port ("appendDelta"), are resampled from
 * `srcRate` to the context rate with linear interpolation between two taps
 * (`_x0`, `_x1`), and the same signal is written to every output channel.
 * The processor posts "nowPlaying" messages when the playing item changes and
 * roughly every 50 ms (dstRate / 20 frames) while running.
 */
 class AudioPlaybackWorklet extends AudioWorkletProcessor {
    constructor() {
        super()
        /** @type {number} */ this.srcRate = 24000
        /** @type {number} */ this.dstRate = sampleRate
        /** @type {number} */ this.step = this.srcRate / this.dstRate
        /** @type {Chunk[]} */ this.queue = []
        /** @type {Chunk|null} */ this.cur = null
        /** @type {number} */ this.phase = 0
        // `_x1 === undefined` doubles as the "not primed yet" marker checked in process().
        /** @type {number|undefined} */ this._x0 = undefined
        /** @type {number|undefined} */ this._x1 = undefined
        /** @type {string|null} */ this._x0ItemId = null
        /** @type {string|null} */ this._x1ItemId = null
        /** @type {string|null} */ this._nextItemId = null
        /** @type {string|null} */ this.nowItemId = null
        /** @type {number} */ this.nowItemSamples = 0
        /** @type {number} */ this._notifyFrames = 0
        /** @type {boolean} */ this.muted = false
        this.port.onmessage = (e) => this._onMessage(/** @type {PlaybackMessage} */ (e.data))
    }
    /**
     * Dispatches a control message received on the port. Unknown or malformed
     * messages are ignored.
     * @param {PlaybackMessage} msg
     */
    _onMessage(msg) {
        if (!msg || !msg.type) return
        switch (msg.type) {
            case "appendDelta":
                if (msg.delta && msg.delta.pcmInt16 instanceof Int16Array) {
                    this.queue.push({ item_id: msg.delta.item_id, data: msg.delta.pcmInt16, off: 0 })
                }
                return
            case "deleteItem":
                this._deleteItem(msg.item_id)
                return
            case "clear":
                this._reset()
                return
            case "setSourceRate":
                this._setSourceRate(msg.hz)
                return
            case "mute":
                this.muted = true
                return
            case "unmute":
                this.muted = false
                return
            default:
                return
        }
    }
    /**
     * Removes every trace of one item: queued chunks, the chunk being read and
     * the interpolation taps.
     * @param {string} id
     */
    _deleteItem(id) {
        this.queue = this.queue.filter((ch) => ch.item_id !== id)
        if (this.cur && this.cur.item_id === id) this.cur = null
        // The taps may still carry samples of the deleted item. Left alone, the
        // next frame would copy the deleted id back into nowItemId and report
        // the item as playing again, so silence and untag them. Unprimed taps
        // are left untouched so priming still happens in process().
        if (this._x1 !== undefined) {
            if (this._x0ItemId === id) {
                this._x0 = 0
                this._x0ItemId = null
            }
            if (this._x1ItemId === id) {
                this._x1 = 0
                this._x1ItemId = null
            }
        }
        if (this.nowItemId === id) {
            this.nowItemId = null
            this.nowItemSamples = 0
            this._postNowPlaying()
        }
    }
    /** Drops all queued audio, returns to the unprimed state and reports "nothing playing". */
    _reset() {
        this.queue.length = 0
        this.cur = null
        this.phase = 0
        this._x0 = undefined
        this._x1 = undefined
        this._x0ItemId = null
        this._x1ItemId = null
        this._nextItemId = null
        this.nowItemId = null
        this.nowItemSamples = 0
        this._notifyFrames = 0
        this._postNowPlaying()
    }
    /**
     * Updates the source rate. The rate is truncated to an integer as before;
     * values that are not finite or truncate below 1 Hz are ignored, because a
     * zero rate would freeze the resampler and divide by zero in played_ms.
     * @param {number} hz
     */
    _setSourceRate(hz) {
        if (!Number.isFinite(hz)) return
        const rate = Math.trunc(hz)
        if (rate < 1) return
        this.srcRate = rate
        this.step = this.srcRate / this.dstRate
    }
    /**
     * Makes sure `cur` points at a chunk, pulling the next one from the queue.
     * @returns {boolean} false when no audio is available
     */
    _ensureCurrent() {
        if (this.cur == null) {
            if (this.queue.length === 0) return false
            this.cur = this.queue.shift() || null
            if (this.cur == null) return false
        }
        return true
    }
    /**
     * Reads the next source sample and records its item id in `_nextItemId`.
     * @returns {number} the sample, or 0 (with `_nextItemId` null) on underrun
     */
    _nextInt16() {
        for (;;) {
            if (!this._ensureCurrent()) {
                this._nextItemId = null
                return 0
            }
            const chunk = /** @type {Chunk} */ (this.cur)
            const o = chunk.off | 0
            if (o < chunk.data.length) {
                chunk.off = o + 1
                this._nextItemId = chunk.item_id
                return chunk.data[o]
            }
            // Chunk exhausted: drop it and try the next one.
            this.cur = null
        }
    }
    /** Posts the current item id and its played duration (in source-rate milliseconds). */
    _postNowPlaying() {
        /** @type {NowPlayingMessage} */
        const m = {
            type: "nowPlaying",
            item_id: this.nowItemId,
            played_ms: Math.max(0, Math.floor((this.nowItemSamples * 1000) / this.srcRate)),
        }
        this.port.postMessage(m)
    }
    /** Loads the first sample into both taps and reports the item it belongs to. */
    _prime() {
        this._x1 = this._nextInt16()
        this._x1ItemId = this._nextItemId
        this._x0 = this._x1
        this._x0ItemId = this._x1ItemId
        this.phase = 0
        this.nowItemId = this._x0ItemId
        this.nowItemSamples = 0
        this._postNowPlaying()
    }
    /**
     * Advances the resampler by one output frame, consuming as many source
     * samples as the phase accumulator requires and tracking item changes.
     */
    _advance() {
        this.phase += this.step
        while (this.phase >= 1) {
            this.phase -= 1
            this._x0 = this._x1
            this._x0ItemId = this._x1ItemId
            this._x1 = this._nextInt16()
            this._x1ItemId = this._nextItemId
            if (this.nowItemId !== this._x0ItemId) {
                this.nowItemId = this._x0ItemId
                this.nowItemSamples = 0
                this._postNowPlaying()
            }
            if (this.nowItemId) this.nowItemSamples += 1
        }
    }
    /**
     * Renders one block. While muted, zeros are written but the queue is still
     * consumed, so playback position keeps advancing.
     * @param {Float32Array[][]} _inputs
     * @param {Float32Array[][]} outputs
     * @param {Record<string, Float32Array>} _parameters
     * @returns {boolean} always true
     */
    process(_inputs, outputs, _parameters) {
        const out = outputs[0]
        if (!out || out.length === 0) return true
        const frames = out[0].length
        if (this._x1 === undefined) this._prime()
        // Messages are handled between process() calls, so the flag is stable for the block.
        const muted = this.muted
        for (let i = 0; i < frames; i++) {
            let y = 0
            if (!muted) {
                const x0 = /** @type {number} */ (this._x0)
                const x1 = /** @type {number} */ (this._x1)
                y = Math.max(-1, Math.min(1, (x0 + this.phase * (x1 - x0)) / 32768))
            }
            for (let c = 0; c < out.length; c++) out[c][i] = y
            this._advance()
        }
        this._notifyFrames += frames
        if (this._notifyFrames >= this.dstRate / 20) {
            this._postNowPlaying()
            this._notifyFrames = 0
        }
        return true
    }
 }
 registerProcessor("audio-playback-worklet", AudioPlaybackWorklet)
--- a/frontend/assets/audio-processor-worklet.js
+++ b/frontend/assets/audio-processor-worklet.js
@@ -0,0 +1,72 @@
 /**
 * AudioWorklet processor that converts the first input channel to 16-bit PCM
 * at `_outRate` using linear interpolation and posts each block's result as
 * an Int16Array on the message port.
 */
 class PCMAudioProcessor extends AudioWorkletProcessor {
    constructor() {
        super()
        this._outRate = 24000
        this._inRate = sampleRate // context rate, e.g. 48000
        this._carry = null // last input sample of the previous block
        this._pos = 0 // read position (in input samples) relative to the start of `src`
    }
    process(inputs) {
        const channels = inputs[0]
        if (!channels || channels.length === 0) return true
        const input = channels[0] // mono Float32
        const ratio = this._inRate / this._outRate
        // Prefix the block with the carried sample so that index i + 1 always exists.
        let src = input
        if (this._carry !== null) {
            src = new Float32Array(input.length + 1)
            src[0] = this._carry
            src.set(input, 1)
        }
        const lastIndex = src.length - 1
        // Interpolation needs src[i + 1], so only positions below lastIndex are usable.
        const span = lastIndex - this._pos
        const count = span > 0 ? Math.ceil(span / ratio) : 0
        const pcm = new Int16Array(count)
        let cursor = this._pos
        for (let k = 0; k < count; k++) {
            const base = Math.floor(cursor)
            const t = cursor - base
            const a = src[base]
            const b = src[base + 1]
            // Interpolate, then clamp to [-1, 1].
            let value = a + t * (b - a)
            if (value > 1) value = 1
            else if (value < -1) value = -1
            // Float -> int16: round, with -1 mapped to the full negative range.
            pcm[k] = value <= -1 ? -0x8000 : Math.round(value * 0x7fff)
            cursor += ratio
        }
        // Re-base the position onto the next block, whose src[0] is this block's last sample.
        const nextPos = cursor - lastIndex
        this._pos = nextPos < 0 ? 0 : nextPos // guard against rounding error
        this._carry = src[lastIndex]
        // Transfer the buffer to avoid a copy; fall back to a plain post if that throws.
        try {
            this.port.postMessage(pcm, [pcm.buffer])
        } catch {
            this.port.postMessage(pcm)
        }
        return true
    }
 }
 registerProcessor("audio-processor-worklet", PCMAudioProcessor)
--- a/frontend/src/lib/components/staticPageRows/VoicebotPreview.svelte
+++ b/frontend/src/lib/components/staticPageRows/VoicebotPreview.svelte
@@ -1,7 +1,10 @@
 <script lang="ts">
    import { onDestroy } from "svelte"
    import { mdiBookAccountOutline, mdiCreation, mdiFaceAgent, mdiHours24 } from "@mdi/js"
    import ProductCategoryFrame from "../widgets/ProductCategoryFrame.svelte"
    import CrinkledSection from "../CrinkledSection.svelte"
    import { base64ToUint8, createPlayer, createRecorder, SAMPLE_RATE, uint8ToBase64 } from "../voicebotDemo/helper"
    import { RealtimeServerEvent as RSE } from "../voicebotDemo/events"
    const voiceProperties: Array<{ title: string; icon: string; color: string }> = [
        {
@@ -25,6 +28,302 @@
            color: "#EB5757",
        },
    ]
    // True when a global `window` exists.
    const browser = typeof window !== "undefined"
    // Pages served over plain HTTP use the unencrypted socket scheme; everything else uses wss.
    const servedOverHttp = browser && window.location.protocol === "http:"
    const VOICE_WS_URL = servedOverHttp
        ? "ws://2svoice-server.kontextwerk.info/api/v1/voicebot/ws"
        : "wss://2svoice-server.kontextwerk.info/api/v1/voicebot/ws"
    // Microphone audio is sent in frames of this duration (16-bit samples, hence two bytes each).
    const CHUNK_DURATION_MS = 200
    const BYTES_PER_SAMPLE = 2
    const CHUNK_SIZE_BYTES = Math.round((SAMPLE_RATE * CHUNK_DURATION_MS) / 1000) * BYTES_PER_SAMPLE
    type VoiceStatus = "idle" | "connecting" | "connected" | "error"
    // UI state
    let status: VoiceStatus = "idle"
    let errorMessage = ""
    // Session resources; null while no session is active.
    let ws: WebSocket | null = null
    let recorder: ReturnType<typeof createRecorder> | null = null
    let player: ReturnType<typeof createPlayer> | null = null
    // Recorded bytes that have not been sent yet.
    let outboundBuffer = new Uint8Array(0)
    // Set while stopVoiceDemo is tearing the session down.
    let closing = false
    // In-flight teardown / start-up, used to avoid running either twice concurrently.
    let cleanupPromise: Promise<void> | null = null
    let startPromise: Promise<void> | null = null
    /** Returns the hint text for a status; the error case prefers the concrete failure message. */
    function describeStatus(current: VoiceStatus, failure: string): string {
        switch (current) {
            case "idle":
                return "Tippen, um die Voice-Demo zu starten"
            case "connecting":
                return "Verbindung wird aufgebaut …"
            case "connected":
                return "Live – sprechen Sie jetzt"
            default:
                return failure || "Verbindung fehlgeschlagen"
        }
    }
    // Both variables are referenced here so the hint is recomputed when either changes.
    $: statusHint = describeStatus(status, errorMessage)
    /** Click/keyboard entry point: stops a running session, starts one otherwise, ignores input while connecting. */
    const toggleVoiceDemo = async () => {
        switch (status) {
            case "connecting":
                return
            case "connected":
                await stopVoiceDemo()
                return
            default:
                await startVoiceDemo()
        }
    }
    /** Lets Enter and Space activate the element like a button; other keys pass through untouched. */
    const handleKeydown = (event: KeyboardEvent) => {
        const activates = event.key === "Enter" || event.key === " "
        if (!activates) return
        event.preventDefault()
        void toggleVoiceDemo()
    }
    /**
     * Starts a voice session: tears down any previous session, initialises the
     * player and the microphone recorder, then opens the WebSocket. Does nothing
     * if a start is already in flight or a session is connecting/connected.
     * Failures are reported through handleConnectionError, never thrown.
     */
    const startVoiceDemo = async () => {
        if (!browser) {
            status = "error"
            errorMessage = "Die Sprach-Demo steht nur im Browser zur Verfügung."
            return
        }
        if (startPromise || status === "connecting" || status === "connected") return
        startPromise = (async () => {
            await stopVoiceDemo({ resetStatus: false })
            status = "connecting"
            errorMessage = ""
            outboundBuffer = new Uint8Array(0)
            closing = false
            const newPlayer = createPlayer()
            const newRecorder = createRecorder((pcm: Int16Array) => {
                if (pcm.length === 0) return
                // Copy the samples into a byte array of their own before buffering.
                const bytes = new Uint8Array(pcm.byteLength)
                bytes.set(new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength))
                appendToOutboundBuffer(bytes)
            })
            // Best-effort release of the two objects created above.
            const releaseLocal = async () => {
                try {
                    await newRecorder.stop()
                } catch {
                    /* ignore */
                }
                try {
                    newPlayer.stop()
                    await newPlayer.destroy()
                } catch {
                    /* ignore */
                }
            }
            // Publish both handles before awaiting: if start-up fails half-way,
            // stopVoiceDemo can then release the microphone stream and the
            // AudioContext instead of leaking them.
            player = newPlayer
            recorder = newRecorder
            const superseded = () => player !== newPlayer || recorder !== newRecorder
            try {
                await newPlayer.init()
                await newRecorder.start()
            } catch (err) {
                // A teardown that ran during the awaits has already dropped the
                // handles, so release the local objects directly as well.
                if (superseded()) await releaseLocal()
                const message = extractErrorMessage(err, "Mikrofon konnte nicht gestartet werden.")
                handleConnectionError(message, err)
                return
            }
            if (superseded()) {
                // stopVoiceDemo ran while start-up was awaiting (e.g. the
                // component was destroyed); do not leave the microphone open.
                await releaseLocal()
                return
            }
            let socket: WebSocket
            try {
                socket = new WebSocket(VOICE_WS_URL)
            } catch (err) {
                const message = extractErrorMessage(err, "WebSocket-Verbindung konnte nicht aufgebaut werden.")
                handleConnectionError(message, err)
                return
            }
            ws = socket
            // Every handler first checks that its socket is still the active
            // one, so events from a replaced socket cannot change the state.
            socket.onopen = () => {
                if (ws !== socket) {
                    socket.close(1000, "client-stop")
                    return
                }
                status = "connected"
                flushOutboundBuffer(true)
            }
            socket.onmessage = (event) => {
                if (ws === socket) handleServerMessage(event)
            }
            socket.onerror = (event) => {
                if (ws !== socket) return
                handleConnectionError("WebSocket-Fehler – bitte später erneut versuchen.", event)
            }
            socket.onclose = () => {
                if (ws !== socket || closing) return
                if (status === "connected") {
                    status = "idle"
                    errorMessage = ""
                    // The server ended the session: stop recorder and player as well.
                    void stopVoiceDemo()
                }
            }
        })()
        try {
            await startPromise
        } finally {
            startPromise = null
        }
    }
    /**
     * Tears the session down: flushes buffered audio, stops the recorder,
     * destroys the player and closes the socket. Every step is best effort, so
     * one failing step does not prevent the others. Concurrent calls share the
     * teardown that is already running.
     *
     * @param resetStatus when true (default) the status returns to "idle"
     *   afterwards, unless it is "error".
     */
    const stopVoiceDemo = async ({ resetStatus = true }: { resetStatus?: boolean } = {}) => {
        const settleStatus = () => {
            if (resetStatus && status !== "error") {
                status = "idle"
                errorMessage = ""
            }
        }
        if (cleanupPromise) {
            await cleanupPromise
            settleStatus()
            return
        }
        closing = true
        cleanupPromise = (async () => {
            try {
                flushOutboundBuffer(true)
            } catch {
                /* ignore */
            }
            if (recorder) {
                try {
                    await recorder.stop()
                } catch {
                    /* ignore */
                }
            }
            recorder = null
            if (player) {
                try {
                    player.stop()
                    await player.destroy()
                } catch {
                    /* ignore */
                }
            }
            player = null
            const socket = ws
            ws = null
            if (socket) {
                // Detach first: a socket that is still connecting would
                // otherwise fire onopen later and set the status to
                // "connected" after the session has been torn down.
                socket.onopen = null
                socket.onmessage = null
                socket.onerror = null
                socket.onclose = null
                const state = socket.readyState
                if (state === WebSocket.OPEN || state === WebSocket.CONNECTING) {
                    try {
                        socket.close(1000, "client-stop")
                    } catch {
                        /* ignore */
                    }
                }
            }
            outboundBuffer = new Uint8Array(0)
        })()
        try {
            await cleanupPromise
        } finally {
            cleanupPromise = null
            closing = false
            settleStatus()
        }
    }
    /**
     * Handles one text frame from the server. Only three event types are acted on:
     * - speech started: stop playback and report how much of the current item was played,
     * - audio delta: decode the base64 PCM and queue it for playback,
     * - error: log it.
     * Anything that is not a JSON object, or carries malformed fields, is ignored.
     */
    const handleServerMessage = (event: MessageEvent) => {
        if (!player) return
        const raw: unknown = event.data
        if (typeof raw !== "string") return
        let payload: unknown
        try {
            payload = JSON.parse(raw)
        } catch (err) {
            console.warn("VoiceBot Preview: Konnte Nachricht nicht parsen.", err)
            return
        }
        // JSON.parse can yield null or a primitive; reading `.type` from null would throw.
        if (typeof payload !== "object" || payload === null) return
        const message = payload as Record<string, unknown>
        const type = typeof message.type === "string" ? message.type : "<unbekannt>"
        if (type === RSE.INPUT_AUDIO_BUFFER_SPEECH_STARTED) {
            const { item_id, played_ms } = player.getNowPlaying()
            if (item_id) {
                player.stop()
                if (ws && ws.readyState === WebSocket.OPEN) {
                    ws.send(
                        JSON.stringify({
                            type: "last_item_played_ms.truncate",
                            details: { item_id, played_ms: played_ms || 0 },
                        })
                    )
                }
            }
            return
        }
        if (type === RSE.RESPONSE_AUDIO_DELTA) {
            const { delta, item_id, response_id } = message
            if (typeof delta !== "string" || typeof item_id !== "string") {
                console.warn("VoiceBot Preview: Audio-Delta ohne gültige Felder ignoriert.", message)
                return
            }
            let bytes: Uint8Array
            try {
                bytes = base64ToUint8(delta)
            } catch (err) {
                // atob throws on invalid base64; one bad frame must not break the handler.
                console.warn("VoiceBot Preview: Audio-Delta konnte nicht dekodiert werden.", err)
                return
            }
            // 16-bit samples: a dangling odd byte is dropped.
            const sampleCount = Math.floor(bytes.byteLength / 2)
            if (sampleCount === 0) return
            const pcm = new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount)
            player.play({
                response_id,
                item_id,
                delta,
                pcmInt16: pcm,
            })
            return
        }
        if (type === "error") {
            console.error("VoiceBot Preview: Server hat einen Fehler gemeldet.", message)
        }
    }
    /** Appends recorded bytes to the pending buffer and sends whatever full frames are available. */
    const appendToOutboundBuffer = (chunk: Uint8Array) => {
        if (chunk.length === 0) return
        const pending = outboundBuffer
        const merged = new Uint8Array(pending.length + chunk.length)
        merged.set(pending, 0)
        merged.set(chunk, pending.length)
        outboundBuffer = merged
        flushOutboundBuffer()
    }
    /**
     * Sends the pending audio in frames of CHUNK_SIZE_BYTES while the socket is open.
     * A trailing partial frame is sent only when `force` is true; otherwise it
     * stays buffered for the next call.
     */
    const flushOutboundBuffer = (force = false) => {
        const socketOpen = ws !== null && ws.readyState === WebSocket.OPEN
        if (!socketOpen || outboundBuffer.length === 0) {
            return
        }
        // Take ownership of the pending bytes before sending anything.
        const pending = outboundBuffer
        outboundBuffer = new Uint8Array(0)
        const frameSize = CHUNK_SIZE_BYTES > 0 ? CHUNK_SIZE_BYTES : pending.length
        let offset = 0
        for (; pending.length - offset >= frameSize; offset += frameSize) {
            sendChunk(pending.subarray(offset, offset + frameSize))
        }
        if (offset === pending.length) return
        const tail = pending.slice(offset)
        if (force) {
            sendChunk(tail)
        } else {
            outboundBuffer = tail
        }
    }
    /** Sends one audio frame as a base64 "input_audio_buffer.append" event; send failures end the session unless it is already closing. */
    const sendChunk = (chunk: Uint8Array) => {
        const socket = ws
        if (socket === null || socket.readyState !== WebSocket.OPEN) return
        try {
            const frame = JSON.stringify({ type: "input_audio_buffer.append", audio: uint8ToBase64(chunk) })
            socket.send(frame)
        } catch (err) {
            if (closing) return
            handleConnectionError("Senden des Audiostreams fehlgeschlagen.", err)
        }
    }
    /** Logs a failure, switches the UI to the error state with `message` and tears the session down without resetting that state. */
    const handleConnectionError = (message: string, err?: unknown) => {
        const logged = err ?? message
        console.error("VoiceBot Preview Fehler:", logged)
        status = "error"
        errorMessage = message
        void stopVoiceDemo({ resetStatus: false })
    }
    /**
     * Turns a thrown value into a user-facing message: known DOMException names
     * get a fixed German text, other errors their own message, everything else
     * `fallback`.
     */
    const extractErrorMessage = (err: unknown, fallback: string) => {
        if (err instanceof DOMException) {
            switch (err.name) {
                case "NotAllowedError":
                    return "Zugriff auf das Mikrofon wurde verweigert."
                case "NotFoundError":
                    return "Kein Mikrofon gefunden oder verfügbar."
                case "NotReadableError":
                    return "Auf das Mikrofon konnte nicht zugegriffen werden (ggf. bereits in Verwendung)."
                case "SecurityError":
                    return "Der Browser blockiert den Zugriff – bitte die Seite über HTTPS öffnen."
                default:
                    break
            }
        }
        return err instanceof Error && err.message ? err.message : fallback
    }
    // When the component is destroyed, tear the session down and leave the status as it is.
    const releaseOnDestroy = (): void => {
        void stopVoiceDemo({ resetStatus: false })
    }
    onDestroy(releaseOnDestroy)
 </script>
 <CrinkledSection
@@ -40,12 +339,29 @@
            lowerDescription="Durch den Einsatz modernster KI-Technologien gewährleisten wir eine intelligente und effiziente Kommunikation, die den höchsten Datenschutzstandards entspricht."
        >
            {#snippet primaryContent()}
-                <div class="img">
+                <div
                    class="img"
                    class:connected={status === "connected"}
                    class:errored={status === "error"}
                    role="button"
                    tabindex="0"
                    aria-pressed={status === "connected"}
                    aria-busy={status === "connecting"}
                    aria-label="Voicebot Demo starten"
                    on:click={() => void toggleVoiceDemo()}
                    on:keydown={handleKeydown}
                >
                    <img
                        src="/media/iphone.png"
                        alt="Kontextwerk is calling"
                    />
-                    <div class="shadow"></div>
+                    <div
                        class="voice-overlay"
                        data-status={status}
                        aria-live="polite"
                    >
                        <span>{statusHint}</span>
                    </div>
                </div>
            {/snippet}
        </ProductCategoryFrame>
@@ -54,19 +370,78 @@
 <style lang="less">
    .img {
        position: relative;
        width: 400px;
        min-width: 400px;
        max-height: 100%;
        height: 100%;
        display: flex;
        justify-content: center;
        align-items: center;
        cursor: pointer;
        border-radius: 1.6rem;
        transition:
            transform 0.2s ease,
            border-color 0.2s ease;
        outline: none;
        img {
            width: 60% !important;
            height: 100%;
            object-fit: contain;
            pointer-events: none;
            user-select: none;
        }
-        .shadow {
+
-            display: none;
+        .voice-overlay {
            position: absolute;
            bottom: 1.2rem;
            left: 50%;
            transform: translateX(-50%);
            padding: 0.45rem 1.1rem;
            border-radius: 999px;
            background: rgba(13, 12, 12, 0.8);
            color: white;
            font-size: 0.8rem;
            font-weight: 500;
            letter-spacing: 0.01em;
            display: flex;
            align-items: center;
            gap: 0.4rem;
            pointer-events: none;
            white-space: nowrap;
            transition:
                background 0.2s ease,
                color 0.2s ease;
        }
        .voice-overlay[data-status="connected"] {
            background: rgba(76, 175, 80, 0.85);
        }
        .voice-overlay[data-status="connecting"] {
            background: rgba(255, 152, 0, 0.85);
        }
        .voice-overlay[data-status="error"] {
            background: rgba(235, 87, 87, 0.9);
        }
        &:hover {
            transform: translateY(-4px);
        }
        &.connected {
            border-color: rgba(76, 175, 80, 0.4);
        }
        &.errored {
            border-color: rgba(235, 87, 87, 0.45);
        }
        &:focus-visible {
            outline: 2px solid var(--primary-200);
            outline-offset: 4px;
        }
    }
 </style>
--- a/frontend/src/lib/components/voicebotDemo/events.ts
+++ b/frontend/src/lib/components/voicebotDemo/events.ts
@@ -0,0 +1,90 @@
 /** -----------------------------
 *  Client events (client → server)
 *
 *  Wire-level `type` strings for messages the client sends.
 *  NOTE(review): the names look like the OpenAI Realtime API event names — confirm
 *  against the server before relying on that.
 *  ----------------------------- */
 export enum RealtimeClientEvent {
    // Session / configuration
    SESSION_UPDATE = "session.update",
    // Input audio buffer (client → server)
    INPUT_AUDIO_BUFFER_APPEND = "input_audio_buffer.append",
    INPUT_AUDIO_BUFFER_COMMIT = "input_audio_buffer.commit",
    INPUT_AUDIO_BUFFER_CLEAR = "input_audio_buffer.clear",
    // WebRTC-specific output buffer (client → server)
    OUTPUT_AUDIO_BUFFER_CLEAR = "output_audio_buffer.clear",
    // Conversation items (client → server)
    CONVERSATION_ITEM_CREATE = "conversation.item.create",
    CONVERSATION_ITEM_DELETE = "conversation.item.delete",
    CONVERSATION_ITEM_RETRIEVE = "conversation.item.retrieve",
    CONVERSATION_ITEM_TRUNCATE = "conversation.item.truncate",
    // Inference control
    RESPONSE_CREATE = "response.create",
    RESPONSE_CANCEL = "response.cancel",
 }
 /** ----------------------------
 *  Server events (server → client)
 *
 *  Wire-level `type` strings for messages the client receives.
 *  ---------------------------- */
 export enum RealtimeServerEvent {
    // Session / conversation
    SESSION_CREATED = "session.created",
    SESSION_UPDATED = "session.updated",
    CONVERSATION_CREATED = "conversation.created",
    // Conversation items (server → client)
    CONVERSATION_ITEM_DONE = "conversation.item.done",
    CONVERSATION_ITEM_RETRIEVED = "conversation.item.retrieved",
    CONVERSATION_ITEM_DELETED = "conversation.item.deleted",
    CONVERSATION_ITEM_TRUNCATED = "conversation.item.truncated",
    CONVERSATION_ITEM_INPUT_AUDIO_TRANSCRIPTION_COMPLETED = "conversation.item.input_audio_transcription.completed",
    CONVERSATION_ITEM_INPUT_AUDIO_TRANSCRIPTION_FAILED = "conversation.item.input_audio_transcription.failed",
    // Input audio buffer (server → client)
    INPUT_AUDIO_BUFFER_CLEARED = "input_audio_buffer.cleared",
    INPUT_AUDIO_BUFFER_COMMITTED = "input_audio_buffer.committed",
    INPUT_AUDIO_BUFFER_SPEECH_STARTED = "input_audio_buffer.speech_started",
    INPUT_AUDIO_BUFFER_SPEECH_STOPPED = "input_audio_buffer.speech_stopped",
    // WebRTC-specific output buffer (server → client)
    OUTPUT_AUDIO_BUFFER_CLEARED = "output_audio_buffer.cleared",
    OUTPUT_AUDIO_BUFFER_STARTED = "output_audio_buffer.started",
    OUTPUT_AUDIO_BUFFER_STOPPED = "output_audio_buffer.stopped",
    // Rate limits
    RATE_LIMITS_UPDATED = "rate_limits.updated",
    // Response lifecycle (server → client)
    RESPONSE_CREATED = "response.created",
    RESPONSE_OUTPUT_ITEM_ADDED = "response.output_item.added",
    RESPONSE_OUTPUT_ITEM_DONE = "response.output_item.done",
    RESPONSE_CONTENT_PART_ADDED = "response.content_part.added",
    RESPONSE_CONTENT_PART_DONE = "response.content_part.done",
    // Streaming deltas
    RESPONSE_TEXT_DELTA = "response.output_text.delta",
    RESPONSE_TEXT_DONE = "response.output_text.done",
    RESPONSE_AUDIO_DELTA = "response.output_audio.delta",
    RESPONSE_AUDIO_DONE = "response.output_audio.done",
    RESPONSE_AUDIO_TRANSCRIPT_DELTA = "response.output_audio_transcript.delta",
    RESPONSE_AUDIO_TRANSCRIPT_DONE = "response.output_audio_transcript.done",
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DELTA = "response.function_call_arguments.delta",
    RESPONSE_FUNCTION_CALL_ARGUMENTS_DONE = "response.function_call_arguments.done",
    // Completion / cancellation / errors
    RESPONSE_DONE = "response.done",
    RESPONSE_CANCELLED = "response.cancelled",
    ERROR = "error",
    // Graph synchronisation
    GRAPH_CURSOR = "graph_cursor",
 }
 /** Shared type: any event name, whether sent by the client or by the server. */
 export type AnyRealtimeEvent = RealtimeClientEvent | RealtimeServerEvent
 /**
 * Extracts the session id from a path of the form `/develop/chats/<id>[/...]`.
 *
 * @param path URL path to inspect
 * @returns the id segment, or `null` when the path does not match or the segment is the literal "new"
 */
 export function extractSessionId(path: string): string | null {
    const [, sessionId] = /^\/develop\/chats\/([^\/]+)(?:\/|$)/.exec(path) ?? []
    if (sessionId === undefined || sessionId === "new") return null
    return sessionId
 }
--- a/frontend/src/lib/components/voicebotDemo/helper.ts
+++ b/frontend/src/lib/components/voicebotDemo/helper.ts
@@ -0,0 +1,148 @@
 // Sample rate in Hz requested for the recorder's AudioContext (see createRecorder); exported for callers.
 const SAMPLE_RATE = 24_000
 // NOTE(review): not referenced anywhere in the visible part of this module — confirm whether it is still needed.
 const BUFFER_SIZE = 4_800
 // URLs passed to audioWorklet.addModule() for the playback and the recording worklet.
 const AUDIO_PLAYBACK_WORKLET_URL = "/assets/audio-playback-worklet.js"
 const AUDIO_PROCESSOR_WORKLET_URL = "/assets/audio-processor-worklet.js"
 /** Encodes raw bytes as base64 by mapping each byte to one character and passing the result to `btoa`. */
 const uint8ToBase64 = (u8: Uint8Array): string => {
    const chars = Array.from(u8, (byte) => String.fromCharCode(byte))
    return btoa(chars.join(""))
 }
 /** Decodes a base64 string into bytes; `atob` throws if the input is not valid base64. */
 const base64ToUint8 = (b64: string): Uint8Array => {
    const decoded = atob(b64)
    // atob yields one character (code 0-255) per byte.
    return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0))
 }
 /**
 * Progress report posted by the playback worklet.
 * `item_id` is `null` when no item is playing; the worklet sends that after a
 * "clear" and when the playing item is deleted.
 */
 interface NowPlayingMessage {
    type: "nowPlaying"
    item_id: string | null
    played_ms: number
 }
 /** Snapshot returned by `Player.getNowPlaying()`: the last item id and played duration reported by the worklet. */
 interface NowPlayingState {
    // null when nothing is playing (also the initial value and the value after destroy()).
    item_id: string | null
    played_ms: number
 }
 /** Handle returned by `createPlayer`; each method except init/destroy/getNowPlaying posts one message to the playback worklet. */
 interface Player {
    // Creates the AudioContext and worklet node; must complete before play() has any effect.
    init: (sampleRate?: number) => Promise<void>
    // NOTE(review): the parameter is untyped here; the implementation declares it as ResponseAudioDelta.
    play: (delta) => void
    deleteItem: (item_id: string) => void
    // Posts "clear" to the worklet.
    stop: () => void
    setSourceRate: (hz: number) => void
    getNowPlaying: () => NowPlayingState
    // Closes the AudioContext and resets the handle's state.
    destroy: () => Promise<void>
    mute: () => void
    unmute: () => void
    // NOTE(review): declared optional, but the object returned by createPlayer does not include it.
    node?: AudioWorkletNode | null
 }
 /**
 * Creates a player that streams PCM chunks to the "audio-playback-worklet"
 * processor and mirrors the worklet's "nowPlaying" reports.
 *
 * @param defaultSampleRate AudioContext sample rate used when `init` is called without one
 * @returns the player handle; all methods are no-ops until `init` has completed
 */
 const createPlayer = (defaultSampleRate = 48000): Player => {
    let ctx: AudioContext | null = null
    let node: AudioWorkletNode | null = null
    let nowItemId: string | null = null
    let playedMs = 0
    const isNowPlayingMessage = (m: unknown): m is NowPlayingMessage => {
        if (!m || typeof m !== "object") return false
        const x = m as Record<string, unknown>
        return x["type"] === "nowPlaying" && "played_ms" in x
    }
    /**
     * Creates the AudioContext, loads the worklet module and connects the node
     * to the destination. A previous context is closed first, and a context
     * created here is closed again if any later step fails.
     */
    const init = async (sampleRate = defaultSampleRate): Promise<void> => {
        if (ctx) await destroy()
        const context = new AudioContext({ sampleRate })
        ctx = context
        try {
            await context.audioWorklet.addModule(AUDIO_PLAYBACK_WORKLET_URL)
            // destroy() may have run while the module was loading; do not revive the player.
            if (ctx !== context) return
            const playbackNode = new AudioWorkletNode(context, "audio-playback-worklet")
            playbackNode.port.onmessage = (e: MessageEvent) => {
                const m: unknown = e.data
                if (isNowPlayingMessage(m)) {
                    // The worklet reports null when nothing is playing.
                    nowItemId = typeof m.item_id === "string" ? m.item_id : null
                    playedMs = m.played_ms | 0
                }
            }
            playbackNode.connect(context.destination)
            node = playbackNode
        } catch (err) {
            if (ctx === context) {
                try {
                    await destroy()
                } catch {
                    /* closing is best effort; the original failure is reported below */
                }
            }
            throw err
        }
    }
    /** Queues one audio delta; the sample buffer is transferred to the worklet, not copied. */
    const play = (delta: ResponseAudioDelta): void => {
        if (!node) return
        const buf = delta.pcmInt16.buffer
        // The worklet only reads item_id and pcmInt16, so the base64 text is not sent along.
        const payload = { item_id: delta.item_id, pcmInt16: delta.pcmInt16 }
        node.port.postMessage({ type: "appendDelta", delta: payload }, [buf])
    }
    const deleteItem = (item_id: string): void => {
        node?.port.postMessage({ type: "deleteItem", item_id })
    }
    const stop = (): void => {
        node?.port.postMessage({ type: "clear" })
    }
    const setSourceRate = (hz: number): void => {
        node?.port.postMessage({ type: "setSourceRate", hz })
    }
    const getNowPlaying = (): NowPlayingState => {
        return { item_id: nowItemId, played_ms: playedMs }
    }
    const mute = (): void => {
        node?.port.postMessage({ type: "mute" })
    }
    const unmute = (): void => {
        node?.port.postMessage({ type: "unmute" })
    }
    /** Detaches the node, closes the AudioContext and resets all state; safe to call repeatedly. */
    const destroy = async (): Promise<void> => {
        const context = ctx
        const playbackNode = node
        // Reset first so the handle is in a clean state even if close() rejects.
        ctx = null
        node = null
        nowItemId = null
        playedMs = 0
        if (playbackNode) {
            playbackNode.port.onmessage = null
            playbackNode.disconnect()
        }
        if (context && context.state !== "closed") await context.close()
    }
    return { init, play, deleteItem, stop, setSourceRate, getNowPlaying, destroy, mute, unmute }
 }
 /**
 * Creates a microphone recorder that feeds the "audio-processor-worklet"
 * processor and forwards every Int16Array it posts to `onChunk`.
 *
 * @param onChunk receives each PCM block posted by the worklet
 * @returns `start` (acquire microphone and begin recording) and `stop` (release everything)
 */
 const createRecorder = (onChunk: (pcm: Int16Array) => void) => {
    let ctx: AudioContext | null = null
    let stream: MediaStream | null = null
    let source: MediaStreamAudioSourceNode | null = null
    let worklet: AudioWorkletNode | null = null
    /** Releases the microphone and the audio graph; safe to call repeatedly or before `start`. */
    const stop = async () => {
        if (worklet) {
            // Detach so no further chunks reach onChunk after stop().
            worklet.port.onmessage = null
            worklet.disconnect()
        }
        source?.disconnect()
        worklet = null
        source = null
        if (stream) {
            stream.getTracks().forEach((t) => t.stop())
            stream = null
        }
        if (ctx) {
            const context = ctx
            ctx = null
            await context.close()
        }
    }
    /**
     * Requests the microphone and wires stream → worklet → destination.
     * Rejects with the underlying error; anything acquired before the failure
     * (in particular the microphone stream) is released first.
     */
    const start = async () => {
        // A restart must not orphan the previous stream or context.
        await stop()
        try {
            stream = await navigator.mediaDevices.getUserMedia({ audio: true })
            ctx = new (window.AudioContext || (window as any).webkitAudioContext)({ sampleRate: SAMPLE_RATE })
            await ctx.audioWorklet.addModule(AUDIO_PROCESSOR_WORKLET_URL)
            source = ctx.createMediaStreamSource(stream)
            worklet = new AudioWorkletNode(ctx, "audio-processor-worklet")
            worklet.port.onmessage = (ev: MessageEvent<Int16Array>) => onChunk(ev.data)
            source.connect(worklet)
            worklet.connect(ctx.destination)
        } catch (err) {
            try {
                await stop()
            } catch {
                /* release is best effort; the original failure is reported below */
            }
            throw err
        }
    }
    return { start, stop }
 }
 // Public surface of the voicebot helper module.
 export {
    SAMPLE_RATE,
    base64ToUint8,
    createPlayer,
    createRecorder,
    uint8ToBase64,
 }
--- a/types/global.d.ts
+++ b/types/global.d.ts
@@ -172,3 +172,9 @@ interface ActionApproval {
    modalText: string
    callback: () => void
 }
 /** One chunk of response audio as handed to the voicebot player. */
 interface ResponseAudioDelta {
    response_id: string
    // Identifies the item the audio belongs to; the playback worklet groups and deletes chunks by it.
    item_id: string
    // Base64 text of the audio as received from the server.
    delta: string
    // The same audio decoded to 16-bit PCM samples.
    pcmInt16: Int16Array
 }