transcription

OpenAI logoWhisper Streaming Large v3

Realtime audio streaming using Whisper Large v3 and WebSockets for production workloads

Model details

Example usage

The script below shows how to stream audio directly from your laptop's microphone.

1import asyncio
2import websockets
3import sounddevice as sd
4import numpy as np
5import json
6import os
7
# --- Microphone capture settings ---
SAMPLE_RATE = 16000  # Hz; must match streaming_whisper_params["sample_rate"] below
CHUNK_SIZE = 512  # frames delivered per audio callback
CHUNK_DURATION = CHUNK_SIZE / SAMPLE_RATE  # seconds of audio in each chunk
CHANNELS = 1  # mono capture

# --- Baseten connection settings ---
headers = {"Authorization": f"Api-Key {os.getenv('BASETEN_API_KEY')}"}
model_id = ""  # fill in your Baseten model id

# Configuration blob the client must send as its first websocket message:
# voice-activity-detection tuning plus the Whisper streaming parameters.
metadata = {
    "vad_params": {
        "threshold": 0.5,
        "min_silence_duration_ms": 300,
        "speech_pad_ms": 30,
    },
    "streaming_whisper_params": {
        "encoding": "pcm_s16le",
        "sample_rate": 16000,
        "enable_partial_transcripts": True,
        "audio_language": "en",
    },
}
31
async def stream_microphone_audio(ws_url):
    """Stream live microphone audio to a transcription websocket and print results.

    Connects to *ws_url* (with the module-level ``headers``), sends the
    ``metadata`` configuration blob first, then concurrently (a) forwards
    16-bit PCM microphone chunks to the server and (b) prints the partial /
    final transcripts it sends back. Runs until cancelled or the connection
    drops.

    :param ws_url: fully-qualified ``wss://`` endpoint of the model.
    """
    loop = asyncio.get_running_loop()
    async with websockets.connect(ws_url, additional_headers=headers) as ws:
        print("Connected to server")

        # The server expects the JSON config message before any audio bytes.
        await ws.send(json.dumps(metadata))
        print("Sent metadata to server")

        send_queue = asyncio.Queue()

        # sounddevice invokes this callback from its own audio thread, so we
        # must not touch the websocket here; hand the chunk to the event-loop
        # thread via call_soon_threadsafe instead.
        def audio_callback(indata, frames, time_info, status):
            if status:
                print(f"Audio warning: {status}")
            # float32 samples in [-1, 1] -> pcm_s16le, matching the encoding
            # advertised in metadata["streaming_whisper_params"].
            int16_data = (indata * 32767).astype(np.int16).tobytes()
            loop.call_soon_threadsafe(send_queue.put_nowait, int16_data)

        with sd.InputStream(
                samplerate=SAMPLE_RATE,
                blocksize=CHUNK_SIZE,
                channels=CHANNELS,
                dtype="float32",
                callback=audio_callback,
        ):
            print("Streaming mic audio...")

            async def send_audio():
                # Drain queued PCM chunks and forward them to the server.
                while True:
                    chunk = await send_queue.get()
                    await ws.send(chunk)

            async def receive_server_messages():
                while True:
                    response = await ws.recv()
                    try:
                        message = json.loads(response)
                        is_final = message.get("is_final")
                        transcript = message.get("transcript")

                        # BUG FIX: the original `if not is_final / elif is_final
                        # / else` chain made the "unknown type" branch
                        # unreachable and printed messages lacking "is_final"
                        # as partials. Treat a missing flag as unknown.
                        if is_final is None:
                            print(f"[unknown type] {message}")
                        elif is_final:
                            print(f"[final] {transcript}")
                        else:
                            print(f"[partial] {transcript}")
                    # Broad on purpose: also covers non-dict payloads (e.g. a
                    # bare JSON `null`), where .get would raise AttributeError.
                    except Exception as e:
                        print("Non-JSON message or parse error:", response, "| Error:", str(e))

            # Run send + receive tasks concurrently
            await asyncio.gather(send_audio(), receive_server_messages())
83
84
# Change this to your actual WebSocket URL (model_id is set near the top of the file)
ws_url = f"wss://model-{model_id}.api.baseten.co/environments/production/websocket"

if __name__ == "__main__":
    # Guard the entry point so importing this module doesn't immediately
    # open the microphone and the websocket connection.
    asyncio.run(stream_microphone_audio(ws_url))
Input
JSON output
1null

transcription models

See all
OpenAI logo
Transcription

Whisper Streaming Large v3

H100 MIG 40GB
OpenAI logo
Transcription

Whisper Streaming Large v3 Turbo

H100 MIG 40GB
OpenAI logo
Transcription

Whisper Large V3 (best performance)

V3 - H100 MIG 40GB

OpenAI models

See all
OpenAI logo
Transcription

Whisper Streaming Large v3

H100 MIG 40GB
OpenAI logo
Transcription

Whisper Streaming Large v3 Turbo

H100 MIG 40GB
OpenAI logo
Model API
LLM

GPT OSS 120B

MoE

🔥 Trending models