Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio configuration is derived from the connection parameters so the
// microphone always matches what the server was told to expect.
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Mutable session state shared by the functions below.
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false;

// Raw PCM chunks captured from the microphone, flushed to a WAV file at shutdown.
let recordedFrames = [];

// --- Helper functions ---

// Blank out the current terminal line so an in-progress (unformatted)
// transcript can be replaced by the final formatted one.
function clearLine() {
  const blanks = " ".repeat(80);
  process.stdout.write(`\r${blanks}\r`);
}

// Convert a Unix timestamp in seconds to an ISO-8601 string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000; // Date expects milliseconds
  return new Date(millis).toISOString();
}

// Build a canonical 44-byte WAV (RIFF) header for 16-bit PCM audio.
// dataLength is the size in bytes of the PCM payload that will follow.
function createWavHeader(sampleRate, channels, dataLength) {
  const header = Buffer.alloc(44);
  const bytesPerSample = 2; // 16-bit samples
  const blockAlign = channels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // file size minus the 8-byte RIFF preamble
  header.write("WAVE", 8);

  // "fmt " sub-chunk: 16-byte PCM format description
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34); // bits per sample

  // "data" sub-chunk header; PCM bytes are appended after this
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}

// Write all recorded PCM frames to a timestamped WAV file and report the
// filename and duration. No-op (with a message) when nothing was recorded.
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp, e.g. recorded_audio_2024-01-01T12-00-00.wav
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames into a single PCM payload
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header and write header + payload in one file
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // Bug fix: the original logged the literal text "$(unknown)" instead of
    // interpolating the filename (`$()` is not template-literal syntax).
    console.log(`Audio saved to: ${filename}`);
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}

// --- Main function ---

// Open the v3 streaming WebSocket, wire up its lifecycle handlers, and
// start capturing microphone audio once the connection is established.
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // The API key travels in the Authorization header; session options ride
  // on the endpoint's query string (built from CONNECTION_PARAMS above).
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Start the microphone
    startMicrophone();
  });

  // Dispatch on the v3 message schema: Begin / Turn / Termination.
  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);
      switch (data.type) {
        case "Begin": {
          const sessionId = data.id;
          const expiresAt = data.expires_at;
          console.log(
            `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
          );
          break;
        }
        case "Turn": {
          const transcript = data.transcript || "";
          if (data.turn_is_formatted) {
            // Final formatted text replaces the in-progress line.
            clearLine();
            console.log(transcript);
          } else {
            // Unformatted partials overwrite the current line in place.
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination": {
          const audioDuration = data.audio_duration_seconds;
          const sessionDuration = data.session_duration_seconds;
          console.log(
            `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
          );
          break;
        }
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}

// Start capturing microphone audio, mirroring each chunk into the local
// WAV buffer and streaming it to the open WebSocket.
function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      const socketOpen = ws && ws.readyState === WebSocket.OPEN;
      if (socketOpen && !stopRequested) {
        // Keep a copy for the WAV file, then forward the chunk upstream.
        recordedFrames.push(Buffer.from(chunk));
        ws.send(chunk);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}

// Tear the session down in a safe order: stop feeding audio, persist the
// WAV file, stop the mic, then politely terminate the WebSocket session.
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open (or still connecting)
  const socketActive =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (socketActive) {
    try {
      // Tell the server we're done so the session (and billing) ends cleanly.
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}

// Install process-level handlers so every exit path (Ctrl+C, SIGTERM,
// uncaught exception) runs cleanup before the process dies.
function setupTerminationHandlers() {
  // Shared shutdown path: clean up, then give async teardown a moment.
  const shutdown = (exitCode) => {
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  process.on("SIGINT", () => {
    console.log("\nCtrl+C received. Stopping...");
    shutdown(0);
  });

  process.on("SIGTERM", () => {
    console.log("\nTermination signal received. Stopping...");
    shutdown(0);
  });

  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    shutdown(1);
  });
}

// Entry point: kick off the streaming session.
run();

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === "SessionBegins") {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || "";
13 if (!text) {
14 return;
15 }
16
17 if (msgType === "PartialTranscript") {
18 console.log("Partial:", text);
19 } else if (msgType === "FinalTranscript") {
20 console.log("Final:", text);
21 } else if (msgType === "error") {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 const formatted = data.turn_is_formatted;
14 if (formatted) {
15 clearLine();
16 console.log(transcript);
17 } else {
18 process.stdout.write(`\r${transcript}`);
19 }
20 } else if (msgType === "Termination") {
21 const audioDuration = data.audio_duration_seconds;
22 const sessionDuration = data.session_duration_seconds;
23 console.log(
24 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
25 );
26 }
27 } catch (error) {
28 console.error(`\nError handling message: ${error}`);
29 console.error(`Message data: ${message}`);
30 }
31});

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations

Note: Pricing is based on session duration so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.