Migrating from Streaming v2 to Streaming v3 (JavaScript)

This cookbook guides you through migrating from AssemblyAI’s legacy Streaming STT model (v2) to our latest Universal Streaming STT model (v3), which provides ultra-low latency for faster transcription, intelligent endpointing for more natural speech detection, and improved accuracy across various audio conditions.

Check out this blog post to learn more about this new model!

Overview of changes

The migration involves several key improvements:

  • API Version: Upgrade from v2 (/v2/realtime/ws) to v3 (/v3/ws)
  • Enhanced Error Handling: Robust cleanup and resource management
  • Modern Message Format: Updated message types and structure
  • Configuration Options: More flexible connection parameters
  • Graceful Shutdown: Proper termination handling

You can follow the step-by-step guide below to make changes to your existing code, but here is what your code should look like in the end:

const WebSocket = require("ws");
const mic = require("mic");
const querystring = require("querystring");
const fs = require("fs");

// --- Configuration ---
const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
const CONNECTION_PARAMS = {
  sample_rate: 16000,
  format_turns: true, // Request formatted final transcripts
};
const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;

// Audio configuration is derived from the connection parameters so the
// microphone always matches what the server was told to expect.
const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
const CHANNELS = 1;

// Mutable session state shared by the functions below.
let micInstance = null;
let micInputStream = null;
let ws = null;
let stopRequested = false;

// Raw PCM chunks captured from the microphone, flushed to a WAV file at shutdown.
let recordedFrames = [];

// --- Helper functions ---

// Blank out the current terminal line so an in-progress (unformatted)
// transcript can be replaced by the final formatted one.
function clearLine() {
  const blanks = " ".repeat(80);
  process.stdout.write(`\r${blanks}\r`);
}

// Convert a Unix timestamp in seconds to an ISO-8601 string.
function formatTimestamp(timestamp) {
  const millis = timestamp * 1000; // Date expects milliseconds
  return new Date(millis).toISOString();
}

// Build a canonical 44-byte WAV (RIFF) header for 16-bit PCM audio.
// dataLength is the size in bytes of the PCM payload that will follow.
function createWavHeader(sampleRate, channels, dataLength) {
  const header = Buffer.alloc(44);
  const bytesPerSample = 2; // 16-bit samples
  const blockAlign = channels * bytesPerSample;
  const byteRate = sampleRate * blockAlign;

  // RIFF chunk descriptor
  header.write("RIFF", 0);
  header.writeUInt32LE(36 + dataLength, 4); // file size minus the 8-byte RIFF preamble
  header.write("WAVE", 8);

  // "fmt " sub-chunk: 16-byte PCM format description
  header.write("fmt ", 12);
  header.writeUInt32LE(16, 16); // fmt chunk size
  header.writeUInt16LE(1, 20); // audio format 1 = uncompressed PCM
  header.writeUInt16LE(channels, 22);
  header.writeUInt32LE(sampleRate, 24);
  header.writeUInt32LE(byteRate, 28);
  header.writeUInt16LE(blockAlign, 32);
  header.writeUInt16LE(16, 34); // bits per sample

  // "data" sub-chunk header; PCM bytes are appended after this
  header.write("data", 36);
  header.writeUInt32LE(dataLength, 40);

  return header;
}

// Write all recorded PCM frames to a timestamped WAV file and report the
// filename and duration. No-op (with a message) when nothing was recorded.
function saveWavFile() {
  if (recordedFrames.length === 0) {
    console.log("No audio data recorded.");
    return;
  }

  // Generate filename with timestamp, e.g. recorded_audio_2024-01-01T12-00-00.wav
  const timestamp = new Date().toISOString().replace(/[:.]/g, "-").slice(0, 19);
  const filename = `recorded_audio_${timestamp}.wav`;

  try {
    // Combine all recorded frames into a single PCM payload
    const audioData = Buffer.concat(recordedFrames);
    const dataLength = audioData.length;

    // Create WAV header and write header + payload in one file
    const wavHeader = createWavHeader(SAMPLE_RATE, CHANNELS, dataLength);
    const wavFile = Buffer.concat([wavHeader, audioData]);
    fs.writeFileSync(filename, wavFile);

    // Bug fix: the original logged the literal text "$(unknown)" instead of
    // interpolating the filename (`$()` is not template-literal syntax).
    console.log(`Audio saved to: ${filename}`);
    console.log(
      `Duration: ${(dataLength / (SAMPLE_RATE * CHANNELS * 2)).toFixed(2)} seconds`
    );
  } catch (error) {
    console.error(`Error saving WAV file: ${error}`);
  }
}

// --- Main function ---

// Open the v3 streaming WebSocket, wire up its lifecycle handlers, and
// start capturing microphone audio once the connection is established.
async function run() {
  console.log("Starting AssemblyAI streaming transcription...");
  console.log("Audio will be saved to a WAV file when the session ends.");

  // The API key travels in the Authorization header; session options ride
  // on the endpoint's query string (built from CONNECTION_PARAMS above).
  ws = new WebSocket(API_ENDPOINT, {
    headers: {
      Authorization: YOUR_API_KEY,
    },
  });

  ws.on("open", () => {
    console.log("WebSocket connection opened.");
    console.log(`Connected to: ${API_ENDPOINT}`);
    // Start the microphone
    startMicrophone();
  });

  // Dispatch on the v3 message schema: Begin / Turn / Termination.
  ws.on("message", (message) => {
    try {
      const data = JSON.parse(message);
      switch (data.type) {
        case "Begin": {
          const sessionId = data.id;
          const expiresAt = data.expires_at;
          console.log(
            `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
          );
          break;
        }
        case "Turn": {
          const transcript = data.transcript || "";
          if (data.turn_is_formatted) {
            // Final formatted text replaces the in-progress line.
            clearLine();
            console.log(transcript);
          } else {
            // Unformatted partials overwrite the current line in place.
            process.stdout.write(`\r${transcript}`);
          }
          break;
        }
        case "Termination": {
          const audioDuration = data.audio_duration_seconds;
          const sessionDuration = data.session_duration_seconds;
          console.log(
            `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
          );
          break;
        }
      }
    } catch (error) {
      console.error(`\nError handling message: ${error}`);
      console.error(`Message data: ${message}`);
    }
  });

  ws.on("error", (error) => {
    console.error(`\nWebSocket Error: ${error}`);
    cleanup();
  });

  ws.on("close", (code, reason) => {
    console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
    cleanup();
  });

  // Handle process termination
  setupTerminationHandlers();
}

// Start capturing microphone audio, mirroring each chunk into the local
// WAV buffer and streaming it to the open WebSocket.
function startMicrophone() {
  try {
    micInstance = mic({
      rate: SAMPLE_RATE.toString(),
      channels: CHANNELS.toString(),
      debug: false,
      exitOnSilence: 6, // This won't actually exit, just a parameter for mic
    });

    micInputStream = micInstance.getAudioStream();

    micInputStream.on("data", (chunk) => {
      const socketOpen = ws && ws.readyState === WebSocket.OPEN;
      if (socketOpen && !stopRequested) {
        // Keep a copy for the WAV file, then forward the chunk upstream.
        recordedFrames.push(Buffer.from(chunk));
        ws.send(chunk);
      }
    });

    micInputStream.on("error", (err) => {
      console.error(`Microphone Error: ${err}`);
      cleanup();
    });

    micInstance.start();
    console.log("Microphone stream opened successfully.");
    console.log("Speak into your microphone. Press Ctrl+C to stop.");
  } catch (error) {
    console.error(`Error opening microphone stream: ${error}`);
    cleanup();
  }
}

// Tear the session down in a safe order: stop feeding audio, persist the
// WAV file, stop the mic, then politely terminate the WebSocket session.
function cleanup() {
  stopRequested = true;

  // Save recorded audio to WAV file
  saveWavFile();

  // Stop microphone if it's running
  if (micInstance) {
    try {
      micInstance.stop();
    } catch (error) {
      console.error(`Error stopping microphone: ${error}`);
    }
    micInstance = null;
  }

  // Close WebSocket connection if it's open (or still connecting)
  const socketActive =
    ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState);
  if (socketActive) {
    try {
      // Tell the server we're done so the session (and billing) ends cleanly.
      if (ws.readyState === WebSocket.OPEN) {
        const terminateMessage = { type: "Terminate" };
        console.log(
          `Sending termination message: ${JSON.stringify(terminateMessage)}`
        );
        ws.send(JSON.stringify(terminateMessage));
      }
      ws.close();
    } catch (error) {
      console.error(`Error closing WebSocket: ${error}`);
    }
    ws = null;
  }

  console.log("Cleanup complete.");
}

// Install process-level handlers so every exit path (Ctrl+C, SIGTERM,
// uncaught exception) runs cleanup before the process dies.
function setupTerminationHandlers() {
  // Shared shutdown path: clean up, then give async teardown a moment.
  const shutdown = (exitCode) => {
    cleanup();
    // Give time for cleanup before exiting
    setTimeout(() => process.exit(exitCode), 1000);
  };

  process.on("SIGINT", () => {
    console.log("\nCtrl+C received. Stopping...");
    shutdown(0);
  });

  process.on("SIGTERM", () => {
    console.log("\nTermination signal received. Stopping...");
    shutdown(0);
  });

  process.on("uncaughtException", (error) => {
    console.error(`\nUncaught exception: ${error}`);
    shutdown(1);
  });
}

// Entry point: kick off the streaming session.
run();

For more information on our Universal Streaming feature, see this section of our official documentation.

Step-by-step migration guide

1. Update API endpoint and configuration

Before (v2):

1const API_KEY = "<YOUR_API_KEY>";
2const SAMPLE_RATE = 16000; // 16kHz sample rate
3
4const ws = new WebSocket(
5 `wss://api.assemblyai.com/v2/realtime/ws?sample_rate=${SAMPLE_RATE}`,
6 {
7 headers: {
8 Authorization: API_KEY,
9 },
10 }
11);

After (v3):

1// --- Configuration ---
2const YOUR_API_KEY = "YOUR-API-KEY"; // Replace with your actual API key
3const CONNECTION_PARAMS = {
4 sample_rate: 16000,
5 format_turns: true, // Request formatted final transcripts
6};
7const API_ENDPOINT_BASE_URL = "wss://streaming.assemblyai.com/v3/ws";
8const API_ENDPOINT = `${API_ENDPOINT_BASE_URL}?${querystring.stringify(CONNECTION_PARAMS)}`;
9
10// Initialize WebSocket connection
11ws = new WebSocket(API_ENDPOINT, {
12 headers: {
13 Authorization: YOUR_API_KEY,
14 },
15});

Key Changes:

  • New base URL: streaming.assemblyai.com instead of api.assemblyai.com
  • Version upgrade: /v3/ws instead of /v2/realtime/ws
  • Configuration via URL parameters using querystring
  • Added format_turns option for better transcript formatting

2. Audio configuration

Before (v2):

1const SAMPLE_RATE = 16000;
2const CHANNELS = 1;

After (v3):

1const SAMPLE_RATE = CONNECTION_PARAMS.sample_rate;
2const CHANNELS = 1;

Key Changes:

  • Sample rate now references the configuration parameter

3. Update message handling schema

Before (v2):

1ws.on("message", (message) => {
2 try {
3 const msg = JSON.parse(message);
4 const msgType = msg.message_type;
5
6 if (msgType === "SessionBegins") {
7 const sessionId = msg.session_id;
8 console.log("Session ID:", sessionId);
9 return;
10 }
11
12 const text = msg.text || "";
13 if (!text) {
14 return;
15 }
16
17 if (msgType === "PartialTranscript") {
18 console.log("Partial:", text);
19 } else if (msgType === "FinalTranscript") {
20 console.log("Final:", text);
21 } else if (msgType === "error") {
22 console.error("Error:", msg.error);
23 }
24 } catch (error) {
25 console.error("Error handling message:", error);
26 }
27});

After (v3):

1ws.on("message", (message) => {
2 try {
3 const data = JSON.parse(message);
4 const msgType = data.type;
5 if (msgType === "Begin") {
6 const sessionId = data.id;
7 const expiresAt = data.expires_at;
8 console.log(
9 `\nSession began: ID=${sessionId}, ExpiresAt=${formatTimestamp(expiresAt)}`
10 );
11 } else if (msgType === "Turn") {
12 const transcript = data.transcript || "";
13 const formatted = data.turn_is_formatted;
14 if (formatted) {
15 clearLine();
16 console.log(transcript);
17 } else {
18 process.stdout.write(`\r${transcript}`);
19 }
20 } else if (msgType === "Termination") {
21 const audioDuration = data.audio_duration_seconds;
22 const sessionDuration = data.session_duration_seconds;
23 console.log(
24 `\nSession Terminated: Audio Duration=${audioDuration}s, Session Duration=${sessionDuration}s`
25 );
26 }
27 } catch (error) {
28 console.error(`\nError handling message: ${error}`);
29 console.error(`Message data: ${message}`);
30 }
31});

Key Changes:

  • Message types renamed: SessionBegins → Begin, PartialTranscript/FinalTranscript → Turn
  • Field names updated: message_type → type, session_id → id, text → transcript
  • Added session expiration timestamp handling (expires_at)
  • New transcript formatting with turn_is_formatted flag
  • Added turn tracking with turn_order and end_of_turn fields
  • New confidence scoring with end_of_turn_confidence
  • Added Termination message with session statistics
  • Error handling moved from message-based to WebSocket events

4. Add graceful shutdown handling and improve error handling and logging

Before (v2):

1ws.on("close", (code, reason) => onClose(ws, code, reason));
2
3function onClose(ws, code, reason) {
4 if (recording) {
5 recording.end();
6 }
7 console.log("Disconnected");
8}
9
10process.on("SIGINT", async function () {
11 console.log();
12 console.log("Stopping recording");
13 if (recording) {
14 recording.end();
15 }
16 console.log("Closing real-time transcript connection");
17 if (ws.readyState === WebSocket.OPEN) {
18 ws.close();
19 }
20 process.exit();
21});

After (v3):

1ws.on("close", (code, reason) => {
2 console.log(`\nWebSocket Disconnected: Status=${code}, Msg=${reason}`);
3 cleanup();
4});
5
6function cleanup() {
7 stopRequested = true;
8 // Save recorded audio to WAV file
9 saveWavFile();
10 // Stop microphone if it's running
11 if (micInstance) {
12 try {
13 micInstance.stop();
14 } catch (error) {
15 console.error(`Error stopping microphone: ${error}`);
16 }
17 micInstance = null;
18 }
19 // Close WebSocket connection if it's open
20 if (ws && [WebSocket.OPEN, WebSocket.CONNECTING].includes(ws.readyState)) {
21 try {
22 // Send termination message if possible
23 if (ws.readyState === WebSocket.OPEN) {
24 const terminateMessage = { type: "Terminate" };
25 console.log(
26 `Sending termination message: ${JSON.stringify(terminateMessage)}`
27 );
28 ws.send(JSON.stringify(terminateMessage));
29 }
30 ws.close();
31 } catch (error) {
32 console.error(`Error closing WebSocket: ${error}`);
33 }
34 ws = null;
35 }
36 console.log("Cleanup complete.");
37}
38
39function setupTerminationHandlers() {
40 // Handle Ctrl+C and other termination signals
41 process.on("SIGINT", () => {
42 console.log("\nCtrl+C received. Stopping...");
43 cleanup();
44 // Give time for cleanup before exiting
45 setTimeout(() => process.exit(0), 1000);
46 });
47 process.on("SIGTERM", () => {
48 console.log("\nTermination signal received. Stopping...");
49 cleanup();
50 // Give time for cleanup before exiting
51 setTimeout(() => process.exit(0), 1000);
52 });
53 // Handle uncaught exceptions
54 process.on("uncaughtException", (error) => {
55 console.error(`\nUncaught exception: ${error}`);
56 cleanup();
57 // Give time for cleanup before exiting
58 setTimeout(() => process.exit(1), 1000);
59 });
60}

Key Changes:

  • Proper KeyboardInterrupt handling
  • Graceful termination message sending
  • Detailed error context and timestamps
  • Proper exception type handling
  • Resource cleanup on all error paths
  • Connection status checking before operations

Note: Pricing is based on session duration so it is very important to close sessions properly to avoid unexpected usage and cost.

Migration checklist

  • Update API endpoint from v2 to v3
  • Update message type handling (Begin, Turn, Termination)
  • Add proper resource cleanup in all code paths
  • Update field names in message parsing
  • Add graceful shutdown with termination messages
  • Add detailed error logging with context
  • Test KeyboardInterrupt handling
  • Verify audio resource cleanup
  • Test connection failure scenarios

Testing your migration

  1. Basic Functionality: Verify transcription works with simple speech
  2. Error Handling: Test with invalid API keys or network issues
  3. Graceful Shutdown: Test Ctrl+C interruption
  4. Resource Cleanup: Monitor for memory leaks during extended use
  5. Message Formatting: Test with format_turns enabled/disabled

Common migration issues

Issue: “WebSocket connection failed”

Solution: Verify you’re using the new v3 endpoint URL and proper authentication header format.

Issue: “Message type not recognized”

Solution: Update message type handling from old names (SessionBegins, PartialTranscript) to new ones (Begin, Turn).

Benefits of migration

  • Improved Reliability: Better error handling and recovery
  • Lower Latency: Reduced buffer sizes for faster response
  • Enhanced Features: Formatted transcripts and session statistics
  • Better Resource Management: Proper cleanup prevents memory leaks
  • Graceful Shutdown: Clean termination with proper cleanup

Conclusion

This migration provides a more robust, maintainable, and feature-rich streaming transcription implementation. The enhanced error handling, resource management, and modern API features make it suitable for production use cases where reliability and performance are critical.