Migration guide: Speechmatics to AssemblyAI

This guide walks through the process of migrating from Speechmatics to AssemblyAI for Streaming Speech-to-Text.

Get started

Before we begin, make sure you have an AssemblyAI account and an API key. You can sign up for a free account and get your API key from your dashboard.

Side-by-side code comparison

Below is a side-by-side comparison of basic Python code snippets for transcribing streaming audio with Speechmatics and AssemblyAI.

import pyaudio
import websocket
import json
import threading
import time

# --- Configuration ---

YOUR_API_KEY = "YOUR-API-KEY"  # Replace with your actual API key

CONNECTION_PARAMS = {
    "language": "en",
    "enable_partials": True,
    "max_delay": 2.0
}
API_ENDPOINT = "wss://eu2.rt.speechmatics.com/v2/en"

# Audio Configuration

FRAMES_PER_BUFFER = 1024  # Chunk size
SAMPLE_RATE = None  # Will be set based on device capabilities
CHANNELS = 1
FORMAT = pyaudio.paFloat32  # Speechmatics uses float32 format

# Global variables for audio stream and websocket

audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # To signal the audio thread to stop
audio_seq_no = 0  # Track number of audio chunks sent

# --- WebSocket Event Handlers ---

def on_open(ws):
    """Called when the WebSocket connection is established."""
    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    # Send StartRecognition message
    start_message = {
        "message": "StartRecognition",
        "audio_format": {
            "type": "raw",
            "encoding": "pcm_f32le",
            "sample_rate": SAMPLE_RATE
        },
        "transcription_config": {
            "language": CONNECTION_PARAMS["language"],
            "enable_partials": CONNECTION_PARAMS["enable_partials"],
            "max_delay": CONNECTION_PARAMS["max_delay"]
        }
    }
    ws.send(json.dumps(start_message))

def on_message(ws, message):
    global audio_seq_no

    try:
        data = json.loads(message)
        msg_type = data.get('message')

        if msg_type == "RecognitionStarted":
            session_id = data.get('id')
            print(f"\nSession began: ID={session_id}")

            # Start sending audio data in a separate thread
            def stream_audio():
                global audio_seq_no, stream
                print("Starting audio streaming...")
                while not stop_event.is_set():
                    try:
                        audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                        # Send audio data as binary message
                        ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                        audio_seq_no += 1
                    except Exception as e:
                        print(f"Error streaming audio: {e}")
                        # If stream read fails, likely means it's closed, stop the loop
                        break
                print("Audio streaming stopped.")

            global audio_thread
            audio_thread = threading.Thread(target=stream_audio)
            audio_thread.daemon = True  # Allow main thread to exit even if this thread is running
            audio_thread.start()

        elif msg_type == "AddPartialTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                print(f"\r{transcript}", end='')

        elif msg_type == "AddTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                # Clear previous line for final messages
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)

        elif msg_type == "EndOfTranscript":
            print("\nSession Terminated: Transcription complete")

        elif msg_type == "Error":
            error_type = data.get('type')
            reason = data.get('reason')
            print(f"\nWebSocket Error: {error_type} - {reason}")
            stop_event.set()

    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")

def on_error(ws, error):
    """Called when a WebSocket error occurs."""
    print(f"\nWebSocket Error: {error}")
    # Attempt to signal stop on error
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed."""
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    # Ensure audio resources are released
    global stream, audio
    stop_event.set()  # Signal audio thread just in case it's still running

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None
    if audio:
        audio.terminate()
        audio = None
    # Try to join the audio thread to ensure clean exit
    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)

# --- Main Execution ---

def run():
    global audio, stream, ws_app, SAMPLE_RATE

    # Initialize PyAudio
    audio = pyaudio.PyAudio()

    # Get default input device (can alter to specify specific device)
    default_device = audio.get_default_input_device_info()
    device_index = default_device['index']
    SAMPLE_RATE = int(audio.get_device_info_by_index(device_index)['defaultSampleRate'])

    print(f"Using microphone: {default_device['name']}")

    # Open microphone stream
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
            input_device_index=device_index
        )
        print("Microphone stream opened successfully.")
        print("Speak into your microphone. Press Ctrl+C to stop.")
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        if audio:
            audio.terminate()
        return  # Exit if microphone cannot be opened

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": f"Bearer {YOUR_API_KEY}"},  # Speechmatics uses Bearer token
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run WebSocketApp in a separate thread to allow main thread to catch KeyboardInterrupt
    ws_thread = threading.Thread(target=lambda: ws_app.run_forever(ping_interval=30, ping_timeout=10))
    ws_thread.daemon = True
    ws_thread.start()

    try:
        # Keep main thread alive until interrupted
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()  # Signal audio thread to stop

        # Send EndOfStream message to the server
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                end_message = {
                    "message": "EndOfStream",
                    "last_seq_no": audio_seq_no
                }
                print(f"Sending termination message: {json.dumps(end_message)}")
                ws_app.send(json.dumps(end_message))
                # Give a moment for messages to process before forceful close
                time.sleep(1)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        # Close the WebSocket connection (will trigger on_close)
        if ws_app:
            ws_app.close()

        # Wait for WebSocket thread to finish
        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        # Final cleanup (already handled in on_close, but good as a fallback)
        if stream and stream.is_active():
            stream.stop_stream()
        if stream:
            stream.close()
        if audio:
            audio.terminate()
        print("Cleanup complete. Exiting.")

if __name__ == "__main__":
    run()
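
For the AssemblyAI side of the comparison, a condensed equivalent might look like the following sketch. The endpoint, query parameters, and message types are based on AssemblyAI's v3 Streaming API; check the current API reference for the authoritative version. The step-by-step notes below walk through each difference in detail.

import pyaudio
import websocket
import json
import threading
import urllib.parse

YOUR_API_KEY = "YOUR-API-KEY"  # Replace with your actual API key

# Session options are passed as query parameters; no handshake message is needed
CONNECTION_PARAMS = {"sample_rate": 16000, "format_turns": True}
API_ENDPOINT = "wss://streaming.assemblyai.com/v3/ws?" + urllib.parse.urlencode(CONNECTION_PARAMS)

FRAMES_PER_BUFFER = 800  # 50 ms of audio at 16 kHz
stop_event = threading.Event()  # Set this to stop the audio thread

audio = pyaudio.PyAudio()
stream = audio.open(input=True, frames_per_buffer=FRAMES_PER_BUFFER,
                    channels=1, format=pyaudio.paInt16, rate=16000)

def on_open(ws):
    # Configuration was already sent in the URL, so start streaming immediately
    def stream_audio():
        while not stop_event.is_set():
            data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
            ws.send(data, websocket.ABNF.OPCODE_BINARY)
    threading.Thread(target=stream_audio, daemon=True).start()

def on_message(ws, message):
    data = json.loads(message)
    if data.get("type") == "Turn":
        transcript = data.get("transcript", "")
        if data.get("turn_is_formatted"):
            print(transcript)                  # Final, formatted turn
        else:
            print(f"\r{transcript}", end="")   # In-progress turn
    elif data.get("type") == "Termination":
        print("\nSession terminated")

ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    header={"Authorization": YOUR_API_KEY},  # Raw key, no "Bearer" prefix
    on_open=on_open,
    on_message=on_message,
)
ws_app.run_forever()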

Step 1: Install dependencies

Install the required Python packages.

$ pip install websocket-client pyaudio

Step 2: Configure the API key

In this step, you’ll configure your API key to authenticate your requests.

1. Navigate to API Keys in your account settings and copy your API key.

2. Store your API key in a variable. Replace YOUR-API-KEY with your copied API key.

import pyaudio
import websocket
import json
import threading
import time

YOUR_API_KEY = "YOUR-API-KEY"
Optionally, Speechmatics lets you generate a short-lived temporary token instead of authenticating with your long-lived API key:

import requests

def generate_temp_token(api_key, ttl=60):
    """Generate a temporary authentication token that expires after the specified time."""
    url = "https://mp.speechmatics.com/v1/api_keys?type=rt"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }
    payload = {
        "ttl": ttl
    }

    response = requests.post(url, json=payload, headers=headers)
    data = response.json()
    return data.get("key_value")
Token usage

Instead of authorizing your request with YOUR_API_KEY (via request header), use the temporary token generated by this function when establishing the WebSocket connection.

API_ENDPOINT = f"wss://eu2.rt.speechmatics.com/v2?jwt={generate_temp_token(YOUR_API_KEY)}"
ws_app = websocket.WebSocketApp(
    API_ENDPOINT,
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)
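
AssemblyAI offers a similar temporary-token flow. Below is a minimal sketch, assuming the v3 Streaming token endpoint (https://streaming.assemblyai.com/v3/token) and its expires_in_seconds parameter; verify both against the current API reference.

import requests

def generate_aai_temp_token(api_key, expires_in_seconds=60):
    """Generate a temporary AssemblyAI streaming token (sketch)."""
    response = requests.get(
        "https://streaming.assemblyai.com/v3/token",
        headers={"Authorization": api_key},  # Raw key, no "Bearer" prefix
        params={"expires_in_seconds": expires_in_seconds},
    )
    response.raise_for_status()
    return response.json().get("token")

# The token is then passed as a query parameter rather than a header, e.g.:
# API_ENDPOINT = f"wss://streaming.assemblyai.com/v3/ws?sample_rate=16000&token={generate_aai_temp_token(YOUR_API_KEY)}"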

Step 3: Set up audio configuration

Configure the audio settings for your microphone stream.

import pyaudio
import threading

# Audio Configuration
FRAMES_PER_BUFFER = 1024  # Chunk size
SAMPLE_RATE = None  # Will be set based on device capabilities
CHANNELS = 1
FORMAT = pyaudio.paFloat32  # Speechmatics uses float32 format

# Global variables for audio stream and websocket
audio = None
stream = None
ws_app = None
audio_thread = None
stop_event = threading.Event()  # To signal the audio thread to stop
audio_seq_no = 0  # Track number of audio chunks sent

def run():
    global audio, stream, ws_app, SAMPLE_RATE

    # Initialize PyAudio
    audio = pyaudio.PyAudio()

    # Get default input device (can alter to specify specific device)
    default_device = audio.get_default_input_device_info()
    device_index = default_device['index']
    SAMPLE_RATE = int(audio.get_device_info_by_index(device_index)['defaultSampleRate'])

    print(f"Using microphone: {default_device['name']}")

    # Open microphone stream
    try:
        stream = audio.open(
            input=True,
            frames_per_buffer=FRAMES_PER_BUFFER,
            channels=CHANNELS,
            format=FORMAT,
            rate=SAMPLE_RATE,
            input_device_index=device_index
        )
        print("Microphone stream opened successfully.")
        print("Speak into your microphone. Press Ctrl+C to stop.")
    except Exception as e:
        print(f"Error opening microphone stream: {e}")
        if audio:
            audio.terminate()
        return  # Exit if microphone cannot be opened
Sample rate

Speechmatics recommends using a 16 kHz sample rate for speech audio. Anything higher will be downsampled server-side.

Audio data format

If you want to stream data from elsewhere, make sure that your audio data is in the following format:

  • Single-channel
  • PCM16 (default) or Mu-law encoding (see Specifying the encoding)
  • A sample rate that matches the value of the sample_rate parameter (16 kHz is recommended)
  • 50 milliseconds of audio per message (larger chunk sizes are workable, but may result in latency fluctuations)
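
For AssemblyAI, the equivalent PyAudio configuration might look like the sketch below. Note paInt16 (PCM16) in place of paFloat32 and a fixed 16 kHz sample rate; the 800-frame buffer size is an assumption chosen so each chunk carries 50 ms of audio.

import pyaudio

# Audio configuration for AssemblyAI (sketch)
SAMPLE_RATE = 16000                    # Matches the sample_rate query parameter
CHANNELS = 1                           # Single-channel audio
FORMAT = pyaudio.paInt16               # PCM16 encoding (AssemblyAI default)
FRAMES_PER_BUFFER = SAMPLE_RATE // 20  # 800 frames = 50 ms of audio per message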

Step 4: Create event handlers

In this step, you’ll set up callback functions that handle the different events.

1. Create functions to handle the events from the real-time service.

import json

def on_open(ws):
    """Called when the WebSocket connection is established."""
    print("WebSocket connection opened.")
    print(f"Connected to: {API_ENDPOINT}")

    # Send StartRecognition message
    start_message = {
        "message": "StartRecognition",
        "audio_format": {
            "type": "raw",
            "encoding": "pcm_f32le",
            "sample_rate": SAMPLE_RATE
        },
        "transcription_config": {
            "language": CONNECTION_PARAMS["language"],
            "enable_partials": CONNECTION_PARAMS["enable_partials"],
            "max_delay": CONNECTION_PARAMS["max_delay"]
        }
    }
    ws.send(json.dumps(start_message))

def on_error(ws, error):
    """Called when a WebSocket error occurs."""
    print(f"\nWebSocket Error: {error}")
    # Attempt to signal stop on error
    stop_event.set()

def on_close(ws, close_status_code, close_msg):
    """Called when the WebSocket connection is closed."""
    print(f"\nWebSocket Disconnected: Status={close_status_code}, Msg={close_msg}")
    # Ensure audio resources are released
    global stream, audio
    stop_event.set()  # Signal audio thread just in case it's still running

    if stream:
        if stream.is_active():
            stream.stop_stream()
        stream.close()
        stream = None
    if audio:
        audio.terminate()
        audio = None
    # Try to join the audio thread to ensure clean exit
    if audio_thread and audio_thread.is_alive():
        audio_thread.join(timeout=1.0)
Connection configuration

Speechmatics requires a handshake in which the connection configuration is sent before any audio is streamed. AssemblyAI instead lets you configure the connection via query parameters in the URL and start streaming audio immediately.

The Speechmatics handshake begins when on_open sends a StartRecognition message to configure the session. Audio streaming starts only once a RecognitionStarted message has been received in the on_message callback.
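
In practice, this means the AssemblyAI session options move into the connection URL, as in the following sketch (the endpoint and the sample_rate and format_turns parameters are based on AssemblyAI's v3 Streaming API):

import urllib.parse

CONNECTION_PARAMS = {
    "sample_rate": 16000,  # Must match the audio you send
    "format_turns": True,  # Request formatted final transcripts
}
API_ENDPOINT = "wss://streaming.assemblyai.com/v3/ws?" + urllib.parse.urlencode(CONNECTION_PARAMS)

# With the configuration in the URL, on_open only needs to start the audio
# thread; there is no StartRecognition message and no wait for confirmation.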

2. Create another function to handle transcripts.

Speechmatics has separate partial (AddPartialTranscript) and final (AddTranscript) transcript messages. The session termination message is EndOfTranscript.

AssemblyAI instead uses a Turn object with a turn_is_formatted boolean flag to indicate finality. The session termination message is Termination. For more on the Turn object, see the Streaming API Core concepts section.

def on_message(ws, message):
    global audio_seq_no

    try:
        data = json.loads(message)
        msg_type = data.get('message')

        if msg_type == "RecognitionStarted":
            session_id = data.get('id')
            print(f"\nSession began: ID={session_id}")

            # Start sending audio data in a separate thread
            def stream_audio():
                global audio_seq_no, stream
                print("Starting audio streaming...")
                while not stop_event.is_set():
                    try:
                        audio_data = stream.read(FRAMES_PER_BUFFER, exception_on_overflow=False)
                        # Send audio data as binary message
                        ws.send(audio_data, websocket.ABNF.OPCODE_BINARY)
                        audio_seq_no += 1
                    except Exception as e:
                        print(f"Error streaming audio: {e}")
                        # If stream read fails, likely means it's closed, stop the loop
                        break
                print("Audio streaming stopped.")

            global audio_thread
            audio_thread = threading.Thread(target=stream_audio)
            audio_thread.daemon = True  # Allow main thread to exit even if this thread is running
            audio_thread.start()

        elif msg_type == "AddPartialTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                print(f"\r{transcript}", end='')

        elif msg_type == "AddTranscript":
            transcript = data.get('metadata', {}).get('transcript', '')
            if transcript:
                # Clear previous line for final messages
                print('\r' + ' ' * 80 + '\r', end='')
                print(transcript)

        elif msg_type == "EndOfTranscript":
            print("\nSession Terminated: Transcription complete")

        elif msg_type == "Error":
            error_type = data.get('type')
            reason = data.get('reason')
            print(f"\nWebSocket Error: {error_type} - {reason}")
            stop_event.set()

    except json.JSONDecodeError as e:
        print(f"Error decoding message: {e}")
    except Exception as e:
        print(f"Error handling message: {e}")
Transcript message structure

Please note the difference in transcript message structure below:

# Speechmatics
{
    "message": "AddPartialTranscript",
    "metadata": {
        "transcript": "hello world"
    },
    # Other transcript data...
}

# AssemblyAI
{
    "type": "Turn",
    "transcript": "hello world",
    "turn_is_formatted": false,
    # Other transcript data...
}
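
An AssemblyAI on_message handler can therefore branch on the type field rather than on message. Below is a minimal sketch, assuming the fields shown above along with the Begin and Termination lifecycle messages from the v3 Streaming API:

def on_message(ws, message):
    data = json.loads(message)
    msg_type = data.get("type")

    if msg_type == "Begin":
        print(f"\nSession began: ID={data.get('id')}")
    elif msg_type == "Turn":
        transcript = data.get("transcript", "")
        if data.get("turn_is_formatted"):
            # Formatted turns are final: clear the partial line and print
            print('\r' + ' ' * 80 + '\r', end='')
            print(transcript)
        else:
            # Unformatted turns stream in like partials
            print(f"\r{transcript}", end='')
    elif msg_type == "Termination":
        print("\nSession Terminated")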

Step 5: Connect and start transcription

To stream audio, establish a WebSocket connection to the real-time service.

def run():
    global audio, stream, ws_app, SAMPLE_RATE
    # Skipping audio/microphone setup code...

    # Create WebSocketApp
    ws_app = websocket.WebSocketApp(
        API_ENDPOINT,
        header={"Authorization": f"Bearer {YOUR_API_KEY}"},  # Speechmatics uses Bearer token
        on_open=on_open,
        on_message=on_message,
        on_error=on_error,
        on_close=on_close,
    )

    # Run WebSocketApp in a separate thread to allow main thread to catch KeyboardInterrupt
    ws_thread = threading.Thread(target=lambda: ws_app.run_forever(ping_interval=30, ping_timeout=10))
    ws_thread.daemon = True
    ws_thread.start()
Authorization

Note that while both services use an Authorization header to authenticate the WebSocket connection, Speechmatics uses a Bearer prefix, while AssemblyAI does not.
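
The AssemblyAI equivalent might look like the sketch below: the same WebSocketApp pattern, but with the raw API key in the Authorization header and the session configuration already encoded into API_ENDPOINT.

ws_app = websocket.WebSocketApp(
    API_ENDPOINT,  # e.g. wss://streaming.assemblyai.com/v3/ws?sample_rate=16000
    header={"Authorization": YOUR_API_KEY},  # Raw key, no "Bearer" prefix
    on_open=on_open,
    on_message=on_message,
    on_error=on_error,
    on_close=on_close,
)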

Step 6: Close the connection

Keep the main thread alive until it is interrupted, handle keyboard interrupts and raised exceptions, and clean up when the WebSocket connection closes.

def run():
    global audio, stream, ws_app, SAMPLE_RATE
    # Skipping audio/microphone setup and WebSocket connection code...

    try:
        # Keep main thread alive until interrupted
        while ws_thread.is_alive():
            time.sleep(0.1)
    except KeyboardInterrupt:
        print("\nCtrl+C received. Stopping...")
        stop_event.set()  # Signal audio thread to stop

        # Send EndOfStream message to the server
        if ws_app and ws_app.sock and ws_app.sock.connected:
            try:
                end_message = {
                    "message": "EndOfStream",
                    "last_seq_no": audio_seq_no
                }
                print(f"Sending termination message: {json.dumps(end_message)}")
                ws_app.send(json.dumps(end_message))
                # Give a moment for messages to process before forceful close
                time.sleep(1)
            except Exception as e:
                print(f"Error sending termination message: {e}")

        # Close the WebSocket connection (will trigger on_close)
        if ws_app:
            ws_app.close()

        # Wait for WebSocket thread to finish
        ws_thread.join(timeout=2.0)

    except Exception as e:
        print(f"\nAn unexpected error occurred: {e}")
        stop_event.set()
        if ws_app:
            ws_app.close()
        ws_thread.join(timeout=2.0)

    finally:
        # Final cleanup (already handled in on_close, but good as a fallback)
        if stream and stream.is_active():
            stream.stop_stream()
        if stream:
            stream.close()
        if audio:
            audio.terminate()
        print("Cleanup complete. Exiting.")

The connection will close automatically when you press Ctrl+C. Whether the session ends from an interrupt or an error, the on_close handler will clean up the audio resources.
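
The termination messages also differ: Speechmatics expects EndOfStream with the last audio sequence number, while AssemblyAI expects a simple Terminate message. Below is a sketch of the AssemblyAI side, based on the v3 Streaming API:

# Inside the KeyboardInterrupt handler, in place of the EndOfStream message:
if ws_app and ws_app.sock and ws_app.sock.connected:
    try:
        terminate_message = {"type": "Terminate"}
        print(f"Sending termination message: {json.dumps(terminate_message)}")
        ws_app.send(json.dumps(terminate_message))
        time.sleep(1)  # Allow the server's final Termination message to arrive
    except Exception as e:
        print(f"Error sending termination message: {e}")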

Step 7: Execute the main function

Finally, run the main function to start the program.

if __name__ == "__main__":
    run()

Next steps

To learn more about both Streaming APIs, their key differences, and how to best migrate, see the following resources:

  • AssemblyAI Streaming Speech-to-Text documentation

  • Speechmatics real-time transcription documentation

Need some help?

If you get stuck or have any other questions, contact our support team at support@assemblyai.com or create a support ticket.