Multiplayer Voice Chat Flickering/Distortion

Godot Version:

4.3.stable

Question:

I have been trying to create a real-time voice chat system for my backrooms game. I have spent the past 3 days trying to get this to work smoothly, but every new system I make comes out sounding flickery, distorted, or incorrectly pitched. The code below is the best system I have been able to come up with; it somewhat works. I have tried most online tutorials for this. Even when using the demos and following the tutorials, the result still sounds similar to this system.
Originally I thought the microphone was not going into Godot correctly, but when using an AudioEffectRecord or just un-muting the mic channel, my microphone comes through just fine. The problem seems to be either the way I am using AudioEffectCapture or the way AudioEffectCapture deals with the audio. Any help would be greatly appreciated, as I would like to just get this working so I can move on.

# Push-to-talk voice chat node: captures microphone audio (or a test sine tone),
# buffers it per peer, sends it to other peers over RPC and plays it back
# through AudioStreamGenerator players.
extends Node

# When false, _ready() skips all audio setup and disables _process().
@export var voice_chat_active:bool = false

# Transfer channel passed to the @rpc annotation on receive_voice_data().
const VOICE_CHAT_CHANNEL = 4
# Multiplier applied to each captured microphone sample before clamping to [-1, 1].
const AMPLIFY_GAIN = 8.0
# Number of audio frames read from the capture effect, generated for the sine
# tone, and pushed to playback per packet.
const PACKET_SIZE = 1024
# Upper bound on samples kept per peer in peer_buffers; buffer_audio() drops
# the oldest samples when a new packet would exceed it.
const MAX_BUFFERED_FRAMES = 4096
# Frequency (Hz) and amplitude of the test tone produced by stream_sine_live().
const SINE_FREQ = 440.0
const SINE_AMP = 0.05

# Running phase (radians) of the test tone, carried across packets so that
# consecutive packets join without a discontinuity.
var sine_phase := 0.0
# Input state sampled every frame in _process() from the "push_to_sine" and
# "push_to_talk" actions.
var is_sending_sine := false
var is_talking := false
# Capture effect taken from slot 0 of the "Record" audio bus in _ready();
# only assigned on the multiplayer authority.
var effect: AudioEffectCapture

# Node under which buffer_audio() looks up remote players' playback nodes at
# "<peer_id>/VoiceChatManager/VoicePlayback".
@onready var rendered_subviewport := get_tree().root.get_node("MainViewport/Rendered")
# Player that is given an AudioStreamMicrophone stream in _ready().
@onready var microphone := $VoiceInput
# Maps peer id -> Array of mono float samples waiting to be played.
var peer_buffers := {}

# Sets up microphone capture (multiplayer authority only) and the local
# AudioStreamGenerator playback. When voice chat is inactive, per-frame
# processing is disabled and nothing else is configured.
func _ready():
	if not voice_chat_active:
		set_process(false)
		return

	if is_multiplayer_authority():
		# Microphone input setup
		microphone.stream = AudioStreamMicrophone.new()
		microphone.play()
		var idx := AudioServer.get_bus_index("Record")
		if idx == -1 or AudioServer.get_bus_effect_count(idx) == 0:
			# get_bus_index() returns -1 for an unknown bus; fail with a clear
			# message instead of an opaque engine error in get_bus_effect().
			push_error("Voice chat: audio bus \"Record\" is missing or has no effects; microphone capture is disabled.")
		else:
			# `as` yields null (instead of a typed-assignment error) when the
			# first effect on the bus is not a capture effect.
			effect = AudioServer.get_bus_effect(idx, 0) as AudioEffectCapture
			if effect == null:
				push_error("Voice chat: effect 0 on the \"Record\" bus is not an AudioEffectCapture; microphone capture is disabled.")
			else:
				# NOTE(review): the engine documents buffer_length as having no
				# effect once the capture buffer is initialized - confirm this
				# assignment actually applies; otherwise set it in the bus layout.
				effect.buffer_length = 0.1

	# Local playback setup
	var gen := AudioStreamGenerator.new()
	# Match the generator to the output mix rate so pushed samples are not
	# resampled. This does not help if the capture device itself runs at a
	# different rate than the output device.
	gen.mix_rate = AudioServer.get_mix_rate()
	gen.buffer_length = 2.0
	$VoicePlayback.stream = gen
	$VoicePlayback.play()

# Per-frame tick: on the multiplayer authority, samples the push-to-talk and
# push-to-sine actions and streams the corresponding audio; on every instance,
# feeds locally buffered audio to the playback generator.
func _process(_delta):
	if is_multiplayer_authority():
		is_talking = Input.is_action_pressed("push_to_talk")
		is_sending_sine = Input.is_action_pressed("push_to_sine")

		if is_talking:
			stream_mic_live()
		elif effect:
			# The capture ring buffer keeps filling while the key is released.
			# Without discarding it, the next key press would first transmit
			# stale audio recorded earlier, followed by an audible jump.
			effect.clear_buffer()
		if is_sending_sine:
			stream_sine_live()

	# Smoothly feed local playback
	play_local_voiceback()

# Reads every complete PACKET_SIZE chunk currently available from the capture
# effect, converts it to amplified, clamped mono (left channel only), buffers
# it locally and broadcasts it to all peers.
func stream_mic_live():
	if effect == null:
		return

	var my_id := multiplayer.get_unique_id()

	# Drain all full packets, not just one. At a 48 kHz mix rate a single
	# 1024-frame packet per rendered frame only keeps up above ~47 fps; below
	# that the capture buffer overflows and frames are dropped, which is
	# audible as crackling.
	while effect.can_get_buffer(PACKET_SIZE):
		var audio_data := effect.get_buffer(PACKET_SIZE)
		if audio_data.is_empty():
			# Nothing was actually delivered; do not send an empty packet
			# (and do not spin on the loop condition).
			return

		# Pre-size once instead of growing the array one append at a time.
		var mono_data := PackedFloat32Array()
		mono_data.resize(audio_data.size())
		for i in audio_data.size():
			mono_data[i] = clampf(audio_data[i].x * AMPLIFY_GAIN, -1.0, 1.0)

		# Buffer locally
		buffer_audio(my_id, mono_data)

		# Send to peers (peer id 0 = broadcast)
		rpc_id(0, "receive_voice_data", my_id, mono_data)

# Generates one PACKET_SIZE packet of a SINE_FREQ test tone at the output mix
# rate, continuing from the stored phase, then buffers it locally and
# broadcasts it to all peers.
func stream_sine_live():
	var my_id = multiplayer.get_unique_id()
	# Phase advance per sample, in radians.
	var phase_step = 2.0 * PI * SINE_FREQ / AudioServer.get_mix_rate()

	var tone = PackedFloat32Array()
	tone.resize(PACKET_SIZE)

	var n = 0
	while n < PACKET_SIZE:
		tone[n] = sin(sine_phase) * SINE_AMP
		sine_phase += phase_step
		# Keep the phase within one turn so it does not grow without bound.
		if sine_phase > 2.0 * PI:
			sine_phase -= 2.0 * PI
		n += 1

	buffer_audio(my_id, tone)
	rpc_id(0, "receive_voice_data", my_id, tone)

# Moves up to PACKET_SIZE buffered samples of this peer's own audio into the
# local $VoicePlayback generator, duplicating each mono sample to both
# channels. Does nothing when there is no buffer, no playback object, or not
# enough room in the generator.
func play_local_voiceback():
	var id = multiplayer.get_unique_id()
	if not peer_buffers.has(id):
		return

	var playback = $VoicePlayback.get_stream_playback()
	if not playback or not playback.can_push_buffer(PACKET_SIZE):
		return

	var buffer: Array = peer_buffers[id]
	var to_push: int = mini(PACKET_SIZE, buffer.size())
	if to_push == 0:
		return

	# Build the whole chunk and hand it over in one call: push_buffer() avoids
	# up to 1024 script-to-engine calls, and a single slice() replaces
	# per-sample pop_front(), which shifts the entire array every time.
	var frames := PackedVector2Array()
	frames.resize(to_push)
	for i in to_push:
		var s: float = buffer[i]
		frames[i] = Vector2(s, s)
	playback.push_buffer(frames)

	# Keep only the samples that were not consumed.
	peer_buffers[id] = buffer.slice(to_push)

# RPC entry point for voice packets sent by other peers.
# peer_id: id claimed by the sender; only used when the real sender id is
#          unavailable (i.e. the function was called directly, not via RPC).
# mono_data: mono float samples to queue for playback.
@rpc("any_peer", "call_remote", "reliable", VOICE_CHAT_CHANNEL)
func receive_voice_data(peer_id: int, mono_data: PackedFloat32Array):
	# Any peer may call this, so the peer_id argument is untrusted: attribute
	# the audio to the peer that actually sent the RPC so one client cannot
	# inject audio under another player's id.
	var sender_id := multiplayer.get_remote_sender_id()
	if sender_id == 0:
		sender_id = peer_id

	# Ignore empty packets and reject ones larger than the whole per-peer
	# buffer; legitimate senders transmit PACKET_SIZE samples at a time.
	if mono_data.is_empty() or mono_data.size() > MAX_BUFFERED_FRAMES:
		return

	buffer_audio(sender_id, mono_data)

# Appends `data` to the sample queue of `peer_id`, keeping at most
# MAX_BUFFERED_FRAMES of the newest samples. For remote peers, immediately
# forwards up to PACKET_SIZE queued samples to that peer's playback node when
# the node exists and has room.
func buffer_audio(peer_id: int, data: PackedFloat32Array):
	if not peer_buffers.has(peer_id):
		peer_buffers[peer_id] = []

	var buf: Array = peer_buffers[peer_id]

	# Prevent overflow. A packet that is by itself larger than the cap is
	# reduced to its newest samples so the cap really holds.
	var incoming := Array(data)
	if incoming.size() > MAX_BUFFERED_FRAMES:
		incoming = incoming.slice(incoming.size() - MAX_BUFFERED_FRAMES)

	# Drop the oldest queued samples with one slice() instead of repeated
	# pop_front(), each of which shifts the whole array.
	var overflow: int = buf.size() + incoming.size() - MAX_BUFFERED_FRAMES
	if overflow > 0:
		buf = buf.slice(overflow)

	buf.append_array(incoming)
	peer_buffers[peer_id] = buf

	# If this is a remote peer, make sure playback is active
	if peer_id != multiplayer.get_unique_id():
		var node_path = str(peer_id) + "/VoiceChatManager/VoicePlayback"
		if rendered_subviewport.has_node(node_path):
			var playback = rendered_subviewport.get_node(node_path).get_stream_playback()
			if playback and playback.can_push_buffer(PACKET_SIZE):
				var count: int = mini(PACKET_SIZE, buf.size())
				# Hand the chunk over in a single call rather than one
				# push_frame() call per sample.
				var frames := PackedVector2Array()
				frames.resize(count)
				for i in count:
					var sample: float = buf[i]
					frames[i] = Vector2(sample, sample)
				playback.push_buffer(frames)
				peer_buffers[peer_id] = buf.slice(count)

This function may return -1 if there are not enough frames to fill the requested size. I would probably put some extra checking here, for example returning early if there is no audio to send.

Another thing is that I'm not sure you handle underflow, i.e. you should buffer some audio before starting playback, at least two periods.

Also, you didn't specify the sample rate used here; this could have a large impact if you are using a high sample rate like 44.1 kHz.

Well, I implemented the fixes you suggested, which were actually very useful, but it turns out the issue was a sample rate mismatch. I thought I had completely eliminated that possibility, but since I am using SteelSeries Sonar to manage my audio, it defaults my output sample rate to 96 kHz for some reason, while my mic uses 48 kHz. That completely breaks my system. I tried to resample the audio, but that did not go well; it broke even more. My solution was to output to my headphones directly: they run at 48 kHz, so the input and output rates match, and that completely fixes all my issues.
This doesn't feel like a great solution, but for now I think it is good enough, and I will keep you updated if I come up with a better one.

1 Like