Compute shader RenderingDevice buffer_update is hanging

Godot Version

4.0.3

Question

I’m trying to use a compute shader to generate 3D noise at each vertex of a mesh I’ve constructed, to offset the position of that vertex, for the purposes of creating a procedurally generated planet. At first I had some trouble with the fact I plan for the number of vertices to change during runtime, causing buffers to change sizes. I got around this by resizing the PackedByteArray inputs to those to a really high number (in this case 12810241024 (128 megabytes)). When calling buffer_update to update my buffer storing the vertices, the thread its running on hangs (seemingly indefinitely, but its impossible to tell). I put print statements in the code to determine what exactly it was hanging on. The code is as follows:

extends MeshInstance3D

var arr_mesh
var last_t
var octahedron
var camera
var semaphore
var thread

var rd: RenderingDevice
var shader: RID
var pipeline: RID
var compute_list: int
var bindings: Array
var uniform_set: RID

var vertex_buffer: RID
var height_buffer: RID

@export var base_height = 0.95
@export var height_scale = 0.05
@export var radius = 10.0
@export var subdivisions = 18;
# Called when the node enters the scene tree for the first time.
func _ready():
	setup_shader()
	semaphore = Semaphore.new()
	thread = Thread.new()
	thread.start(extra_thread)
	octahedron = create_octahedron()
	arr_mesh = subdivide_tri(octahedron, get_all_tri_indices(octahedron), subdivisions)
	self.mesh = arr_mesh
	camera = self.get_parent().get_parent().find_child("Camera3D")

func _physics_process(delta):
	semaphore.post()

func extra_thread():
	while true:
		semaphore.wait()
		
		arr_mesh = subdivide_tri(octahedron, get_all_tri_indices(octahedron), subdivisions)
		self.mesh = arr_mesh
		
		print("should go")
		var heights = calculate_heights(arr_mesh.get_faces())
		print(heights)

func get_UVs(v1:Vector3,v2:Vector3,v3:Vector3):
	var phi1 = -asin(v1.y)
	var phi2 = -asin(v2.y)
	var phi3 = -asin(v3.y)
	var theta1 = -atan2(v1.x, v1.z)
	var theta2 = -atan2(v2.x, v2.z)
	var theta3 = -atan2(v3.x, v3.z)
	
	var r1 = (PI/2 - abs(phi1))*2/PI
	var r2 = (PI/2 - abs(phi2))*2/PI
	var r3 = (PI/2 - abs(phi3))*2/PI

	var sgn1 = sign(phi1)
	var sgn2 = sign(phi2)
	var sgn3 = sign(phi3)
	
	if sgn1 == 0:
		if sgn3 == 0:
			sgn1 = sgn2
		else:
			sgn1 = sgn3
	
	if sgn2 == 0:
		if sgn1 == 0:
			sgn2 = sgn3
		else:
			sgn2 = sgn1
			
	if sgn3 == 0:
		if sgn2 == 0:
			sgn3 = sgn1
		else:
			sgn3 = sgn2
	
	var U1 = (-sin(theta1)*r1 + sgn1 + 2)/4
	var V1 = (cos(theta1)*r1 + 1)/2
	var U2 = (-sin(theta2)*r2 + sgn2 + 2)/4
	var V2 = (cos(theta2)*r2 + 1)/2
	var U3 = (-sin(theta3)*r3 + sgn3 + 2)/4
	var V3 = (cos(theta3)*r3 + 1)/2
	return PackedVector2Array([Vector2(U1,V1), Vector2(U2,V2), Vector2(U3,V3)])

func create_octahedron():
	var st = SurfaceTool.new()
	st.begin(Mesh.PRIMITIVE_TRIANGLES)
	
	st.add_vertex(Vector3(0,0,-1))
	st.add_vertex(Vector3(0,1,0))
	st.add_vertex(Vector3(-1,0,0))
	
	st.add_vertex(Vector3(0,0,-1))
	st.add_vertex(Vector3(0,-1,0))
	st.add_vertex(Vector3(1,0,0))
	
	st.add_vertex(Vector3(0,0,-1))
	st.add_vertex(Vector3(1,0,0))
	st.add_vertex(Vector3(0,1,0))
	
	st.add_vertex(Vector3(0,0,-1))
	st.add_vertex(Vector3(-1,0,0))
	st.add_vertex(Vector3(0,-1,0))
	
	st.add_vertex(Vector3(0,0,1))
	st.add_vertex(Vector3(-1,0,0))
	st.add_vertex(Vector3(0,1,0))
	
	st.add_vertex(Vector3(0,0,1))
	st.add_vertex(Vector3(1,0,0))
	st.add_vertex(Vector3(0,-1,0))
	
	st.add_vertex(Vector3(0,0,1))
	st.add_vertex(Vector3(0,1,0))
	st.add_vertex(Vector3(1,0,0))
	
	st.add_vertex(Vector3(0,0,1))
	st.add_vertex(Vector3(0,-1,0))
	st.add_vertex(Vector3(-1,0,0))
	
	st.index()
	return st.commit()

func get_all_tri_indices(mesh: ArrayMesh):
	return PackedInt32Array(range(mesh.get_faces().size()/3))

func subdivide_tri(mesh: ArrayMesh, tri_indices: PackedInt32Array, sub: int):
	var vertices = mesh.get_faces()
	var new_vertices = PackedVector3Array()
	if sub == 1 or tri_indices.is_empty():
		return mesh
	for i in range(tri_indices.size()):
		#fill out the mesh inbetween
		var old_vertices
		if i == 0:
			old_vertices = vertices.slice(0, tri_indices[i]*3)
		else:
			old_vertices = vertices.slice(tri_indices[i-1]*3 + 3, tri_indices[i]*3)
		for vertex in old_vertices:
			new_vertices.append(vertex)
		
		#compute the division
		var a = vertices[tri_indices[i]*3]
		var b = vertices[tri_indices[i]*3 +1]
		var c = vertices[tri_indices[i]*3 +2]
		
		#handle half of the tris
		for u in range(sub+1):
			var u1 = float(u)/sub
			var u2 = float(u+1)/sub
			var u3 = float(u)/sub
			for v in range(0, sub-u):
				var v1 = float(v)/sub
				var v2 = float(v)/sub
				var v3 = float(v+1)/sub
				var coords1 = u1*c + v1*a + (1-u1-v1)*b
				var coords2 = u2*c + v2*a + (1-u2-v2)*b
				var coords3 = u3*c + v3*a + (1-u3-v3)*b
				new_vertices.append(coords1)
				new_vertices.append(coords2)
				new_vertices.append(coords3)
		
		#handle the other half of the tris
		for u in range(1,sub):
			var u1 = float(u)/sub
			var u2 = float(u)/sub
			var u3 = float(u-1)/sub
			for v in range(0, sub-u):
				var v1 = float(v)/sub
				var v2 = float(v+1)/sub
				var v3 = float(v+1)/sub
				var coords1 = u1*c + v1*a + (1-u1-v1)*b
				var coords2 = u2*c + v2*a + (1-u2-v2)*b
				var coords3 = u3*c + v3*a + (1-u3-v3)*b
				new_vertices.append(coords1)
				new_vertices.append(coords2)
				new_vertices.append(coords3)
		
	var final_vertices = vertices.slice(tri_indices[tri_indices.size()-1]*3 +3)
	for vertex in final_vertices:
		new_vertices.append(vertex)
		
	
	var st = SurfaceTool.new()
	st.begin(Mesh.PRIMITIVE_TRIANGLES)
	for vertex in new_vertices:
		st.add_vertex(vertex)
		
	st.index()
	st.generate_normals()
	var output = st.commit()
	return output

func setup_shader():
	rd = RenderingServer.create_local_rendering_device()
	var shader_file := preload("res://computeHeights.glsl")
	var shader_spirv := shader_file.get_spirv()
	shader = rd.shader_create_from_spirv(shader_spirv)
	
	pipeline = rd.compute_pipeline_create(shader)
	compute_list = rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(compute_list, pipeline)
	
	var vertices_floats = PackedFloat32Array([0.0])
	var vertices_bytes = vertices_floats.to_byte_array()
	vertices_bytes.resize(134217728)
	
	var heights := PackedFloat32Array([0.0])
	var heights_bytes := heights.to_byte_array()
	heights_bytes.resize(134217728)
	
	vertex_buffer = rd.storage_buffer_create(134217728, vertices_bytes)
	height_buffer = rd.storage_buffer_create(134217728, heights_bytes)
	
	bindings = [generate_uniform(vertex_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 0), \
	generate_uniform(height_buffer, RenderingDevice.UNIFORM_TYPE_STORAGE_BUFFER, 1)]
	
	uniform_set = rd.uniform_set_create(bindings, shader, 0)
	
func ComputeHeights(vertices: PackedVector3Array):
	print("0")
	var vertices_floats = PackedFloat32Array()
	for vertex in vertices:
		vertices_floats.append(vertex.x)
		vertices_floats.append(vertex.y)
		vertices_floats.append(vertex.z)
		vertices_floats.append(0.0)
	var vertices_bytes = vertices_floats.to_byte_array()
	
	print("1")
	rd.buffer_update(vertex_buffer, 0, vertices_bytes.size(), vertices_bytes)
	
	print("2")
	compute_list = rd.compute_list_begin()
	rd.compute_list_bind_compute_pipeline(compute_list, pipeline)
	rd.compute_list_bind_uniform_set(compute_list, uniform_set, 0)
	rd.compute_list_dispatch(compute_list, ceil(vertices.size()/1024.0), 1, 1)
	rd.compute_list_end()
	
	print("3")
	rd.submit()

func calculate_heights(vertices: PackedVector3Array):
	ComputeHeights(vertices)
	rd.sync()
	var height_bytes = rd.buffer_get_data(height_buffer)
	var heights = height_bytes.to_float32_array()
	return heights

func generate_uniform(data_buffer, type, bind: int):
	var uniform := RDUniform.new()
	uniform.uniform_type = type
	uniform.binding = bind # this needs to match the "binding" in our shader file
	uniform.add_id(data_buffer)
	return uniform
	
func _exit_tree():
	rd.free_rid(pipeline)

	rd.free_rid(uniform_set)

	rd.free_rid(vertex_buffer)

	rd.free_rid(height_buffer)

	rd.free()

the GLSL code for the compute shader isn’t relevent, I already tested that it works without trying to update the buffer.
if anyone can solve this for me, that would be great
it would also be great if I maybe could get an additional solution that isnt a workaround, to the problem of dynamic amounts of vertices.