Fuzzy Search In Games?

Godot Version

Replace this line with your Godot version

Question

Hellooooooooooo!
This is going to be sort of weird, i dont know if im phrasing my quiestion correct but here goes.

I want to have my charecter be able to look at a stack of something, and the player can search something in there, but it wouldnt need to be exact (fuzzy search). Is there any way i can do that without having to put the multitude of different variations in and running a check for each one? Is there just a simple way to enable fuzzy search so the charecter can search for the name of something but not get it exact? (i only want to be able to search for a few things, and id give the player hints on what to type)

I find String’s is_subsequence_ofn to be a good fuzzy-ish search function

such as

item.visible = search_query.is_subsequence_ofn(item.name)

Godot’s String also has similarity() function that can help with fuzzy search. You can put all candidates into an array and sort it by similarity.

You can also just implement a fuzzy string search function yourself if you want, there’s plenty of tutorials for it in other programming languages.

Hopefully this helps. I’m using it for an editor plugin where I wanted to recreate the script list fuzzy search.

fuzzy_search.gd
## fuzzy_search.gd
## Near-direct port of Godot's core/string/fuzzy_search.{h,cpp} to GDScript.
## Reproduced API:  FuzzySearch, FuzzySearchToken, FuzzyTokenMatch, FuzzySearchResult.
extends RefCounted

## Note: If `class_name FuzzySearch` is enabled, inner classes can access static
## helpers directly, and the namespace indirection inside them can be removed.
#class_name FuzzySearch

const CULL_FACTOR: float = 0.1
const CULL_CUTOFF: float = 30.0

var tokens: Array[FuzzySearchToken] = []
var case_sensitive: bool = false

var start_offset: int = 0
var max_results: int = 100
var max_misses: int = 2
var allow_subsequences: bool = true


static func _is_boundary_codepoint(cp: int) -> bool:
	return cp == 47 or cp == 92 or cp == 45 or cp == 95 or cp == 46 # "/\\-_."


static func _is_valid_interval(iv: Vector2i) -> bool:
	# Empty intervals are represented as (-1, -1).
	return iv.x >= 0 and iv.y >= iv.x


static func _extend_interval(a: Vector2i, b: Vector2i) -> Vector2i:
	if not _is_valid_interval(a):
		return b
	if not _is_valid_interval(b):
		return a
	return Vector2i(min(a.x, b.x), max(a.y, b.y))


static func _is_word_boundary(s: String, index: int) -> bool:
	if index == -1 or index == s.length():
		return true
	return _is_boundary_codepoint(s.unicode_at(index))


static func _find_codepoint(s: String, cp: int, from: int) -> int:
	# Equivalent to C++ String::find_char(cp, from)
	var ch := String.chr(cp)
	var n := s.length()
	for i in range(max(from, 0), n):
		if s[i] == ch:
			return i
	return -1


func set_query(p_query: String, p_case_sensitive: bool = (not _is_lowercase(p_query))) -> void:
	tokens.clear()
	case_sensitive = p_case_sensitive

	for s in p_query.split(" ", false):
		var t := FuzzySearchToken.new()
		t.idx = tokens.size()
		t.string = s if p_case_sensitive else s.to_lower()
		tokens.append(t)

	# Prioritize matching longer tokens before shorter ones since match overlaps are not accepted.
	tokens.sort_custom(
		func(a: FuzzySearchToken, b: FuzzySearchToken) -> bool:
			if a.string.length() == b.string.length():
				return a.idx < b.idx
			return a.string.length() > b.string.length()
	)


func search(p_target: String, p_result: FuzzySearchResult) -> bool:
	p_result._reset_for_search(p_target, p_target.rfind("/"), max_misses)

	var adjusted_target := p_target if case_sensitive else p_target.to_lower()

	# Eagerly generate matches for each token, keep best-scoring non-conflicting one.
	for token in tokens:
		var best_match: FuzzyTokenMatch = null
		var offset := start_offset

		while true:
			var m := FuzzyTokenMatch.new()
			var ok := false

			if allow_subsequences:
				ok = token.try_fuzzy_match(m, adjusted_target, offset, p_result.miss_budget)
			else:
				ok = token.try_exact_match(m, adjusted_target, offset)

			if not ok:
				break

			if p_result.can_add_token_match(m):
				p_result.score_token_match(m, m.is_case_insensitive(p_target, adjusted_target))
				if best_match == null or best_match.score < m.score:
					best_match = m

			if self._is_valid_interval(m.interval):
				offset = m.interval.x + 1
			else:
				break

		if best_match == null:
			return false

		p_result.add_token_match(best_match)

	p_result.maybe_apply_score_bonus()
	return true


func search_all(p_targets: PackedStringArray, p_results: Array) -> void:
	p_results.clear()

	for i in range(p_targets.size()):
		var r := FuzzySearchResult.new()
		r.original_index = i
		if search(p_targets[i], r):
			p_results.append(r)

	_sort_and_filter(p_results)


static func _remove_low_scores(p_results: Array, p_cull_score: float) -> void:
	# Removes all results with score < p_cull_score in-place (two pointers).
	var i := 0
	var j := p_results.size() - 1

	while true:
		while j >= i and p_results[j].score < p_cull_score:
			j -= 1
		while i < j and p_results[i].score >= p_cull_score:
			i += 1
		if i >= j:
			break
		var tmp = p_results[i]
		p_results[i] = p_results[j]
		p_results[j] = tmp
		i += 1
		j -= 1

	p_results.resize(j + 1)


func _sort_and_filter(p_results: Array) -> void:
	if p_results.is_empty():
		return

	var avg_score: float = 0.0
	var max_score: float = 0.0

	for r in p_results:
		avg_score += float(r.score)
		max_score = max(max_score, float(r.score))

	avg_score /= float(p_results.size())
	var cull_score: float = min(CULL_CUTOFF, lerp(avg_score, max_score, CULL_FACTOR))
	_remove_low_scores(p_results, cull_score)

	# Sort on (score desc, length asc, alphanumeric asc) for consistent ordering.
	p_results.sort_custom(
		func(a: FuzzySearchResult, b: FuzzySearchResult) -> bool:
			if a.score == b.score:
				if a.target.length() == b.target.length():
					return a.target < b.target
				return a.target.length() < b.target.length()
			return a.score > b.score
	)

	# En C++: partial_sort si > max_results. Ici: tri complet puis resize (même résultat, perf différente).
	if p_results.size() > max_results:
		p_results.resize(max_results)


func _is_lowercase(s: String) -> bool:
	return s == s.to_lower()


class FuzzySearchToken:
	const Namespace := preload("res://addons/yard/editor_only/namespace.gd")
	const FuzzySearch := Namespace.FuzzySearch

	var idx: int = -1
	var string: String = ""


	func try_exact_match(p_match: FuzzyTokenMatch, p_target: String, p_offset: int) -> bool:
		p_match._reset(idx, string.length())
		var match_idx := p_target.find(string, p_offset)
		if match_idx == -1:
			return false
		p_match.add_substring(match_idx, string.length())
		return true


	func try_fuzzy_match(p_match: FuzzyTokenMatch, p_target: String, p_offset: int, p_miss_budget: int) -> bool:
		p_match._reset(idx, string.length())

		var run_start := -1
		var run_len := 0

		# Search for the subsequence token in target starting from p_offset.
		# Record each substring for later scoring and display.
		var offset := p_offset
		var miss_budget := p_miss_budget

		for i in range(string.length()):
			var cp := string.unicode_at(i)
			var new_offset := FuzzySearch._find_codepoint(p_target, cp, offset)

			if new_offset < 0:
				miss_budget -= 1
				if miss_budget < 0:
					return false
			else:
				if run_start == -1 or offset != new_offset:
					if run_start != -1:
						p_match.add_substring(run_start, run_len)
					run_start = new_offset
					run_len = 1
				else:
					run_len += 1
				offset = new_offset + 1

		if run_start != -1:
			p_match.add_substring(run_start, run_len)

		return true


class FuzzyTokenMatch:
	const Namespace := preload("res://addons/yard/editor_only/namespace.gd")
	const FuzzySearch := Namespace.FuzzySearch

	var score: int = 0
	var substrings: Array[Vector2i] = [] # x: start index, y: length

	var matched_length: int = 0
	var token_length: int = 0
	var token_idx: int = -1
	var interval: Vector2i = Vector2i(-1, -1) # inclusive indices


	func _reset(p_token_idx: int, p_token_length: int) -> void:
		score = 0
		substrings.clear()
		matched_length = 0
		token_length = p_token_length
		token_idx = p_token_idx
		interval = Vector2i(-1, -1)


	func add_substring(p_substring_start: int, p_substring_length: int) -> void:
		substrings.append(Vector2i(p_substring_start, p_substring_length))
		matched_length += p_substring_length
		var substring_interval := Vector2i(p_substring_start, p_substring_start + p_substring_length - 1)
		interval = FuzzySearch._extend_interval(interval, substring_interval)


	func get_miss_count() -> int:
		return token_length - matched_length


	func intersects(p_other_interval: Vector2i) -> bool:
		if not FuzzySearch._is_valid_interval(interval) or not FuzzySearch._is_valid_interval(p_other_interval):
			return false
		return interval.y >= p_other_interval.x and interval.x <= p_other_interval.y


	func is_case_insensitive(p_original: String, p_adjusted: String) -> bool:
		for substr in substrings:
			var end := substr.x + substr.y
			for i in range(substr.x, end):
				if p_original[i] != p_adjusted[i]:
					return true
		return false


class FuzzySearchResult:
	const Namespace := preload("res://addons/yard/editor_only/namespace.gd")
	const FuzzySearch := Namespace.FuzzySearch

	var target: String = ""
	var score: int = 0
	var original_index: int = -1
	var dir_index: int = -1
	var token_matches: Array[FuzzyTokenMatch] = []

	var miss_budget: int = 0
	var match_interval: Vector2i = Vector2i(-1, -1)


	func _reset_for_search(p_target: String, p_dir_index: int, p_max_misses: int) -> void:
		target = p_target
		dir_index = p_dir_index
		miss_budget = p_max_misses
		score = 0
		match_interval = Vector2i(-1, -1)
		token_matches.clear()


	func can_add_token_match(p_match: FuzzyTokenMatch) -> bool:
		if p_match.get_miss_count() > miss_budget:
			return false

		if p_match.intersects(match_interval):
			if token_matches.size() == 1:
				return false
			for existing_match in token_matches:
				if existing_match.intersects(p_match.interval):
					return false

		return true


	func score_token_match(p_match: FuzzyTokenMatch, p_case_insensitive: bool) -> void:
		# Exact matches should almost always be prioritized over broken up matches.
		p_match.score = -20 * p_match.get_miss_count() - (3 if p_case_insensitive else 0)

		for substring in p_match.substrings:
			# Score longer substrings higher than short substrings.
			var substring_score: int = substring.y * substring.y

			# Score matches deeper in path higher than shallower matches.
			if substring.x > dir_index:
				substring_score *= 2

			# Score matches on a word boundary higher than matches within a word.
			if FuzzySearch._is_word_boundary(target, substring.x - 1) \
			or FuzzySearch._is_word_boundary(target, substring.x + substring.y):
				substring_score += 4

			# Score exact query matches higher than non-compact subsequence matches.
			if substring.y == p_match.token_length:
				substring_score += 100

			p_match.score += substring_score


	func add_token_match(p_match: FuzzyTokenMatch) -> void:
		score += p_match.score
		match_interval = FuzzySearch._extend_interval(match_interval, p_match.interval)
		miss_budget -= p_match.get_miss_count()
		token_matches.append(p_match)


	func maybe_apply_score_bonus() -> void:
		# Adds a small bonus to results which match tokens in the same order they appear in the query.
		var token_range_starts := PackedInt32Array()
		token_range_starts.resize(token_matches.size())

		for m in token_matches:
			token_range_starts[m.token_idx] = m.interval.x

		var last := token_range_starts[0]
		for i in range(1, token_matches.size()):
			if last > token_range_starts[i]:
				return
			last = token_range_starts[i]

		score += 1