# -*- coding: utf-8 -*-
import operator
from typing import Any, Callable, Iterable, Iterator, Optional, Tuple, Union
import re
from . import export
COLORS_TO_ANSI = {
"red": "\033[41m",
"green": "\033[42m",
"yellow": "\033[43m",
"blue": "\033[44m",
"magenta": "\033[45m",
"cyan": "\033[46m",
}
ANSI_RESET = "\033[0m"
def highlight_substring(
substring: str,
string: str,
highlight: Union[bool, str, Tuple[str, str]],
ignore_case: bool,
) -> str:
"""Apply `highlight` to contiguous chunks of `substring` within `string`."""
assert highlight, "called function when 'highlight' was falsy"
default_highlight = "green"
if highlight is True:
highlight = default_highlight
if isinstance(highlight, str):
if not (ansi_code := COLORS_TO_ANSI.get(highlight.lower())):
raise ValueError(f"highlight, if a string, must be one of: {str(list(COLORS_TO_ANSI)).strip('[]')}")
highlight = ansi_code, ANSI_RESET
assert isinstance(highlight, tuple), "incorrect type for 'highlight'"
# We apply to the main string, pairwise, styling prefixes and suffixes
# which mark the matched substring. We iterate over both the main string
# and the substring exactly once, and make sure that prefix-suffix pairs
# wrap entire chunks of adjacent matched characters as opposed to each
# character individually.
prefix, suffix = highlight
highlighted_string = ""
# We use an iterator for iterating over the main string. This ensures that:
# (1) for each substring character, we start iteration where we stopped for
# the previous substring character;
# (2) after matching the final substring character, the remainder of the
# main string's characters is preserved within the iterator.
string_iter = iter(string)
unpaired_prefix = False
for substring_char in substring:
for string_char in string_iter:
match = string_char == substring_char
case_insensitive_match = string_char.lower() == substring_char.lower()
if match or (ignore_case and case_insensitive_match):
if not unpaired_prefix:
highlighted_string += prefix
unpaired_prefix = True
highlighted_string += string_char
break
else:
if unpaired_prefix:
highlighted_string += suffix
unpaired_prefix = False
highlighted_string += string_char
# If the final match is between the last characters of both strings,
# a prefix is left unpaired, so we check for it here.
if unpaired_prefix:
highlighted_string += suffix
remainder = "".join(string_iter)
highlighted_string += remainder
return highlighted_string
[docs]
@export
def fuzzyfinder(
input: str,
collection: Iterable[Any],
accessor: Optional[Callable[[Any], str]] = None,
sort_results: bool = True,
ignore_case: bool = True,
highlight: Union[bool, str, Tuple[str, str]] = False,
) -> Iterator[Any]:
"""Filter a collection of objects by fuzzy matching against an input string.
Args:
input:
A partial string which is typically entered by a user.
collection:
An iterable of objects which will be filtered based on the
``input``. If the objects are not strings, an appropriate
``accessor`` keyword argument must be passed.
accessor:
If the ``collection`` is not an iterable of strings, then use the
accessor to fetch the string that will be used for fuzzy matching.
Defaults to the identity function (simply returns the argument it
received as input).
sort_results:
The suggestions are sorted by considering the smallest contiguous
match, followed by where the match is found in the full string. If
two suggestions have the same rank, they are then sorted
alpha-numerically. This parameter controls the
*last tie-breaker-alpha-numeric sorting*. The sorting based on
match length and position will be intact.
Defaults to ``True``.
ignore_case:
If this parameter is set to ``False``, the filtering is
case-sensitive.
Defaults to ``True``.
highlight:
Highlight the matching substrings when printed to the terminal, or
elsewhere.
If ``True``, when printed to the terminal, the matching substrings
will have the default highlight (*green*).
If this parameter is a ``str``, when printed to the terminal, the
matching substrings will be highlighted in the corresponding color.
Accepted values are: ``'red'``, ``'green'``, ``'yellow'``,
``'blue'``, ``'magenta'``, and ``'cyan'``.
If this parameter is a ``tuple``, it must be a 2-tuple consisting
of a prefix and a suffix string. The prefix and suffix are
prepended and appended to contiguous substring chunks, and can
range from anything like parentheses or ANSI escape codes to HTML
tags with class or style attributes.
Defaults to ``False``.
Returns:
Iterator[Any]:
A generator object that produces a list of suggestions narrowed
down from ``collection`` using the ``input``.
Example:
>>> suggestions = fuzzyfinder('abc', ['acb', 'defabca', 'abcd', 'aagbec', 'xyz', 'qux'])
>>> list(suggestions)
['abcd', 'defabca', 'aagbec']
"""
suggestions = []
input = str(input) if not isinstance(input, str) else input
pat = ".*?".join(map(re.escape, input))
pat = "(?=({0}))".format(pat) # lookahead regex to manage overlapping matches
regex = re.compile(pat, re.IGNORECASE if ignore_case else 0)
for item in collection:
accessed_item = item if accessor is None else accessor(item)
r = tuple(regex.finditer(accessed_item))
if r:
best = min(r, key=lambda x: len(x.group(1))) # find shortest match
suggestions.append((len(best.group(1)), best.start(), accessed_item, item))
if sort_results:
suggestions.sort()
else:
suggestions.sort(key=lambda x: x[:2])
results: Iterator[Any] = map(operator.itemgetter(-1), suggestions)
if highlight:
results = (highlight_substring(input, result, highlight, ignore_case) for result in results)
return results