Coverage for sources/librovore/search.py: 13%
49 statements
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-29 01:14 +0000
« prev ^ index » next coverage.py v7.10.5, created at 2025-08-29 01:14 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Centralized search engine for universal matching across processors. '''
24import re as _re
26import rapidfuzz as _rapidfuzz
28from . import __
29from . import interfaces as _interfaces
30from . import results as _results
def filter_by_name(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str, /, *,
    match_mode: _interfaces.MatchMode = _interfaces.MatchMode.Fuzzy,
    fuzzy_threshold: int = 50,
) -> tuple[ _results.SearchResult, ... ]:
    ''' Searches inventory objects by name and ranks the matches.

        An empty query matches every object with a score of 1.0. Otherwise,
        the query is dispatched to the matcher for ``match_mode``: the exact
        and fuzzy matchers receive the lowercased query, while the regex
        matcher receives the query as given. A mode which is none of exact,
        regex, or fuzzy produces no matches.

        Matches are returned ordered from highest score to lowest; the sort
        is stable, so equally-scored matches keep the order in which the
        matcher produced them.
    '''
    if not query:
        # Nothing to compare against, so every object is a match.
        return tuple(
            _results.SearchResult.from_inventory_object(
                entry, score = 1.0, match_reasons = [ 'empty query' ] )
            for entry in objects )
    modes = _interfaces.MatchMode
    needle = query.lower( )
    matches: list[ _results.SearchResult ]
    if match_mode == modes.Exact:
        matches = _filter_exact( objects, needle )
    elif match_mode == modes.Regex:
        # Regex matcher handles case-insensitivity itself.
        matches = _filter_regex( objects, query )
    elif match_mode == modes.Fuzzy:
        matches = _filter_fuzzy( objects, needle, fuzzy_threshold )
    else:
        matches = [ ]
    return tuple( sorted(
        matches, key = lambda match: match.score, reverse = True ) )
def _filter_exact(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query_lower: str
) -> list[ _results.SearchResult ]:
    ''' Matches objects whose lowercased name contains the query.

        The query is expected to be lowercased already; it is compared
        against the lowercased object name. Scores are tiered by how the
        name relates to the query:

        * 1.0 when the name equals the query;
        * 0.9 when the name starts with the query;
        * 0.7 when the name merely contains the query.

        Objects whose names do not contain the query are omitted.
    '''
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        name = candidate.name.lower( )
        if query_lower not in name: continue
        # Most specific relationship wins.
        if name == query_lower:
            score, reason = 1.0, 'exact name match'
        elif name.startswith( query_lower ):
            score, reason = 0.9, 'name starts with query'
        else:
            score, reason = 0.7, 'name contains query'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches
def _filter_regex(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str
) -> list[ _results.SearchResult ]:
    ''' Matches objects whose name is found by the query as a regex.

        The query is compiled case-insensitively and searched for anywhere
        within each object name. Every match receives a score of 1.0. A
        query which fails to compile as a regular expression yields an
        empty list rather than an error.
    '''
    try: pattern = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Uncompilable pattern is treated as matching nothing.
        return [ ]
    else:
        matches: list[ _results.SearchResult ] = [ ]
        for candidate in objects:
            if pattern.search( candidate.name ) is None: continue
            matches.append( _results.SearchResult.from_inventory_object(
                candidate, score = 1.0, match_reasons = [ 'regex match' ] ) )
        return matches
def _filter_fuzzy(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query_lower: str,
    fuzzy_threshold: int
) -> list[ _results.SearchResult ]:
    ''' Matches objects whose name is similar enough to the query.

        The query is expected to be lowercased already; similarity is
        computed by ``rapidfuzz.fuzz.ratio`` between it and the lowercased
        object name. Objects with a ratio at or above ``fuzzy_threshold``
        are kept, scored as the ratio divided by 100, with the raw ratio
        recorded in the match reason.
    '''
    # Lazy pairing keeps one ratio computation per object, in order.
    rated = (
        ( candidate,
          _rapidfuzz.fuzz.ratio( query_lower, candidate.name.lower( ) ) )
        for candidate in objects )
    return [
        _results.SearchResult.from_inventory_object(
            candidate,
            score = ratio / 100.0,
            match_reasons = [ f'fuzzy match ({ratio}%)' ] )
        for candidate, ratio in rated
        if ratio >= fuzzy_threshold ]