Coverage for sources/librovore/search.py: 18%
53 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 22:48 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 22:48 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Centralized search engine for universal matching across processors. '''
24import re as _re
26import rapidfuzz as _rapidfuzz
28from . import __
29from . import interfaces as _interfaces
class SearchResult( __.immut.DataclassObject ):
    ''' One search hit: the matched object plus its ranking details. '''

    # The matched object, exactly as the mapping that was supplied.
    object: dict[ str, __.typx.Any ]
    # Relevance of the hit; the filters in this module use higher = better.
    score: float
    # Short human-readable explanations of why the object matched.
    match_reasons: list[ str ]
def filter_by_name(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str, /, *,
    match_mode: _interfaces.MatchMode = _interfaces.MatchMode.Fuzzy,
    fuzzy_threshold: int = 50,
) -> list[ SearchResult ]:
    ''' Matches object names against a query and ranks the hits.

        An empty query matches every object with a score of 1.0 and keeps
        the input order. Otherwise the strategy selected by ``match_mode``
        produces the hits, which are returned sorted by descending score.
        A match mode other than Exact, Regex or Fuzzy produces no hits.
    '''
    if not query:
        # Nothing to match against: every object passes with the same score.
        return [
            SearchResult(
                object = candidate, score = 1.0,
                match_reasons = [ 'empty query' ] )
            for candidate in objects ]
    modes = _interfaces.MatchMode
    hits: list[ SearchResult ]
    if match_mode == modes.Exact:
        hits = _filter_exact( objects, query.lower( ) )
    elif match_mode == modes.Regex:
        # Regex receives the raw query; case is handled by the pattern flags.
        hits = _filter_regex( objects, query )
    elif match_mode == modes.Fuzzy:
        hits = _filter_fuzzy( objects, query.lower( ), fuzzy_threshold )
    else:
        hits = [ ]
    # Stable sort: hits with equal scores keep their relative input order.
    hits.sort( key = lambda hit: hit.score, reverse = True )
    return hits
def _filter_exact(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str
) -> list[ SearchResult ]:
    ''' Selects objects whose lowercased name contains the query.

        ``query_lower`` is compared as given against the lowercased
        ``'name'`` entry of each object. Scores are tiered: 1.0 when the
        name equals the query, 0.9 when it starts with the query, and 0.7
        when the query appears elsewhere in the name.
    '''
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        name = candidate[ 'name' ].lower( )
        if query_lower not in name: continue
        # Tiers are checked from most to least specific.
        if name == query_lower:
            score, reason = 1.0, 'exact name match'
        elif name.startswith( query_lower ):
            score, reason = 0.9, 'name starts with query'
        else:
            score, reason = 0.7, 'name contains query'
        matches.append( SearchResult(
            object = candidate, score = score, match_reasons = [ reason ] ) )
    return matches
def _filter_regex(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str
) -> list[ SearchResult ]:
    ''' Selects objects whose name is matched by the query as a regex.

        The query is compiled case-insensitively and searched anywhere
        within the ``'name'`` entry of each object. Every hit scores 1.0.
        A query which is not a valid regular expression yields no hits
        rather than raising.
    '''
    try: pattern = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Malformed pattern: treat as matching nothing.
        return [ ]
    hits: list[ SearchResult ] = [ ]
    for candidate in objects:
        if not pattern.search( candidate[ 'name' ] ): continue
        hits.append( SearchResult(
            object = candidate, score = 1.0,
            match_reasons = [ 'regex match' ] ) )
    return hits
def _filter_fuzzy(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str,
    fuzzy_threshold: int
) -> list[ SearchResult ]:
    ''' Selects objects whose name is similar enough to the query.

        Similarity is the rapidfuzz ``fuzz.ratio`` between ``query_lower``
        and the lowercased ``'name'`` entry of each object, on a 0-100
        scale. Objects with a ratio at or above ``fuzzy_threshold`` are kept
        and scored as the ratio divided by 100. The match reason reports
        the ratio rounded to one decimal place.
    '''
    results: list[ SearchResult ] = [ ]
    for obj in objects:
        obj_name_lower = obj[ 'name' ].lower( )
        ratio = _rapidfuzz.fuzz.ratio( query_lower, obj_name_lower )
        if ratio < fuzzy_threshold: continue
        results.append( SearchResult(
            object = obj,
            # Normalize the 0-100 ratio to the 0.0-1.0 score range.
            score = ratio / 100.0,
            # The ratio is a float; fix the precision so that the reason
            # does not render as, e.g., '66.66666666666667%'.
            match_reasons = [ f'fuzzy match ({ratio:.1f}%)' ] ) )
    return results
def score_by_relevance(
    results: list[ SearchResult ],
    query: str
) -> list[ SearchResult ]:
    ''' Returns results unchanged; placeholder for relevance ranking.

        ``filter_by_name`` already scores and sorts its results, so nothing
        further is done here. The query is accepted but currently unused;
        the function exists as a hook for future enhancements.
    '''
    return results