Coverage for sources/librovore/search.py: 13%
63 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-28 22:09 +0000
« prev ^ index » next coverage.py v7.10.7, created at 2025-09-28 22:09 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Centralized search engine for universal matching across processors. '''
24import re as _re
26import rapidfuzz as _rapidfuzz
28from . import __
29from . import interfaces as _interfaces
30from . import results as _results
# Default-constructed search behaviors; used as the default value of the
# 'search_behaviors' argument of 'filter_by_name'.
33_SEARCH_BEHAVIORS_DEFAULT = _interfaces.SearchBehaviors( )
# Lowest rapidfuzz 'partial_ratio' score which '_filter_exact' still accepts
# as a partial match when term containment is enabled.
34_EXACT_THRESHOLD_MIN = 95
def filter_by_name(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str, /, *,
    search_behaviors: _interfaces.SearchBehaviors = _SEARCH_BEHAVIORS_DEFAULT,
) -> tuple[ _results.SearchResult, ... ]:
    ''' Searches objects by name and returns results, best score first.

        An empty term matches every object with a score of 1.0. Otherwise,
        the match mode of the search behaviors selects exact, regex, or
        similarity filtering. A match mode which is not handled here
        produces no results.
    '''
    if not term:
        # Nothing to compare against: every object is a full-score match.
        everything: list[ _results.SearchResult ] = [ ]
        for candidate in objects:
            everything.append(
                _results.SearchResult.from_inventory_object(
                    candidate, score = 1.0,
                    match_reasons = [ 'empty term' ] ) )
        return tuple( everything )

    mode = search_behaviors.match_mode
    matches: list[ _results.SearchResult ] = [ ]
    if mode == _interfaces.MatchMode.Exact:
        matches = _filter_exact(
            objects, term,
            search_behaviors.contains_term,
            search_behaviors.case_sensitive )
    elif mode == _interfaces.MatchMode.Pattern:
        matches = _filter_regex( objects, term )
    elif mode == _interfaces.MatchMode.Similar:
        matches = _filter_similar(
            objects, term,
            search_behaviors.similarity_score_min,
            search_behaviors.contains_term,
            search_behaviors.case_sensitive )

    # Stable descending sort: results with equal scores keep the order in
    # which the filter produced them.
    return tuple( sorted(
        matches, key = lambda result: result.score, reverse = True ) )
def _filter_exact(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str,
    contains_term: bool,
    case_sensitive: bool
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names equal, or nearly contain, the term.

        A name equal to the term scores 1.0. If ``contains_term`` is set,
        other names are scored with rapidfuzz ``partial_ratio`` and kept
        only when that score reaches ``_EXACT_THRESHOLD_MIN``. Unless
        ``case_sensitive`` is set, both sides are lowercased first.
    '''
    def normalize( text: str ) -> str:
        return text if case_sensitive else text.lower( )

    needle = normalize( term )
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        name = normalize( candidate.name )
        if name == needle:
            score, reason = 1.0, 'exact match'
        else:
            # Non-identical names only qualify via containment scoring.
            if not contains_term: continue
            partial_score = _rapidfuzz.fuzz.partial_ratio( needle, name )
            if partial_score < _EXACT_THRESHOLD_MIN: continue
            score = partial_score / 100.0
            reason = f'partial match ({partial_score}%)'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches
def _filter_regex(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names are matched by the regex query.

        The pattern is compiled with ``IGNORECASE`` and searched for
        anywhere within each name; every match scores 1.0. A query which
        fails to compile yields an empty list rather than an error.
    '''
    try: pattern = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Best effort: an invalid pattern simply matches nothing.
        return [ ]
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        if not pattern.search( candidate.name ): continue
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = 1.0, match_reasons = [ 'regex match' ] ) )
    return matches
def _filter_similar(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str,
    similarity_score_min: int,
    contains_term: bool,
    case_sensitive: bool
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names equal, or are similar to, the term.

        A name equal to the term scores 1.0. If ``contains_term`` is set,
        other names are scored with the better of rapidfuzz
        ``partial_ratio`` and ``ratio`` and kept only when that score
        reaches ``similarity_score_min``. Unless ``case_sensitive`` is set,
        both sides are lowercased first.
    '''
    needle = term if case_sensitive else term.lower( )
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        name = (
            candidate.name if case_sensitive else candidate.name.lower( ) )
        if name == needle:
            score, reason = 1.0, 'exact match'
        elif not contains_term:
            # NOTE(review): without 'contains_term', no fuzzy scoring
            # happens at all and only identical names survive; confirm
            # that this is intended for similarity mode.
            continue
        else:
            partial_score = _rapidfuzz.fuzz.partial_ratio( needle, name )
            regular_score = _rapidfuzz.fuzz.ratio( needle, name )
            # Take the higher score; ties are labelled 'similar'.
            if partial_score > regular_score:
                best, label = partial_score, 'partial'
            else:
                best, label = regular_score, 'similar'
            if best < similarity_score_min: continue
            score = best / 100.0
            reason = f'{label} match ({best}%)'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches