Coverage for sources/librovore/search.py: 13%

63 statements  

« prev     ^ index     » next       coverage.py v7.10.6, created at 2025-09-06 02:25 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Centralized search engine for universal matching across processors. ''' 

22 

23 

24import re as _re 

25 

26import rapidfuzz as _rapidfuzz 

27 

28from . import __ 

29from . import interfaces as _interfaces 

30from . import results as _results 

31 

32 

# Behaviors used by filter_by_name when the caller does not supply any.
_SEARCH_BEHAVIORS_DEFAULT = _interfaces.SearchBehaviors( )
# Lowest partial_ratio score which _filter_exact accepts for a name that is
# not identical to the term.
_EXACT_THRESHOLD_MIN = 95

35 

36 

def filter_by_name(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str, /, *,
    search_behaviors: _interfaces.SearchBehaviors = _SEARCH_BEHAVIORS_DEFAULT,
) -> tuple[ _results.SearchResult, ... ]:
    ''' Selects inventory objects whose names match the search term.

        An empty term selects every object with a score of 1.0, in the
        order given. Otherwise, the match mode of the search behaviors
        chooses the exact, regex, or similarity filter; a mode which is
        none of these yields no results. Results are ordered by descending
        score, with ties keeping the order produced by the filter.
    '''
    if not term:
        everything = [
            _results.SearchResult.from_inventory_object(
                item, score = 1.0, match_reasons = [ 'empty term' ] )
            for item in objects ]
        return tuple( everything )
    mode = search_behaviors.match_mode
    matches: list[ _results.SearchResult ] = [ ]
    if mode == _interfaces.MatchMode.Exact:
        matches = _filter_exact(
            objects,
            term,
            search_behaviors.contains_term,
            search_behaviors.case_sensitive )
    elif mode == _interfaces.MatchMode.Pattern:
        # Regex filter takes only the objects and the term.
        matches = _filter_regex( objects, term )
    elif mode == _interfaces.MatchMode.Similar:
        matches = _filter_similar(
            objects,
            term,
            search_behaviors.similarity_score_min,
            search_behaviors.contains_term,
            search_behaviors.case_sensitive )
    # Stable sort: equally scored results stay in filter order.
    matches.sort( key = lambda found: found.score, reverse = True )
    return tuple( matches )

67 

68 

def _filter_exact(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str,
    contains_term: bool,
    case_sensitive: bool
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names equal or nearly contain the term.

        A name equal to the term scores 1.0. When ``contains_term`` is
        set, other names are scored with rapidfuzz ``partial_ratio`` and
        kept only at or above ``_EXACT_THRESHOLD_MIN``, with the score
        divided by 100. Unless ``case_sensitive`` is set, both term and
        names are lowercased before comparison.
    '''
    needle = term if case_sensitive else term.lower( )
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        if case_sensitive: haystack = candidate.name
        else: haystack = candidate.name.lower( )
        if haystack == needle:
            score, reason = 1.0, 'exact match'
        else:
            # Without containment matching, only identical names qualify.
            if not contains_term: continue
            percent = _rapidfuzz.fuzz.partial_ratio( needle, haystack )
            if not ( percent >= _EXACT_THRESHOLD_MIN ): continue
            score = percent / 100.0
            reason = f'partial match ({percent}%)'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches

96 

97 

def _filter_regex(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names the query regex finds a match in.

        The query is always compiled with ``IGNORECASE`` and applied with
        ``search``, so it may match anywhere in a name. Every match scores
        1.0. A query which fails to compile yields an empty list rather
        than an error.
    '''
    try: compiled = _re.compile( query, _re.IGNORECASE )
    except _re.error: return [ ]
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        if not compiled.search( candidate.name ): continue
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = 1.0, match_reasons = [ 'regex match' ] ) )
    return matches

113 

114 

def _filter_similar(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    term: str,
    similarity_score_min: int,
    contains_term: bool,
    case_sensitive: bool
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose names equal or resemble the term.

        A name equal to the term scores 1.0. When ``contains_term`` is
        set, other names take the larger of the rapidfuzz ``partial_ratio``
        and ``ratio`` scores and are kept only at or above
        ``similarity_score_min``, with the score divided by 100. When
        ``contains_term`` is not set, only equal names are kept. Unless
        ``case_sensitive`` is set, both term and names are lowercased
        before comparison.
    '''
    needle = term if case_sensitive else term.lower( )
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        if case_sensitive: haystack = candidate.name
        else: haystack = candidate.name.lower( )
        if haystack == needle:
            score, reason = 1.0, 'exact match'
        else:
            # Fuzzy scoring happens only under containment matching.
            if not contains_term: continue
            partial = _rapidfuzz.fuzz.partial_ratio( needle, haystack )
            whole = _rapidfuzz.fuzz.ratio( needle, haystack )
            best = max( partial, whole )
            if not ( best >= similarity_score_min ): continue
            score = best / 100.0
            # Label reflects which scorer strictly won; ties are 'similar'.
            if partial > whole: label = 'partial'
            else: label = 'similar'
            reason = f'{label} match ({best}%)'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches