Coverage for sources/librovore/search.py: 18%

53 statements  

« prev     ^ index     » next       coverage.py v7.10.4, created at 2025-08-17 23:43 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Centralized search engine for universal matching across processors. ''' 

22 

23 

24import re as _re 

25 

26import rapidfuzz as _rapidfuzz 

27 

28from . import __ 

29from . import interfaces as _interfaces 

30 

31 

class SearchResult( __.immut.DataclassObject ):
    ''' Single search hit: matched object, its score, and why it matched. '''

    # Object dictionary which matched the query.
    object: dict[ str, __.typx.Any ]
    # Relevance of the match; results are ordered by this, highest first.
    score: float
    # Human-readable explanations for why the object matched.
    match_reasons: list[ str ]

38 

39 

def filter_by_name(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str, /, *,
    match_mode: _interfaces.MatchMode = _interfaces.MatchMode.Fuzzy,
    fuzzy_threshold: int = 50,
) -> list[ SearchResult ]:
    ''' Selects objects whose name matches query, best matches first.

        An empty query matches every object with a score of 1.0 and the
        reason 'empty query'; that result keeps the input order.

        Otherwise, the match mode selects the matcher. Exact and fuzzy
        matching receive the lowercased query; regex matching receives the
        query unchanged. The fuzzy threshold is only consulted in fuzzy
        mode. A match mode other than Exact, Regex, or Fuzzy produces no
        results.

        Results are ordered by descending score.
    '''
    if not query:
        # Nothing to filter on: keep every object at the top score.
        return [
            SearchResult(
                object = entry, score = 1.0,
                match_reasons = [ 'empty query' ] )
            for entry in objects
        ]
    needle = query.lower( )
    if match_mode == _interfaces.MatchMode.Exact:
        matches = _filter_exact( objects, needle )
    elif match_mode == _interfaces.MatchMode.Regex:
        # Pattern is passed through as written; no lowercasing.
        matches = _filter_regex( objects, query )
    elif match_mode == _interfaces.MatchMode.Fuzzy:
        matches = _filter_fuzzy( objects, needle, fuzzy_threshold )
    else:
        matches = [ ]
    # Sort is stable, so equally scored results keep matcher order.
    return sorted(
        matches, key = lambda match: match.score, reverse = True )

68 

69 

def _filter_exact(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str
) -> list[ SearchResult ]:
    ''' Selects objects whose lowercased name contains the query.

        The query is compared as given against the lowercased 'name' entry
        of each object, so it should already be lowercase.

        Scores: 1.0 when the name equals the query, 0.9 when the name
        starts with the query, and 0.7 when the query appears elsewhere in
        the name. Results are returned in input order.
    '''
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        name = candidate[ 'name' ].lower( )
        if query_lower not in name: continue
        # Rank from most specific to least specific kind of containment.
        if name == query_lower:
            ranking = ( 1.0, 'exact name match' )
        elif name.startswith( query_lower ):
            ranking = ( 0.9, 'name starts with query' )
        else:
            ranking = ( 0.7, 'name contains query' )
        score, reason = ranking
        matches.append( SearchResult(
            object = candidate, score = score, match_reasons = [ reason ] ) )
    return matches

93 

94 

def _filter_regex(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str
) -> list[ SearchResult ]:
    ''' Selects objects whose name is matched by the query as a regex.

        The query is compiled case-insensitively and searched for anywhere
        within the 'name' entry of each object. Every match receives a
        score of 1.0. Results are returned in input order.

        A query which is not a valid regular expression produces no
        results rather than an error.
    '''
    try: pattern = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Malformed pattern: report nothing instead of raising.
        return [ ]
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        if pattern.search( candidate[ 'name' ] ) is None: continue
        matches.append( SearchResult(
            object = candidate, score = 1.0,
            match_reasons = [ 'regex match' ] ) )
    return matches

111 

112 

def _filter_fuzzy(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str,
    fuzzy_threshold: int
) -> list[ SearchResult ]:
    ''' Selects objects whose name is sufficiently similar to the query.

        Similarity is the rapidfuzz ratio between the query, as given, and
        the lowercased 'name' entry of each object, so the query should
        already be lowercase. Objects with a ratio below the threshold are
        dropped.

        The score of each result is the ratio divided by 100, and the
        match reason reports the ratio as a percentage. Results are
        returned in input order.
    '''
    return [
        SearchResult(
            object = candidate,
            # Rescale the percentage-style ratio to a fraction.
            score = similarity / 100.0,
            match_reasons = [ f'fuzzy match ({similarity}%)' ] )
        for candidate in objects
        # Ratio is bound here so that the result above can reuse it.
        if ( similarity := _rapidfuzz.fuzz.ratio(
            query_lower, candidate[ 'name' ].lower( ) )
        ) >= fuzzy_threshold
    ]

138 

139 

def score_by_relevance(
    results: list[ SearchResult ],
    query: str
) -> list[ SearchResult ]:
    ''' Returns results unchanged; placeholder for relevance ranking.

        The query is currently unused and the same list is handed back.
        Scoring and ordering are expected to have been applied already by
        filter_by_name.
    '''
    # Deliberate pass-through: hook for future ranking enhancements.
    return results