Coverage for sources/librovore/search.py: 13%

49 statements  

« prev     ^ index     » next       coverage.py v7.10.5, created at 2025-08-29 01:14 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Centralized search engine for universal matching across processors. ''' 

22 

23 

24import re as _re 

25 

26import rapidfuzz as _rapidfuzz 

27 

28from . import __ 

29from . import interfaces as _interfaces 

30from . import results as _results 

31 

32 

def filter_by_name(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str, /, *,
    match_mode: _interfaces.MatchMode = _interfaces.MatchMode.Fuzzy,
    fuzzy_threshold: int = 50,
) -> tuple[ _results.SearchResult, ... ]:
    ''' Selects objects whose names match the query and ranks them.

        An empty query matches every object with a score of 1.0.
        Otherwise, the match mode picks the strategy: exact and fuzzy
        matching receive the lower-cased query, while regex matching
        receives the query as given. Any other match mode yields no
        results. Results are returned in descending score order.
    '''
    if not query:
        # Nothing to match against: pass every object through unfiltered.
        return tuple(
            _results.SearchResult.from_inventory_object(
                candidate, score = 1.0, match_reasons = [ 'empty query' ] )
            for candidate in objects )
    needle = query.lower( )
    matches: list[ _results.SearchResult ]
    if match_mode == _interfaces.MatchMode.Exact:
        matches = _filter_exact( objects, needle )
    elif match_mode == _interfaces.MatchMode.Regex:
        # Regex flavor keeps original casing; the helper ignores case itself.
        matches = _filter_regex( objects, query )
    elif match_mode == _interfaces.MatchMode.Fuzzy:
        matches = _filter_fuzzy( objects, needle, fuzzy_threshold )
    else:
        matches = [ ]
    # Stable sort: equally scored results keep their inventory order.
    matches.sort( key = lambda result: result.score, reverse = True )
    return tuple( matches )

61 

62 

def _filter_exact(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query_lower: str
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose lower-cased name contains the query.

        The query is compared as-is against the lower-cased name, so it
        is expected to be lower-cased already. Scores are tiered: 1.0
        when the whole name equals the query, 0.9 when the name starts
        with it, and 0.7 when the query merely appears inside the name.
    '''
    matches: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        name = candidate.name.lower( )
        if query_lower not in name: continue
        # Most specific kind of match wins.
        if name == query_lower:
            score, reason = 1.0, 'exact name match'
        elif name.startswith( query_lower ):
            score, reason = 0.9, 'name starts with query'
        else:
            score, reason = 0.7, 'name contains query'
        matches.append( _results.SearchResult.from_inventory_object(
            candidate, score = score, match_reasons = [ reason ] ) )
    return matches

86 

87 

def _filter_regex(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query: str
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose name is matched by the query pattern.

        The query is compiled as a case-insensitive regular expression
        and searched for anywhere in each name. Every match receives a
        score of 1.0. A query which fails to compile yields no results
        rather than an error.
    '''
    try: matcher = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Deliberate best effort: a malformed pattern matches nothing.
        return [ ]
    hits: list[ _results.SearchResult ] = [ ]
    for candidate in objects:
        if not matcher.search( candidate.name ): continue
        hits.append( _results.SearchResult.from_inventory_object(
            candidate, score = 1.0, match_reasons = [ 'regex match' ] ) )
    return hits

104 

105 

def _filter_fuzzy(
    objects: __.cabc.Sequence[ _results.InventoryObject ],
    query_lower: str,
    fuzzy_threshold: int
) -> list[ _results.SearchResult ]:
    ''' Collects objects whose name is similar enough to the query.

        Similarity is the rapidfuzz ratio between the query and the
        lower-cased name. Objects whose ratio reaches the threshold are
        kept, with the ratio divided by 100 as the score and the raw
        ratio reported in the match reason.
    '''
    # Lazily pair each object with its similarity ratio.
    rated = (
        ( candidate,
          _rapidfuzz.fuzz.ratio( query_lower, candidate.name.lower( ) ) )
        for candidate in objects )
    return [
        _results.SearchResult.from_inventory_object(
            candidate,
            score = ratio / 100.0,
            match_reasons = [ f'fuzzy match ({ratio}%)' ] )
        for candidate, ratio in rated
        if ratio >= fuzzy_threshold ]