Coverage for sources/librovore/search.py: 18%

1# vim: set filetype=python fileencoding=utf-8:

2# -*- coding: utf-8 -*-

4#============================================================================#

5# #

6# Licensed under the Apache License, Version 2.0 (the "License"); #

7# you may not use this file except in compliance with the License. #

8# You may obtain a copy of the License at #

9# #

10# http://www.apache.org/licenses/LICENSE-2.0 #

11# #

12# Unless required by applicable law or agreed to in writing, software #

13# distributed under the License is distributed on an "AS IS" BASIS, #

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #

15# See the License for the specific language governing permissions and #

16# limitations under the License. #

17# #

18#============================================================================#

21''' Centralized search engine for universal matching across processors. '''

24import re as _re

26import rapidfuzz as _rapidfuzz

28from . import __

29from . import interfaces as _interfaces

class SearchResult( __.immut.DataclassObject ):
    ''' Single search hit: the matched object, its score, and its reasons. '''

    # Matched object record, stored exactly as it was passed to the filter.
    object: dict[ str, __.typx.Any ]
    # Match quality; higher is better (results are ranked descending on it).
    score: float
    # Human-readable explanations of why the object matched.
    match_reasons: list[ str ]

def filter_by_name(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str, /, *,
    match_mode: _interfaces.MatchMode = _interfaces.MatchMode.Fuzzy,
    fuzzy_threshold: int = 50,
) -> list[ SearchResult ]:
    ''' Selects objects whose name matches query; ranks by score.

        An empty query selects every object with a score of 1.0. Otherwise,
        the matcher for ``match_mode`` is applied and its results are
        returned sorted by descending score. A match mode which is none of
        Exact, Regex, or Fuzzy produces an empty list.

        ``fuzzy_threshold`` is only consulted in Fuzzy mode.
    '''
    if not query:
        # Nothing to match against, so everything passes unranked.
        return [
            SearchResult(
                object = candidate, score = 1.0,
                match_reasons = [ 'empty query' ] )
            for candidate in objects ]
    modes = _interfaces.MatchMode
    matches: list[ SearchResult ]
    if match_mode == modes.Exact:
        # Exact and fuzzy matchers expect an already-lowercased query.
        matches = _filter_exact( objects, query.lower( ) )
    elif match_mode == modes.Regex:
        # Regex matcher receives the query verbatim.
        matches = _filter_regex( objects, query )
    elif match_mode == modes.Fuzzy:
        matches = _filter_fuzzy( objects, query.lower( ), fuzzy_threshold )
    else:
        matches = [ ]
    return sorted( matches, key = lambda match: match.score, reverse = True )

def _filter_exact(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str
) -> list[ SearchResult ]:
    ''' Matches objects whose lowercased name contains the query.

        ``query_lower`` is compared as-is against the lowercased ``'name'``
        entry of each object, so the caller is expected to have lowercased
        it already.

        Scores: 1.0 for an identical name, 0.9 when the name starts with the
        query, and 0.7 when the query merely appears within the name.
        Objects whose name does not contain the query are omitted.
    '''
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        name = candidate[ 'name' ].lower( )
        if query_lower not in name:
            continue
        # Tiers run from most to least specific kind of containment.
        if name == query_lower:
            score, reason = 1.0, 'exact name match'
        elif name.startswith( query_lower ):
            score, reason = 0.9, 'name starts with query'
        else:
            score, reason = 0.7, 'name contains query'
        matches.append( SearchResult(
            object = candidate, score = score, match_reasons = [ reason ] ) )
    return matches

def _filter_regex(
    objects: list[ dict[ str, __.typx.Any ] ],
    query: str
) -> list[ SearchResult ]:
    ''' Matches objects whose name is found by query as a regex.

        The query is compiled case-insensitively and searched for anywhere
        within the ``'name'`` entry of each object. Every match receives a
        score of 1.0. A query which is not a valid regular expression
        produces an empty list rather than an error.
    '''
    try:
        matcher = _re.compile( query, _re.IGNORECASE )
    except _re.error:
        # Malformed pattern: treat as matching nothing.
        return [ ]
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        if matcher.search( candidate[ 'name' ] ) is None:
            continue
        matches.append( SearchResult(
            object = candidate, score = 1.0,
            match_reasons = [ 'regex match' ] ) )
    return matches

111

112

def _filter_fuzzy(
    objects: list[ dict[ str, __.typx.Any ] ],
    query_lower: str,
    fuzzy_threshold: int
) -> list[ SearchResult ]:
    ''' Matches objects whose name is similar enough to the query.

        Similarity is the rapidfuzz ratio between ``query_lower`` and the
        lowercased ``'name'`` entry of each object. Objects with a ratio
        below ``fuzzy_threshold`` are omitted; the rest are scored with the
        ratio divided by 100.
    '''
    matches: list[ SearchResult ] = [ ]
    for candidate in objects:
        ratio = _rapidfuzz.fuzz.ratio(
            query_lower, candidate[ 'name' ].lower( ) )
        if ratio < fuzzy_threshold:
            continue
        matches.append( SearchResult(
            object = candidate,
            # Rescale percentage-style ratio onto the score scale.
            score = ratio / 100.0,
            match_reasons = [ f'fuzzy match ({ratio}%)' ] ) )
    return matches

138

139

def score_by_relevance(
    results: list[ SearchResult ],
    query: str
) -> list[ SearchResult ]:
    ''' Returns results unchanged; placeholder for relevance ranking.

        ``query`` is accepted but not currently used.
    '''
    # No rescoring happens here yet; filter_by_name already scores and
    # sorts what it returns. Kept as a hook for later enhancement.
    return results