Coverage for sources/librovore/processors.py: 79%
56 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-20 18:40 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-20 18:40 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Site processors. '''
24from . import __
25from . import exceptions as _exceptions
26from . import interfaces as _interfaces
27from . import results as _results
28from . import state as _state
class Processor( __.immut.DataclassProtocol ):
    ''' Interface for processors which detect documentation sources.

        Implementations carry a ``name``, expose ``capabilities``, and
        provide an asynchronous ``detect``.
    '''

    name: str

    @property
    @__.abc.abstractmethod
    def capabilities( self ) -> _interfaces.ProcessorCapabilities:
        ''' Capabilities which processor advertises. '''
        raise NotImplementedError

    @__.abc.abstractmethod
    async def detect(
        self,
        auxdata: _state.Globals,
        source: str,
    ) -> 'Detection':
        ''' Determines whether documentation at source can be processed.

            Abstract: the base implementation only raises
            ``NotImplementedError``.
        '''
        raise NotImplementedError
# Both registry aliases have the same shape: a ``ValidatorDictionary``
# with ``str`` keys and ``Processor`` values.
InventoryProcessorsRegistry: __.typx.TypeAlias = (
    __.accret.ValidatorDictionary[ str, Processor ]
)
StructureProcessorsRegistry: __.typx.TypeAlias = (
    __.accret.ValidatorDictionary[ str, Processor ]
)
class Detection( __.immut.DataclassProtocol ):
    ''' Interface for outcomes of running a processor against a source.

        Holds the processor, a confidence score, and a timestamp which
        defaults to the result of ``__.time.time( )`` at construction.
    '''

    processor: Processor
    confidence: float
    timestamp: float = __.dcls.field( default_factory = __.time.time )

    @property
    def capabilities( self ) -> _interfaces.ProcessorCapabilities:
        ''' Capabilities of the associated processor. '''
        return self.processor.capabilities

    def __post_init__( self ) -> None:
        ''' Rejects confidence outside closed interval [0.0, 1.0].

            Raises ``DetectionConfidenceInvalidity`` with the offending
            value.
        '''
        score = self.confidence
        if 0.0 <= score <= 1.0:
            return
        raise _exceptions.DetectionConfidenceInvalidity( score )

    @classmethod
    @__.abc.abstractmethod
    async def from_source(
        selfclass,
        auxdata: _state.Globals,
        processor: Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Builds detection for processor from source location.

            Abstract: the base implementation only raises
            ``NotImplementedError``.
        '''
        raise NotImplementedError
class InventoryDetection( Detection ):
    ''' Detection variant for inventory processors. '''

    @__.abc.abstractmethod
    async def filter_inventory(
        self,
        auxdata: _state.Globals,
        source: str,
        /,
        *,
        filters: __.cabc.Mapping[ str, __.typx.Any ],
    ) -> tuple[ _results.InventoryObject, ... ]:
        ''' Produces inventory objects from source, subject to filters.

            ``auxdata`` and ``source`` are positional-only; ``filters``
            is keyword-only. Abstract: the base implementation only
            raises ``NotImplementedError``.
        '''
        raise NotImplementedError
class StructureDetection( Detection ):
    ''' Detection variant for structure processors. '''

    @classmethod
    @__.abc.abstractmethod
    def get_capabilities( cls ) -> _interfaces.StructureProcessorCapabilities:
        ''' Reports capabilities used for filtering and selection.

            Content extraction features advertise which kinds of content
            the processor can reliably extract:

            - 'signatures': function/class signatures with parameters
            - 'descriptions': descriptive content and documentation text
            - 'code-examples': code blocks with language information kept
            - 'cross-references': links and references to other docs
            - 'arguments': documentation of individual parameters
            - 'returns': documentation of return values
            - 'attributes': documentation of class and module attributes

            These features rest on universal patterns discovered through
            comprehensive theme analysis, not on theme-specific guesses.
        '''
        raise NotImplementedError

    @__.abc.abstractmethod
    async def extract_contents(
        self,
        auxdata: _state.Globals,
        source: str,
        objects: __.cabc.Sequence[ _results.InventoryObject ],
        /,
    ) -> tuple[ _results.ContentDocument, ... ]:
        ''' Extracts content documents for inventory objects.

            Inventory object roles and types drive the choice of
            extraction strategy:

            - API objects (functions, classes, methods): signature-aware
            - Content objects (modules, pages): description-focused
            - Code examples: language-preserving extraction

            All parameters are positional-only. Abstract: the base
            implementation only raises ``NotImplementedError``.
        '''
        raise NotImplementedError

    def can_process_inventory_type( self, inventory_type: str ) -> bool:
        ''' Reports whether capabilities support the inventory type. '''
        capabilities = self.get_capabilities( )
        return capabilities.supports_inventory_type( inventory_type )
# Mapping from string keys to detections.
# NOTE(review): keys are presumably processor names; confirm at call sites.
DetectionsByProcessor: __.typx.TypeAlias = (
    __.cabc.Mapping[ str, Detection ]
)
class DetectionsForLocation( __.immut.DataclassObject ):
    ''' Collected detections for a single source location. '''

    # Location for which detections were gathered.
    source: str
    # All detections, as a mapping with string keys.
    detections: DetectionsByProcessor
    # Selected detection; may be ``None``.
    detection_optimal: __.typx.Optional[ Detection ]
    # NOTE(review): presumably elapsed detection time in milliseconds,
    # judging by the name; confirm against the code that populates it.
    time_detection_ms: int
def _inventory_validator( name: str, value: Processor ) -> bool:
    ''' Accepts only ``Processor`` instances; ``name`` is not consulted. '''
    is_processor = isinstance( value, Processor )
    return is_processor
def _structure_validator( name: str, value: Processor ) -> bool:
    ''' Accepts only ``Processor`` instances; ``name`` is not consulted. '''
    is_processor = isinstance( value, Processor )
    return is_processor
# Module-level registries, each constructed with its own validator
# function so that only ``Processor`` instances pass validation.
inventory_processors: InventoryProcessorsRegistry = (
    __.accret.ValidatorDictionary( _inventory_validator )
)
structure_processors: StructureProcessorsRegistry = (
    __.accret.ValidatorDictionary( _structure_validator )
)