Coverage for sources/librovore/results.py: 26%

1# vim: set filetype=python fileencoding=utf-8:

2# -*- coding: utf-8 -*-

4#============================================================================#

5# #

6# Licensed under the Apache License, Version 2.0 (the "License"); #

7# you may not use this file except in compliance with the License. #

8# You may obtain a copy of the License at #

9# #

10# http://www.apache.org/licenses/LICENSE-2.0 #

11# #

12# Unless required by applicable law or agreed to in writing, software #

13# distributed under the License is distributed on an "AS IS" BASIS, #

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #

15# See the License for the specific language governing permissions and #

16# limitations under the License. #

17# #

18#============================================================================#

21''' Results structures.

23 Search results, inventory objects, content documents, etc....

24'''

27from . import __

28from . import exceptions as _exceptions

# NOTE(review): not referenced anywhere in the visible part of this module;
# presumably a size cap for content previews -- confirm it is still needed.
_CONTENT_PREVIEW_LIMIT = 100
# Maximum number of distinct values listed per group-by dimension when an
# inventory summary is rendered as Markdown; any remainder is reported only
# as a count.
_SUMMARY_ITEMS_LIMIT = 20

class ResultBase( __.immut.DataclassProtocol, __.typx.Protocol ):
    ''' Protocol shared by result objects which can render themselves.

        Implementations must supply both a JSON-compatible rendering and a
        Markdown rendering.
    '''

    @__.abc.abstractmethod
    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary describing the result. '''
        raise NotImplementedError

    @__.abc.abstractmethod
    def render_as_markdown(
        self, /, *,
        reveal_internals: bool = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines describing the result.

            ``reveal_internals`` is a keyword-only flag which defaults to
            False.
        '''
        raise NotImplementedError

class InventoryObject( ResultBase ):
    ''' Single documentation object drawn from an inventory source.

        Carries a common set of attribution fields plus a ``specifics``
        mapping for whatever extra metadata the inventory format supplies.
        Subclasses provide the two ``render_specifics_*`` hooks; the
        complete renderings defined here are built on top of them.
    '''

    name: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Primary object identifier from inventory source." ),
    ]
    uri: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Relative URI to object documentation content." ),
    ]
    inventory_type: __.typx.Annotated[
        str,
        __.ddoc.Doc(
            "Inventory format identifier (e.g., sphinx_objects_inv)." ),
    ]
    location_url: __.typx.Annotated[
        str, __.ddoc.Doc(
            "Complete URL to inventory location for attribution." )
    ]
    display_name: __.typx.Annotated[
        __.typx.Optional[ str ],
        __.ddoc.Doc( "Human-readable name if different from name." ),
    ] = None
    specifics: __.typx.Annotated[
        __.immut.Dictionary[ str, __.typx.Any ],
        __.ddoc.Doc(
            "Format-specific metadata (domain, role, priority, etc.)." ),
    ] = __.dcls.field( default_factory = lambda: __.immut.Dictionary( ) )

    @property
    def effective_display_name( self ) -> str:
        ''' Name to show: ``display_name`` when set, otherwise ``name``. '''
        return self.name if self.display_name is None else self.display_name

    @__.abc.abstractmethod
    def render_specifics_json(
        self, /, *,
        reveal_internals: bool = False,
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces the format-specific portion of the JSON rendering. '''
        raise NotImplementedError

    @__.abc.abstractmethod
    def render_specifics_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( '''
                Controls whether implementation-specific details (internal
                field names, version numbers, priority scores) are included.
                When False, only user-facing information is shown.
                ''' ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces the format-specific Markdown lines. '''
        raise NotImplementedError

    def render_as_json(
        self, /, *,
        reveal_internals: bool = False,
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the whole object.

            Common fields come first, followed by the entries from
            :meth:`render_specifics_json`.
        '''
        merged: dict[ str, __.typx.Any ] = {
            'name': self.name,
            'uri': self.uri,
            'inventory_type': self.inventory_type,
            'location_url': self.location_url,
            'display_name': self.display_name,
            'effective_display_name': self.effective_display_name,
        }
        # Specifics are merged last, so an entry which shares a key with a
        # common field replaces that field's value.
        merged.update( self.render_specifics_json(
            reveal_internals = reveal_internals ) )
        return __.immut.Dictionary[ str, __.typx.Any ]( merged )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the whole object.

            The first line is the title heading; the URI, type and location
            bullets follow, then the lines from
            :meth:`render_specifics_markdown`.
        '''
        header = (
            f"### `{self.effective_display_name}`",
            f"- **URI:** {self.uri}",
            f"- **Type:** {self.inventory_type}",
            f"- **Location:** {self.location_url}",
        )
        specifics_lines = self.render_specifics_markdown(
            reveal_internals = reveal_internals )
        return ( *header, *specifics_lines )

154

155

def _truncate_lines( text: str, lines_max: __.typx.Optional[ int ] ) -> str:
    ''' Limits text to at most ``lines_max`` lines, marking any cut.

        Returns the text unchanged when no limit is given or when the text
        already fits. Otherwise keeps the leading lines and appends a line
        containing ``...``. A negative limit is treated as zero, so that it
        cannot be misread as a from-the-end slice index.
    '''
    if lines_max is None: return text
    limit = max( lines_max, 0 )
    text_lines = text.split( '\n' )
    if len( text_lines ) <= limit: return text
    return '\n'.join( ( *text_lines[ : limit ], "..." ) )


class ContentDocument( ResultBase ):
    ''' Documentation content with extracted metadata and content ID. '''

    inventory_object: __.typx.Annotated[
        InventoryObject,
        __.ddoc.Doc( "Location inventory object for this content." ),
    ]
    content_id: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Deterministic identifier for content retrieval." ),
    ]
    description: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Extracted object description or summary." ),
    ] = ''
    documentation_url: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Complete URL to full documentation page." ),
    ] = ''
    extraction_metadata: __.typx.Annotated[
        __.immut.Dictionary[ str, __.typx.Any ],
        __.ddoc.Doc( "Metadata from structure processor extraction." ),
    ] = __.dcls.field( default_factory = lambda: __.immut.Dictionary( ) )

    @property
    def has_meaningful_content( self ) -> bool:
        ''' Returns True if the description is non-empty. '''
        return bool( self.description )

    def render_as_json(
        self, /, *,
        lines_max: __.typx.Optional[ int ] = None,
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Renders complete document as JSON-compatible dictionary.

            When ``lines_max`` is given, the description is cut to that many
            lines and a ``...`` line is appended if anything was dropped.
            Nested renderings are converted to plain ``dict`` objects.
        '''
        return __.immut.Dictionary[
            str, __.typx.Any
        ](
            inventory_object = dict( self.inventory_object.render_as_json( ) ),
            content_id = self.content_id,
            description = _truncate_lines( self.description, lines_max ),
            documentation_url = self.documentation_url,
            extraction_metadata = dict( self.extraction_metadata ),
            has_meaningful_content = self.has_meaningful_content,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
        lines_max: __.typx.Annotated[
            __.typx.Optional[ int ],
            __.ddoc.Doc( "Maximum lines to display for description." ),
        ] = None,
        include_title: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Whether to include document title header." ),
        ] = True,
    ) -> tuple[ str, ... ]:
        ''' Renders complete document as Markdown lines for display.

            Order of output: optional title heading, URL bullet (only when
            a documentation URL is set), content ID bullet, the inventory
            object's format-specific lines, then a blank line and the
            description (only when a description is present).
        '''
        lines: list[ str ] = [ ]
        if include_title:
            lines.append(
                f"### `{self.inventory_object.effective_display_name}`" )
        if self.documentation_url:
            lines.append( f"- **URL:** {self.documentation_url}" )
        # The content ID is always emitted.
        lines.append( f"- **Content ID:** `{self.content_id}`" )
        lines.extend( self.inventory_object.render_specifics_markdown(
            reveal_internals = reveal_internals ) )
        if self.description:
            lines.append( "" )
            lines.append( _truncate_lines( self.description, lines_max ) )
        return tuple( lines )

249

250

class InventoryLocationInfo( __.immut.DataclassObject ):
    ''' Describes a detected inventory location and its processor. '''

    inventory_type: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Inventory format type identifier." ),
    ]
    location_url: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Complete URL to inventory location." ),
    ]
    processor_name: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Name of processor handling this location." ),
    ]
    confidence: __.typx.Annotated[
        float,
        __.ddoc.Doc( "Detection confidence score (0.0-1.0)." ),
    ]
    object_count: __.typx.Annotated[
        int,
        __.ddoc.Doc( "Total objects available in this inventory." ),
    ]

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary of all five fields.

            Keys are the field names, in declaration order.
        '''
        field_names = (
            'inventory_type', 'location_url', 'processor_name',
            'confidence', 'object_count' )
        return __.immut.Dictionary(
            { name: getattr( self, name ) for name in field_names } )

284

285

class SearchMetadata( __.immut.DataclassObject ):
    ''' Metadata and statistics describing one search operation. '''

    results_count: __.typx.Annotated[
        int,
        __.ddoc.Doc( "Number of results returned to user." ),
    ]
    results_max: __.typx.Annotated[
        int,
        __.ddoc.Doc( "Maximum results requested by user." ),
    ]
    matches_total: __.typx.Annotated[
        __.typx.Optional[ int ],
        __.ddoc.Doc( "Total matching objects before limit applied." ),
    ] = None
    search_time_ms: __.typx.Annotated[
        __.typx.Optional[ int ],
        __.ddoc.Doc( "Search execution time in milliseconds." ),
    ] = None
    filters_applied: __.typx.Annotated[
        tuple[ str, ... ],
        __.ddoc.Doc( "Filter names that were successfully applied." ),
    ] = ( )
    filters_ignored: __.typx.Annotated[
        tuple[ str, ... ],
        __.ddoc.Doc( "Filter names that were not supported by processor." ),
    ] = ( )

    @property
    def results_truncated( self ) -> bool:
        ''' Whether fewer results were returned than matched.

            Always False when the total match count is unknown.
        '''
        return (
            self.matches_total is not None
            and self.results_count < self.matches_total )

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary of the metadata.

            Includes the computed ``results_truncated`` flag; filter name
            tuples are converted to lists.
        '''
        rendering: dict[ str, __.typx.Any ] = {
            'results_count': self.results_count,
            'results_max': self.results_max,
            'matches_total': self.matches_total,
            'search_time_ms': self.search_time_ms,
            'results_truncated': self.results_truncated,
            'filters_applied': list( self.filters_applied ),
            'filters_ignored': list( self.filters_ignored ),
        }
        return __.immut.Dictionary( rendering )

332

333

class SearchResult( ResultBase ):
    ''' Pairs an inventory object with its score and match reasons. '''

    inventory_object: __.typx.Annotated[
        InventoryObject,
        __.ddoc.Doc( "Matched inventory object with metadata." ),
    ]
    score: __.typx.Annotated[
        float,
        __.ddoc.Doc( "Search relevance score (0.0-1.0)." ),
    ]
    match_reasons: __.typx.Annotated[
        tuple[ str, ... ],
        __.ddoc.Doc( "Detailed reasons for search match." ),
    ]

    @classmethod
    def from_inventory_object(
        cls,
        inventory_object: InventoryObject, *,
        score: float,
        match_reasons: __.cabc.Sequence[ str ],
    ) -> __.typx.Self:
        ''' Builds search result from inventory object and scoring data.

            The match reasons may be any sequence; they are stored as a
            tuple.
        '''
        reasons = tuple( match_reasons )
        return cls(
            inventory_object = inventory_object,
            score = score,
            match_reasons = reasons )

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the search result.

            The nested inventory object rendering becomes a plain ``dict``
            and the match reasons become a list.
        '''
        inventory_json = dict( self.inventory_object.render_as_json( ) )
        return __.immut.Dictionary[ str, __.typx.Any ](
            inventory_object = inventory_json,
            score = self.score,
            match_reasons = list( self.match_reasons ),
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the search result.

            The heading shows the display name and the score to two decimal
            places. Match reasons appear only when internals are revealed
            and at least one reason exists.
        '''
        display_name = self.inventory_object.effective_display_name
        lines = [ f"### `{display_name}` (Score: {self.score:.2f})" ]
        if reveal_internals and self.match_reasons:
            lines.append(
                f"- **Match reasons:** {', '.join( self.match_reasons )}" )
        # The inventory object's first line is its own title heading, which
        # would duplicate the scored heading above, so it is dropped.
        _, *detail_lines = self.inventory_object.render_as_markdown(
            reveal_internals = reveal_internals )
        lines.extend( detail_lines )
        return tuple( lines )

393

394

class ContentQueryResult( ResultBase ):
    ''' Aggregate result returned for a content query. '''

    location: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Primary location URL for this query." ),
    ]
    term: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Search term used for this query." ),
    ]
    documents: __.typx.Annotated[
        tuple[ ContentDocument, ... ],
        __.ddoc.Doc( "Documentation content for matching objects." ) ]
    search_metadata: __.typx.Annotated[
        SearchMetadata,
        __.ddoc.Doc( "Search execution and result metadata." ),
    ]
    inventory_locations: __.typx.Annotated[
        tuple[ InventoryLocationInfo, ... ],
        __.ddoc.Doc( "Information about inventory locations used." ),
    ]

    def render_as_json(
        self, /, *,
        lines_max: __.typx.Optional[ int ] = None,
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the query result.

            ``lines_max`` is forwarded to each document's JSON rendering.
            Nested renderings are converted to plain ``dict`` objects.
        '''
        documents_json = [
            dict( document.render_as_json( lines_max = lines_max ) )
            for document in self.documents ]
        locations_json = [
            dict( info.render_as_json( ) )
            for info in self.inventory_locations ]
        metadata_json = dict( self.search_metadata.render_as_json( ) )
        return __.immut.Dictionary[ str, __.typx.Any ](
            location = self.location,
            term = self.term,
            documents = documents_json,
            search_metadata = metadata_json,
            inventory_locations = locations_json,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
        lines_max: __.typx.Annotated[
            __.typx.Optional[ int ],
            __.ddoc.Doc( "Maximum lines to display per content result." ),
        ] = None,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the query result.

            The title is marked as truncated whenever a line limit is
            supplied, regardless of whether any document was actually cut.
            Documents are rendered without their own title headings; a
            numbered separator line introduces each one.
        '''
        suffix = "" if lines_max is None else " (truncated)"
        lines = [
            "# Content Query Results" + suffix,
            f"- **Term:** {self.term}",
        ]
        if reveal_internals:
            lines.append( f"- **Location:** {self.location}" )
        metadata = self.search_metadata
        lines.append(
            "- **Results:** "
            f"{metadata.results_count} of {metadata.results_max}" )
        if not self.documents: return tuple( lines )
        lines.extend( ( "", "## Documents" ) )
        separator = "\n📄 ── Document {} ──────────────────── 📄\n"
        for position, document in enumerate( self.documents, 1 ):
            lines.append( separator.format( position ) )
            lines.extend( document.render_as_markdown(
                reveal_internals = reveal_internals,
                lines_max = lines_max,
                include_title = False ) )
        return tuple( lines )

473

474

class InventoryQueryResult( ResultBase ):
    ''' Aggregate result returned for an inventory query.

        Supports two renderings: a listing of the matched objects and a
        summary of how the objects distribute over chosen ``specifics``
        keys.
    '''

    location: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Primary location URL for this query." ),
    ]
    term: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Search term used for this query." ),
    ]
    objects: __.typx.Annotated[
        tuple[ InventoryObject, ... ],
        __.ddoc.Doc( "Inventory objects matching search criteria." ),
    ]
    search_metadata: __.typx.Annotated[
        SearchMetadata,
        __.ddoc.Doc( "Search execution and result metadata." ),
    ]
    inventory_locations: __.typx.Annotated[
        tuple[ InventoryLocationInfo, ... ],
        __.ddoc.Doc( "Information about inventory locations used." ),
    ]

    def render_as_json(
        self, /, *,
        reveal_internals: bool = False,
        summarize: bool = False,
        group_by: __.cabc.Sequence[ str ] = ( ),
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the query result.

            With ``summarize`` set, delegates to the summary rendering.
            Otherwise lists at most ``search_metadata.results_max`` objects.
        '''
        if summarize:
            return self._render_summary_json( group_by, reveal_internals )
        shown = self.objects[ : self.search_metadata.results_max ]
        objects_json = [
            dict( item.render_as_json( reveal_internals = reveal_internals ) )
            for item in shown ]
        locations_json = [
            dict( info.render_as_json( ) )
            for info in self.inventory_locations ]
        metadata_json = dict( self.search_metadata.render_as_json( ) )
        return __.immut.Dictionary[ str, __.typx.Any ](
            location = self.location,
            term = self.term,
            objects = objects_json,
            search_metadata = metadata_json,
            inventory_locations = locations_json,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
        summarize: bool = False,
        group_by: __.cabc.Sequence[ str ] = ( ),
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the query result.

            With ``summarize`` set, delegates to the summary rendering.
            Otherwise emits a header, any filter warnings or no-match
            notice, and then at most ``search_metadata.results_max``
            objects, each introduced by a numbered separator line.
        '''
        if summarize:
            return self._render_summary_markdown( group_by, reveal_internals )
        metadata = self.search_metadata
        shown = self.objects[ : metadata.results_max ]
        lines = [ "# Inventory Query Results", f"- **Term:** {self.term}" ]
        if reveal_internals:
            lines.append( f"- **Location:** {self.location}" )
        lines.append(
            f"- **Results:** {len( shown )} of {len( self.objects )}" )
        nothing_matched = not self.objects
        if metadata.filters_ignored:
            ignored = ', '.join( metadata.filters_ignored )
            lines.append( "" )
            lines.append( "⚠️ **Warning: Unsupported Filters**" )
            lines.append(
                "The following filters are not supported by this "
                f"processor and were ignored: {ignored}" )
            if nothing_matched:
                lines.append(
                    "No results returned due to unsupported filters. "
                    "Remove unsupported filters to see results." )
        elif metadata.filters_applied and nothing_matched:
            applied = ', '.join( metadata.filters_applied )
            lines.append( "" )
            lines.append( "**No Matches**" )
            lines.append( f"Filters applied ({applied}) matched 0 objects." )
        if not shown: return tuple( lines )
        lines.extend( ( "", "## Objects" ) )
        separator = "\n📦 ── Object {} ─────────────────────── 📦\n"
        for position, item in enumerate( shown, 1 ):
            lines.append( separator.format( position ) )
            lines.extend( item.render_as_markdown(
                reveal_internals = reveal_internals ) )
        return tuple( lines )

    def _render_summary_json(
        self,
        group_by: __.cabc.Sequence[ str ],
        reveal_internals: bool,
    ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces summary statistics as JSON-compatible dictionary.

            ``reveal_internals`` is accepted but not consulted here.
        '''
        return __.immut.Dictionary[ str, __.typx.Any ](
            location = self.location,
            term = self.term,
            matches_total = len( self.objects ),
            group_by = list( group_by ),
            distributions = self._compute_distributions( group_by ),
            search_metadata = dict( self.search_metadata.render_as_json( ) ),
        )

    def _render_summary_markdown(
        self,
        group_by: __.cabc.Sequence[ str ],
        reveal_internals: bool,
    ) -> tuple[ str, ... ]:
        ''' Produces summary statistics as Markdown lines.

            ``reveal_internals`` is accepted but not consulted here.
        '''
        lines = [
            "# Inventory Query Summary",
            f"- **Term:** {self.term}",
            f"- **Total matches:** {len( self.objects )}",
        ]
        if group_by:
            lines.append( f"- **Grouped by:** {', '.join( group_by )}" )
        if self.search_metadata.filters_ignored:
            lines.extend( self._render_filter_warnings( ) )
        # Appends the per-dimension sections to ``lines`` in place and
        # reports which dimensions had nothing to show.
        dimensions_without_values = self._render_distribution_sections(
            lines, group_by, self._compute_distributions( group_by ) )
        if dimensions_without_values:
            lines.extend( self._render_empty_dimension_warnings(
                dimensions_without_values ) )
        return tuple( lines )

    def _render_filter_warnings( self ) -> tuple[ str, ... ]:
        ''' Produces warning lines naming the unsupported filters. '''
        ignored = ', '.join( self.search_metadata.filters_ignored )
        return (
            "",
            "⚠️ **Warning: Unsupported Filters**",
            "The following filters are not supported by this "
            f"processor: {ignored}",
        )

    def _render_distribution_sections(
        self,
        lines: list[ str ],
        group_by: __.cabc.Sequence[ str ],
        distributions: dict[ str, dict[ str, int ] ],
    ) -> list[ str ]:
        ''' Appends one section per dimension to ``lines``.

            Mutates ``lines`` in place. Within a section, values are ordered
            by descending count and capped at ``_SUMMARY_ITEMS_LIMIT``
            entries, with a trailing note counting any remainder. Returns
            the dimensions whose distribution was empty; dimensions missing
            from ``distributions`` are skipped silently.
        '''
        empty_dimensions: list[ str ] = [ ]
        for dimension in group_by:
            if dimension not in distributions: continue
            counts = distributions[ dimension ]
            if not counts:
                empty_dimensions.append( dimension )
                continue
            heading = dimension.replace( '_', ' ' ).title( )
            lines.extend( ( "", f"### By {heading}" ) )
            total = sum( counts.values( ) )
            ranked = sorted(
                counts.items( ),
                key = lambda entry: entry[ 1 ],
                reverse = True )
            for value, count in ranked[ : _SUMMARY_ITEMS_LIMIT ]:
                pct = ( count / total * 100 ) if total > 0 else 0
                lines.append( f"- `{value}`: {count} ({pct:.1f}%)" )
            remaining = len( ranked ) - _SUMMARY_ITEMS_LIMIT
            if remaining > 0:
                lines.append( f"- ...and {remaining} more" )
        return empty_dimensions

    def _render_empty_dimension_warnings(
        self, empty_dimensions: list[ str ]
    ) -> tuple[ str, ... ]:
        ''' Produces warning lines naming dimensions without values. '''
        listing = ', '.join( empty_dimensions )
        return (
            "",
            "⚠️ **Warning: Empty Group-By Dimensions**",
            f"The following dimensions have no values: {listing}. "
            "This may indicate unsupported dimensions for this "
            "processor.",
        )

    def _compute_distributions(
        self, group_by: __.cabc.Sequence[ str ]
    ) -> dict[ str, dict[ str, int ] ]:
        ''' Counts objects per value for each requested dimension.

            Each dimension is looked up as a key in every object's
            ``specifics``; values are stringified before counting and
            objects lacking the key (or holding None) are not counted.
        '''
        distributions: dict[ str, dict[ str, int ] ] = { }
        for dimension in group_by:
            tally: dict[ str, int ] = { }
            for item in self.objects:
                value = item.specifics.get( dimension )
                if value is None: continue
                label = str( value )
                tally[ label ] = tally.get( label, 0 ) + 1
            distributions[ dimension ] = tally
        return distributions

684

685

class Detection( __.immut.DataclassObject ):
    ''' One processor's detection outcome, with confidence score. '''

    processor_name: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Name of the processor that can handle this location." ),
    ]
    confidence: __.typx.Annotated[
        float,
        __.ddoc.Doc( "Detection confidence score (0.0-1.0)." ),
    ]
    processor_type: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Type of processor (inventory, structure)." ),
    ]
    detection_metadata: __.typx.Annotated[
        __.immut.Dictionary[ str, __.typx.Any ],
        __.ddoc.Doc( "Processor-specific detection metadata." ),
    ] = __.dcls.field( default_factory = lambda: __.immut.Dictionary( ) )

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the detection.

            The detection metadata is copied into a plain ``dict``.
        '''
        metadata = dict( self.detection_metadata )
        return __.immut.Dictionary[ str, __.typx.Any ](
            processor_name = self.processor_name,
            confidence = self.confidence,
            processor_type = self.processor_type,
            detection_metadata = metadata,
        )

716

717

class DetectionsResult( ResultBase ):
    ''' Outcome of a detection run: all detections plus the chosen one. '''

    source: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Primary location URL for detection operation." ),
    ]
    detections: __.typx.Annotated[
        tuple[ Detection, ... ],
        __.ddoc.Doc( "All processor detections found for location." ),
    ]
    detection_optimal: __.typx.Annotated[
        __.typx.Optional[ Detection ],
        __.ddoc.Doc( "Best detection result based on confidence scoring." ),
    ]
    time_detection_ms: __.typx.Annotated[
        int,
        __.ddoc.Doc( "Detection operation time in milliseconds." ),
    ]

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the detection run.

            The optimal detection is rendered as a plain ``dict``, or as
            None when there is none.
        '''
        optimal_json = None
        if self.detection_optimal:
            optimal_json = dict( self.detection_optimal.render_as_json( ) )
        return __.immut.Dictionary[ str, __.typx.Any ](
            source = self.source,
            detections = [
                dict( item.render_as_json( ) ) for item in self.detections ],
            detection_optimal = optimal_json,
            time_detection_ms = self.time_detection_ms,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the detection run.

            Source, timing and the full detection list are shown only when
            internals are revealed; the optimal processor (or a notice that
            none was found) is always shown.
        '''
        lines = [ "# Detection Results" ]
        if reveal_internals:
            lines.append( f"- **Source:** {self.source}" )
            lines.append(
                f"- **Detection time:** {self.time_detection_ms}ms" )
        optimal = self.detection_optimal
        if optimal:
            lines.append(
                "- **Optimal processor:** "
                f"{optimal.processor_name} ({optimal.processor_type})" )
            lines.append( f"- **Confidence:** {optimal.confidence:.2f}" )
        else:
            lines.append( "- **No optimal processor found**" )
        if reveal_internals and self.detections:
            lines.extend( ( "", "## All Detections" ) )
            for item in self.detections:
                lines.append(
                    f"- **{item.processor_name}** "
                    f"({item.processor_type}): {item.confidence:.2f}" )
        return tuple( lines )

788

789

class ProcessorInfo( ResultBase ):
    ''' Describes one processor together with its capabilities. '''

    processor_name: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Name of the processor for identification." ),
    ]
    processor_type: __.typx.Annotated[
        str,
        __.ddoc.Doc( "Type of processor (inventory, structure)." ),
    ]
    capabilities: __.typx.Annotated[
        __.typx.Any, # Will be _interfaces.ProcessorCapabilities after import
        __.ddoc.Doc( "Complete capability description for processor." ),
    ]

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the processor.

            The capabilities entry is whatever the capabilities object's
            own ``render_as_json`` returns.
        '''
        # NOTE(review): unlike the other nested renderings in this module,
        # this value is not wrapped in ``dict( ... )`` -- confirm whether
        # that difference is intentional.
        capabilities_json = self.capabilities.render_as_json( )
        return __.immut.Dictionary[ str, __.typx.Any ](
            processor_name = self.processor_name,
            processor_type = self.processor_type,
            capabilities = capabilities_json,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the processor.

            Always emits a heading with name and type; the capabilities
            lines follow only when internals are revealed.
        '''
        heading = f"### `{self.processor_name}` ({self.processor_type})"
        if not reveal_internals: return ( heading, )
        return ( heading, *self.capabilities.render_as_markdown( ) )

829

830

class ProcessorsSurveyResult( ResultBase ):
    ''' Outcome of a survey of available processors. '''

    genus: __.typx.Annotated[
        __.typx.Any, # Will be _interfaces.ProcessorGenera after import
        __.ddoc.Doc(
            "Processor genus that was surveyed (inventory or structure)." ),
    ]
    filter_name: __.typx.Annotated[
        __.typx.Optional[ str ],
        __.ddoc.Doc( "Optional processor name filter applied to survey." ),
    ] = None
    processors: __.typx.Annotated[
        tuple[ ProcessorInfo, ... ],
        __.ddoc.Doc( "Available processors matching survey criteria." ),
    ]
    survey_time_ms: __.typx.Annotated[
        int,
        __.ddoc.Doc( "Survey operation time in milliseconds." ),
    ]

    def render_as_json( self ) -> __.immut.Dictionary[ str, __.typx.Any ]:
        ''' Produces JSON-compatible dictionary for the survey.

            The genus is rendered as its ``value`` attribute when it has
            one, otherwise as its string form.
        '''
        if hasattr( self.genus, 'value' ): genus_json = self.genus.value
        else: genus_json = str( self.genus )
        return __.immut.Dictionary[ str, __.typx.Any ](
            genus = genus_json,
            filter_name = self.filter_name,
            processors = [
                dict( info.render_as_json( ) ) for info in self.processors ],
            survey_time_ms = self.survey_time_ms,
        )

    def render_as_markdown(
        self, /, *,
        reveal_internals: __.typx.Annotated[
            bool,
            __.ddoc.Doc( "Controls whether internal details are shown." ),
        ] = False,
    ) -> tuple[ str, ... ]:
        ''' Produces Markdown lines for the survey.

            Survey time is shown only when internals are revealed, and the
            filter only when one was set. Processors are separated from one
            another by a single blank line, with none after the last.
        '''
        if hasattr( self.genus, 'value' ): genus_name = self.genus.value
        else: genus_name = str( self.genus )
        lines = [ f"# Processor Survey Results ({genus_name})" ]
        if reveal_internals:
            lines.append( f"- **Survey time:** {self.survey_time_ms}ms" )
        if self.filter_name:
            lines.append( f"- **Filter:** {self.filter_name}" )
        lines.append( f"- **Processors found:** {len( self.processors )}" )
        if not self.processors: return tuple( lines )
        lines.append( "" )
        for position, info in enumerate( self.processors, 1 ):
            # A blank line goes between processors, never after the last.
            if position > 1: lines.append( "" )
            lines.append( f"📦 ── Processor {position} ──────────" )
            lines.extend( info.render_as_markdown(
                reveal_internals = reveal_internals ) )
        return tuple( lines )

896

897

def parse_content_id( content_id: str ) -> tuple[ str, str ]:
    ''' Parses content identifier back to location and name components.

        Decodes the identifier from Base64 to UTF-8 text and splits that
        text at its last ``:``.

        Returns tuple of (location, name) extracted from content_id.
        Raises ContentIdInvalidity if content_id cannot be decoded or the
        decoded text contains no ``:`` separator.

        Because the split is on the last ``:``, an identifier produced from
        a name which itself contains ``:`` does not round-trip: the part of
        the name before its final ``:`` ends up in the location.
    '''
    try:
        identifier_source = __.base64.b64decode(
            content_id.encode( 'ascii' ) ).decode( 'utf-8' )
    # Non-ASCII input, malformed Base64 and invalid UTF-8 all surface as
    # ValueError subclasses; anything else is a genuine programming error
    # and is allowed to propagate.
    except ValueError as exc:
        raise _exceptions.ContentIdInvalidity(
            content_id, "Base64 decoding failed" ) from exc
    location, separator, name = identifier_source.rpartition( ':' )
    if not separator:
        raise _exceptions.ContentIdInvalidity(
            content_id, "Missing location:object separator" )
    return location, name

916

917

def produce_content_id( location: str, name: str ) -> str:
    ''' Produces deterministic content identifier from location and name.

        The identifier is the Base64 encoding of the UTF-8 bytes of
        ``location``, a ``:`` and ``name``, returned as ASCII text. The
        same inputs always yield the same identifier, so no state needs to
        be kept between producing an identifier and parsing it later.
    '''
    payload = f"{location}:{name}".encode( 'utf-8' )
    encoded = __.base64.b64encode( payload )
    return encoded.decode( 'ascii' )

# Sequence aliases for the three per-item result types defined above.
ContentDocuments: __.typx.TypeAlias = __.cabc.Sequence[ ContentDocument ]
InventoryObjects: __.typx.TypeAlias = __.cabc.Sequence[ InventoryObject ]
SearchResults: __.typx.TypeAlias = __.cabc.Sequence[ SearchResult ]

937