Coverage for sources/librovore/structures/sphinx/detection.py: 27%
46 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-20 18:40 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-10-20 18:40 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Sphinx detection and metadata extraction. '''
24from urllib.parse import ParseResult as _Url
26from . import __
27from . import extraction as _extraction
28from . import urls as _urls
# Module-level scribe acquired for this module's name. Presumably a logger
# for diagnostics -- confirm against `__.acquire_scribe`.
_scribe = __.acquire_scribe( __name__ )
class SphinxDetection( __.StructureDetection ):
    ''' Detection result for a Sphinx documentation source.

        Carries the source location plus optional metadata and provides
        content extraction for inventory objects from that source.
    '''

    # Location of the documentation source.
    source: str
    # Presumably records whether ``searchindex.js`` was found; the code
    # which sets this is not in this class -- confirm against the detector.
    has_searchindex: bool = False
    # Presumably a canonicalized form of ``source`` -- confirm.
    normalized_source: str = ''
    # Theme name forwarded to content extraction; ``None`` if unknown.
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' Reports capabilities of the Sphinx structure processor.

            Declares the ``'sphinx'`` inventory type (with confidence
            ``1.0``) and the content extraction features on offer.
        '''
        features = __.ContentExtractionFeatures
        extraction_features = frozenset( (
            features.Signatures,
            features.Descriptions,
            features.Arguments,
            features.Returns,
            features.Attributes,
            features.CodeExamples,
            features.CrossReferences,
        ) )
        inventory_types = frozenset( ( 'sphinx', ) )
        confidences = __.immut.Dictionary( { 'sphinx': 1.0 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = inventory_types,
            content_extraction_features = extraction_features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection for source location via the processor.

            The processor's result is returned as-is; the cast informs
            static type checkers only and performs no runtime check.
        '''
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Extracts documentation content for the given objects.

            Passes the detected theme to the extractor, substituting the
            ``absent`` sentinel if no theme is known, and returns the
            extracted documents as a tuple.
        '''
        if self.theme is None:
            theme = __.absent
        else:
            theme = self.theme
        extracted = await _extraction.extract_contents(
            auxdata, source, objects, theme = theme )
        return tuple( extracted )
async def check_searchindex(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Probes for ``searchindex.js`` (indicates full Sphinx site).

        Derives the search index URL from the source URL and returns the
        outcome of probing it through the probe cache.
    '''
    searchindex_url = _urls.derive_searchindex_url( source )
    is_present = await __.probe_url( auxdata.probe_cache, searchindex_url )
    return is_present
# Ordered ( theme name, markers ) pairs. Order is significant: the first
# entry with any marker present in the lowercased page text wins.
_THEME_MARKERS: tuple[ tuple[ str, tuple[ str, ... ] ], ... ] = (
    ( 'furo', ( 'furo', 'css/furo.css' ) ),
    ( 'sphinx_rtd_theme', ( 'sphinx_rtd_theme', 'css/theme.css' ) ),
    ( 'alabaster', ( 'alabaster', 'css/alabaster.css' ) ),
    ( 'pydoctheme', ( 'pydoctheme.css', 'classic.css' ) ),
    ( 'flask', ( 'flask.css', ) ),
    ( 'nature', ( 'css/nature.css', ) ),
    ( 'classic', ( 'css/default.css', ) ),
    ( 'sphinx_book_theme', ( 'sphinx_book_theme', ) ),
    ( 'pydata_sphinx_theme', ( 'pydata_sphinx_theme', ) ),
)


async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects Sphinx theme from the documentation's HTML page.

        Retrieves the page at the HTML URL derived from the source and
        searches its lowercased text for known theme markers. The result
        has a ``'theme'`` entry for the first matching theme; it is an
        empty dictionary if the page is inaccessible or nothing matches.
    '''
    metadata: dict[ str, __.typx.Any ] = { }
    page_url = _urls.derive_html_url( source )
    try:
        # TODO: Use probe_url instead of `try`.
        page = await __.retrieve_url_as_text(
            auxdata.content_cache, page_url, duration_max = 10.0 )
    except __.DocumentationInaccessibility:
        # Best effort: an inaccessible page yields no theme metadata.
        return metadata
    haystack = page.lower( )
    for theme, markers in _THEME_MARKERS:
        if any( marker in haystack for marker in markers ):
            metadata[ 'theme' ] = theme
            break
    return metadata