Coverage for sources/librovore/structures/mkdocs/detection.py: 22%
58 statements
« prev ^ index » next — coverage.py v7.11.0, created at 2025-10-20 18:40 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' MkDocs detection and metadata extraction. '''
24from urllib.parse import ParseResult as _Url
26from . import __
27from . import extraction as _extraction
30_scribe = __.acquire_scribe( __name__ )
class MkDocsDetection( __.StructureDetection ):
    ''' Detection result for MkDocs documentation sources. '''

    source: str
    has_mkdocs_yml: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' MkDocs processor capabilities based on universal pattern
            analysis. '''
        inventory_types = frozenset( ( 'mkdocs', 'sphinx' ) )
        features = frozenset( (
            __.ContentExtractionFeatures.Signatures,
            __.ContentExtractionFeatures.Descriptions,
            __.ContentExtractionFeatures.Arguments,
            __.ContentExtractionFeatures.Returns,
            __.ContentExtractionFeatures.Attributes,
            __.ContentExtractionFeatures.CodeExamples,
            __.ContentExtractionFeatures.Navigation,
        ) )
        # Sphinx inventories are handled but mkdocs remains the primary
        # target, hence the lower sphinx confidence.
        confidences = __.immut.Dictionary(
            { 'mkdocs': 0.8, 'sphinx': 0.7 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = inventory_types,
            content_extraction_features = features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Constructs detection from source location. '''
        result = await processor.detect( auxdata, source )
        return __.typx.cast( __.typx.Self, result )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Extracts documentation content for specified objects. '''
        # Absent sentinel lets the extractor apply its own theme default.
        theme_argument = __.absent if self.theme is None else self.theme
        return tuple( await _extraction.extract_contents(
            auxdata, source, objects, theme = theme_argument ) )
async def check_mkdocs_yml(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Checks if mkdocs.yml exists (indicates MkDocs site). '''
    # Presence of the config file at the site root marks a MkDocs site.
    config_url = source._replace( path = f"{source.path}/mkdocs.yml" )
    return await __.probe_url( auxdata.probe_cache, config_url )
async def check_mkdocs_html_markers(
    auxdata: __.ApplicationGlobals, source: _Url
) -> float:
    ''' Checks HTML content for MkDocs-specific markers. '''
    candidates = (
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    )
    content = None
    for candidate in candidates:
        try:
            content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                candidate, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if not content: return 0.0
    lowered = content.lower( )
    score = 0.0
    if 'mkdocs' in lowered: score += 0.3
    if 'mkdocs-material' in lowered: score += 0.2
    if '_mkdocstrings' in lowered: score += 0.2
    # Generator meta tag plus the mkdocs marker is a stronger signal.
    if 'name="generator"' in lowered and 'mkdocs' in lowered:
        score += 0.3
    # HTML markers alone never exceed moderate confidence.
    return min( score, 0.5 )
async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects MkDocs theme and other metadata. '''
    metadata: dict[ str, __.typx.Any ] = { }
    candidates = (
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    )
    content = None
    for candidate in candidates:
        # TODO: Use probe_url instead of `try`.
        try:
            content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                candidate, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if content:
        lowered = content.lower( )
        is_material = (
            'material' in lowered or 'mkdocs-material' in lowered )
        if is_material: metadata[ 'theme' ] = 'material'
        elif 'readthedocs' in lowered:
            metadata[ 'theme' ] = 'readthedocs'
    return metadata