Coverage for sources/librovore/structures/mkdocs/detection.py: 20%
54 statements
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 22:48 +0000
« prev ^ index » next coverage.py v7.10.4, created at 2025-08-20 22:48 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' MkDocs detection and metadata extraction. '''
24from urllib.parse import ParseResult as _Url
26from . import __
27from . import extraction as _extraction
30_scribe = __.acquire_scribe( __name__ )
class MkDocsDetection( __.StructureDetection ):
    ''' Outcome of probing a documentation source for MkDocs structure. '''

    # Source location which was examined.
    source: str
    # Presumably records whether a ``mkdocs.yml`` probe succeeded;
    # the code which populates it is not in this module section.
    has_mkdocs_yml: bool = False
    normalized_source: str = ''
    # Theme name, if one was detected; ``None`` otherwise.
    theme: __.typx.Optional[ str ] = None

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection by delegating to processor.

            Result of ``processor.detect`` is returned as-is; the cast only
            informs the type checker.
        '''
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.cabc.Mapping[ str, __.typx.Any ] ], /, *,
        include_snippets: bool = True,
    ) -> list[ dict[ str, __.typx.Any ] ]:
        ''' Retrieves documentation content for the given objects.

            Delegates to the extraction module, forwarding the detected
            theme or the absence sentinel when no theme is known.
        '''
        if self.theme is None: theme = __.absent
        else: theme = self.theme
        return await _extraction.extract_contents(
            auxdata, source, objects,
            theme = theme,
            include_snippets = include_snippets )
async def check_mkdocs_yml(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Checks if mkdocs.yml exists (indicates MkDocs site).

        Probes ``mkdocs.yml`` directly beneath the path of ``source``, using
        the probe cache from ``auxdata``, and returns the probe result.
    '''
    # Strip trailing slashes so that a source path like ``/docs/`` probes
    # ``/docs/mkdocs.yml`` rather than ``/docs//mkdocs.yml``.
    base_path = source.path.rstrip( '/' )
    url = source._replace( path = f"{base_path}/mkdocs.yml" )
    return await __.probe_url( auxdata.probe_cache, url )
async def check_mkdocs_html_markers(
    auxdata: __.ApplicationGlobals, source: _Url
) -> float:
    ''' Checks HTML content for MkDocs-specific markers.

        Retrieves the landing page of ``source`` (directory URL first, then
        ``index.html``) and scores the presence of MkDocs-related
        substrings, compared case-insensitively.

        Returns a confidence between 0.0 and 0.5. Returns 0.0 if no
        candidate page could be retrieved or every retrieved page was empty.
    '''
    # Strip trailing slashes so that candidates never contain ``//``.
    base_path = source.path.rstrip( '/' )
    html_candidates = [
        source._replace( path = f"{base_path}/" ),
        source._replace( path = f"{base_path}/index.html" ),
    ]
    html_content = None
    for html_url in html_candidates:
        try:
            html_content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                html_url, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        # An empty body carries no markers; fall through to next candidate.
        if html_content: break
    if not html_content: return 0.0
    confidence = 0.0
    html_content_lower = html_content.lower( )
    if 'mkdocs' in html_content_lower:
        confidence += 0.3
    if 'mkdocs-material' in html_content_lower:
        confidence += 0.2
    if '_mkdocstrings' in html_content_lower:
        confidence += 0.2
    if ( 'name="generator"' in html_content_lower
         and 'mkdocs' in html_content_lower
    ):
        confidence += 0.3
    # HTML markers alone are capped at half confidence.
    return min( confidence, 0.5 )
async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects MkDocs theme and other metadata.

        Retrieves the landing page of ``source`` (directory URL first, then
        ``index.html``) and inspects it, case-insensitively, for theme
        markers.

        Returns a dictionary with a ``'theme'`` entry of ``'material'`` or
        ``'readthedocs'`` when a marker is found; otherwise the dictionary
        is empty.
    '''
    theme_metadata: dict[ str, __.typx.Any ] = { }
    # Strip trailing slashes so that candidates never contain ``//``.
    base_path = source.path.rstrip( '/' )
    html_candidates = [
        source._replace( path = f"{base_path}/" ),
        source._replace( path = f"{base_path}/index.html" ),
    ]
    html_content = None
    for html_url in html_candidates:
        # TODO: Use probe_url instead of `try`.
        try:
            html_content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                html_url, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        # An empty body carries no markers; fall through to next candidate.
        if html_content: break
    if html_content:
        html_content_lower = html_content.lower( )
        # 'material' is a substring of 'mkdocs-material', so a single
        # membership test covers both spellings.
        if 'material' in html_content_lower:
            theme_metadata[ 'theme' ] = 'material'
        elif 'readthedocs' in html_content_lower:
            theme_metadata[ 'theme' ] = 'readthedocs'
    return theme_metadata