Coverage for sources/librovore/structures/mkdocs/detection.py: 22%

58 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-20 18:40 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' MkDocs detection and metadata extraction. ''' 

22 

23 

24from urllib.parse import ParseResult as _Url 

25 

26from . import __ 

27from . import extraction as _extraction 

28 

29 

# Module-level scribe (presumably the project's logging facade) for
# detection diagnostics — NOTE(review): acquire_scribe semantics not
# visible here; assumed analogous to logging.getLogger(__name__).
_scribe = __.acquire_scribe( __name__ )

31 

32 

class MkDocsDetection( __.StructureDetection ):
    ''' Detection result for MkDocs documentation sources. '''

    source: str
    has_mkdocs_yml: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' MkDocs processor capabilities based on universal pattern
            analysis. '''
        inventory_types = frozenset( ( 'mkdocs', 'sphinx' ) )
        features = frozenset( (
            __.ContentExtractionFeatures.Signatures,
            __.ContentExtractionFeatures.Descriptions,
            __.ContentExtractionFeatures.Arguments,
            __.ContentExtractionFeatures.Returns,
            __.ContentExtractionFeatures.Attributes,
            __.ContentExtractionFeatures.CodeExamples,
            __.ContentExtractionFeatures.Navigation,
        ) )
        # Sphinx inventories are supported but secondary for this
        # processor, hence the lower confidence.
        confidences = __.immut.Dictionary(
            { 'mkdocs': 0.8, 'sphinx': 0.7 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = inventory_types,
            content_extraction_features = features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Constructs detection from source location. '''
        result = await processor.detect( auxdata, source )
        return __.typx.cast( __.typx.Self, result )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Extracts documentation content for specified objects. '''
        # Translate an undetected theme into the project's absence
        # sentinel before delegating to the extraction module.
        theme = __.absent if self.theme is None else self.theme
        documents = await _extraction.extract_contents(
            auxdata, source, objects, theme = theme )
        return tuple( documents )

87 

88 

89 

async def check_mkdocs_yml(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Checks if mkdocs.yml exists (indicates MkDocs site). '''
    # Probe for the configuration file directly under the source path.
    candidate = source._replace( path = f"{source.path}/mkdocs.yml" )
    return await __.probe_url( auxdata.probe_cache, candidate )

96 

97 

async def check_mkdocs_html_markers(
    auxdata: __.ApplicationGlobals, source: _Url
) -> float:
    ''' Checks HTML content for MkDocs-specific markers. '''
    candidates = (
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    )
    content = None
    # Fetch the first retrievable candidate page; inaccessible URLs are
    # simply skipped.
    for candidate in candidates:
        try:
            content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                candidate, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if not content: return 0.0
    lowered = content.lower( )
    # Each marker contributes a fixed weight; the total is capped so
    # HTML evidence alone never dominates detection.
    evidence = (
        ( 'mkdocs' in lowered, 0.3 ),
        ( 'mkdocs-material' in lowered, 0.2 ),
        ( '_mkdocstrings' in lowered, 0.2 ),
        ( 'name="generator"' in lowered and 'mkdocs' in lowered, 0.3 ),
    )
    score = sum( weight for present, weight in evidence if present )
    return min( score, 0.5 )

128 

129 

async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects MkDocs theme and other metadata.

        Returns a dictionary with a 'theme' entry ('material' or
        'readthedocs') when a recognized marker appears in the site
        HTML; returns an empty dictionary otherwise.
    '''
    theme_metadata: dict[ str, __.typx.Any ] = { }
    html_candidates = [
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    ]
    html_content = None
    for html_url in html_candidates:
        # TODO: Use probe_url instead of `try`.
        try:
            html_content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                html_url, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if html_content:
        html_content_lower = html_content.lower( )
        # 'mkdocs-material' contains 'material' as a substring, so a
        # single test suffices (the former `or 'mkdocs-material'` term
        # was unreachable).  NOTE(review): this broad substring match
        # can false-positive on pages which merely mention "material";
        # consider tightening to a generator-meta or asset-path check.
        if 'material' in html_content_lower:
            theme_metadata[ 'theme' ] = 'material'
        elif 'readthedocs' in html_content_lower:
            theme_metadata[ 'theme' ] = 'readthedocs'
    return theme_metadata