Coverage for sources/librovore/structures/mkdocs/detection.py: 22%

58 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-20 18:40 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' MkDocs detection and metadata extraction. ''' 

22 

23 

24from urllib.parse import ParseResult as _Url 

25 

26from . import __ 

27from . import extraction as _extraction 

28 

29 

# Module-level scribe (presumably the project's logging facade) for
# detection diagnostics — NOTE(review): acquire_scribe semantics not
# visible here; assumed analogous to logging.getLogger(__name__).
_scribe = __.acquire_scribe( __name__ )

31 

32 

class MkDocsDetection( __.StructureDetection ):
    ''' Detection result for MkDocs documentation sources. '''

    source: str
    has_mkdocs_yml: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' MkDocs processor capabilities based on universal pattern
            analysis. '''
        inventory_types = frozenset( ( 'mkdocs', 'sphinx' ) )
        features = frozenset( (
            __.ContentExtractionFeatures.Signatures,
            __.ContentExtractionFeatures.Descriptions,
            __.ContentExtractionFeatures.Arguments,
            __.ContentExtractionFeatures.Returns,
            __.ContentExtractionFeatures.Attributes,
            __.ContentExtractionFeatures.CodeExamples,
            __.ContentExtractionFeatures.Navigation,
        ) )
        # Sphinx inventories are supported but secondary for this
        # processor, hence the lower confidence.
        confidences = __.immut.Dictionary(
            { 'mkdocs': 0.8, 'sphinx': 0.7 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = inventory_types,
            content_extraction_features = features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Constructs detection from source location. '''
        result = await processor.detect( auxdata, source )
        return __.typx.cast( __.typx.Self, result )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Extracts documentation content for specified objects. '''
        # Translate an undetected theme into the project's absence
        # sentinel before delegating to the extraction module.
        theme = __.absent if self.theme is None else self.theme
        documents = await _extraction.extract_contents(
            auxdata, source, objects, theme = theme )
        return tuple( documents )

87 

88 

89 

async def check_mkdocs_yml(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Checks if mkdocs.yml exists (indicates MkDocs site). '''
    # Probe for the configuration file directly under the source path.
    candidate = source._replace( path = f"{source.path}/mkdocs.yml" )
    return await __.probe_url( auxdata.probe_cache, candidate )

96 

97 

async def check_mkdocs_html_markers(
    auxdata: __.ApplicationGlobals, source: _Url
) -> float:
    ''' Checks HTML content for MkDocs-specific markers. '''
    candidates = (
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    )
    content = None
    # Fetch the first retrievable candidate page; inaccessible URLs are
    # simply skipped.
    for candidate in candidates:
        try:
            content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                candidate, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if not content: return 0.0
    lowered = content.lower( )
    # Each marker contributes a fixed weight; the total is capped so
    # HTML evidence alone never dominates detection.
    evidence = (
        ( 'mkdocs' in lowered, 0.3 ),
        ( 'mkdocs-material' in lowered, 0.2 ),
        ( '_mkdocstrings' in lowered, 0.2 ),
        ( 'name="generator"' in lowered and 'mkdocs' in lowered, 0.3 ),
    )
    score = sum( weight for present, weight in evidence if present )
    return min( score, 0.5 )

128 

129 

async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects MkDocs theme and other metadata.

        Returns a dictionary with a 'theme' entry ('material' or
        'readthedocs') when a recognized marker appears in the site
        HTML; returns an empty dictionary otherwise.
    '''
    theme_metadata: dict[ str, __.typx.Any ] = { }
    html_candidates = [
        source._replace( path = f"{source.path}/" ),
        source._replace( path = f"{source.path}/index.html" ),
    ]
    html_content = None
    for html_url in html_candidates:
        # TODO: Use probe_url instead of `try`.
        try:
            html_content = await __.retrieve_url_as_text(
                auxdata.content_cache,
                html_url, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        else: break
    if html_content:
        html_content_lower = html_content.lower( )
        # 'mkdocs-material' contains 'material' as a substring, so a
        # single test suffices (the former `or 'mkdocs-material'` term
        # was unreachable).  NOTE(review): this broad substring match
        # can false-positive on pages which merely mention "material";
        # consider tightening to a generator-meta or asset-path check.
        if 'material' in html_content_lower:
            theme_metadata[ 'theme' ] = 'material'
        elif 'readthedocs' in html_content_lower:
            theme_metadata[ 'theme' ] = 'readthedocs'
    return theme_metadata