Coverage for sources/librovore/structures/mkdocs/detection.py: 20%

1# vim: set filetype=python fileencoding=utf-8:

2# -*- coding: utf-8 -*-

4#============================================================================#

5# #

6# Licensed under the Apache License, Version 2.0 (the "License"); #

7# you may not use this file except in compliance with the License. #

8# You may obtain a copy of the License at #

9# #

10# http://www.apache.org/licenses/LICENSE-2.0 #

11# #

12# Unless required by applicable law or agreed to in writing, software #

13# distributed under the License is distributed on an "AS IS" BASIS, #

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #

15# See the License for the specific language governing permissions and #

16# limitations under the License. #

17# #

18#============================================================================#

21''' MkDocs detection and metadata extraction. '''

24from urllib.parse import ParseResult as _Url

26from . import __

27from . import extraction as _extraction

30_scribe = __.acquire_scribe( __name__ )

class MkDocsDetection( __.StructureDetection ):
    ''' Outcome of probing a documentation source for MkDocs structure. '''

    source: str
    has_mkdocs_yml: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection by delegating to processor for source.

            Whatever the processor's ``detect`` yields is returned, cast
            (for type checkers only) to this class.
        '''
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.cabc.Mapping[ str, __.typx.Any ] ], /, *,
        include_snippets: bool = True,
    ) -> list[ dict[ str, __.typx.Any ] ]:
        ''' Retrieves documentation content for the given objects.

            Delegates to the extraction module, forwarding the detected
            theme, or the ``absent`` sentinel if no theme was recorded.
        '''
        theme = __.absent if self.theme is None else self.theme
        return await _extraction.extract_contents(
            auxdata, source, objects,
            include_snippets = include_snippets,
            theme = theme )

async def check_mkdocs_yml(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Probes for ``mkdocs.yml`` directly beneath the source path.

        Returns the result of the URL probe for that location, using the
        probe cache from the application globals.
    '''
    config_path = f"{source.path}/mkdocs.yml"
    config_url = source._replace( path = config_path )
    return await __.probe_url( auxdata.probe_cache, config_url )

async def check_mkdocs_html_markers(
    auxdata: __.ApplicationGlobals, source: _Url
) -> float:
    ''' Scores landing page HTML for MkDocs fingerprints.

        Tries the directory URL and then ``index.html`` beneath the source
        path, stopping at the first which can be retrieved. Returns 0.0 if
        no content was obtained; otherwise, the sum of the weights of the
        markers found (case-insensitively), capped at 0.5.
    '''
    page = None
    for suffix in ( '/', '/index.html' ):
        location = source._replace( path = f"{source.path}{suffix}" )
        try:
            page = await __.retrieve_url_as_text(
                auxdata.content_cache, location, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        break
    if not page: return 0.0
    haystack = page.lower( )
    mentions_mkdocs = 'mkdocs' in haystack
    # Order matters only for floating-point accumulation; keep it stable.
    weighted_markers = (
        ( mentions_mkdocs, 0.3 ),
        ( 'mkdocs-material' in haystack, 0.2 ),
        ( '_mkdocstrings' in haystack, 0.2 ),
        ( mentions_mkdocs and 'name="generator"' in haystack, 0.3 ),
    )
    score = 0.0
    for present, weight in weighted_markers:
        if present: score += weight
    return min( score, 0.5 )

106

107

async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Infers theme metadata from the landing page of a site.

        Tries the directory URL and then ``index.html`` beneath the source
        path, stopping at the first which can be retrieved. Returns a
        mapping with a ``theme`` entry when a known theme name appears in
        the retrieved HTML (case-insensitively); otherwise, the mapping is
        empty.
    '''
    page = None
    for suffix in ( '/', '/index.html' ):
        location = source._replace( path = f"{source.path}{suffix}" )
        # TODO: Use probe_url instead of `try`.
        try:
            page = await __.retrieve_url_as_text(
                auxdata.content_cache, location, duration_max = 10.0 )
        except __.DocumentationInaccessibility: continue # noqa: PERF203
        break
    metadata: dict[ str, __.typx.Any ] = { }
    if not page: return metadata
    haystack = page.lower( )
    # 'material' also matches 'mkdocs-material', so one test covers both.
    if 'material' in haystack: metadata[ 'theme' ] = 'material'
    elif 'readthedocs' in haystack: metadata[ 'theme' ] = 'readthedocs'
    return metadata