Coverage for sources/librovore/structures/sphinx/detection.py: 27%

1# vim: set filetype=python fileencoding=utf-8:

2# -*- coding: utf-8 -*-

4#============================================================================#

5# #

6# Licensed under the Apache License, Version 2.0 (the "License"); #

7# you may not use this file except in compliance with the License. #

8# You may obtain a copy of the License at #

9# #

10# http://www.apache.org/licenses/LICENSE-2.0 #

11# #

12# Unless required by applicable law or agreed to in writing, software #

13# distributed under the License is distributed on an "AS IS" BASIS, #

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #

15# See the License for the specific language governing permissions and #

16# limitations under the License. #

17# #

18#============================================================================#

21''' Sphinx detection and metadata extraction. '''

24from urllib.parse import ParseResult as _Url

26from . import __

27from . import extraction as _extraction

28from . import urls as _urls

31_scribe = __.acquire_scribe( __name__ )

class SphinxDetection( __.StructureDetection ):
    ''' Outcome of detecting a Sphinx documentation source. '''

    source: str
    has_searchindex: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' Declares inventory types, extraction features, and confidence
            levels advertised for Sphinx. '''
        features = __.ContentExtractionFeatures
        extractables = frozenset( (
            features.Signatures,
            features.Descriptions,
            features.Arguments,
            features.Returns,
            features.Attributes,
            features.CodeExamples,
            features.CrossReferences,
        ) )
        confidences = __.immut.Dictionary( { 'sphinx': 1.0 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = frozenset( ( 'sphinx', ) ),
            content_extraction_features = extractables,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection by delegating to processor for source. '''
        # Cast only narrows the static type; no runtime check is made.
        return __.typx.cast(
            __.typx.Self, await processor.detect( auxdata, source ) )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Retrieves documentation content for given objects as tuple. '''
        # Missing theme is forwarded as absence sentinel rather than None.
        theme = __.absent if self.theme is None else self.theme
        return tuple( await _extraction.extract_contents(
            auxdata, source, objects, theme = theme ) )

async def check_searchindex(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Probes for searchindex.js at URL derived from source.

        Presence of the search index indicates a full Sphinx site.
    '''
    return await __.probe_url(
        auxdata.probe_cache, _urls.derive_searchindex_url( source ) )

async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects Sphinx theme from HTML page derived from source.

        Returns dictionary with a 'theme' entry when a known marker is
        found in the lowercased page text. Returns empty dictionary when
        retrieval raises DocumentationInaccessibility or when no marker
        matches.
    '''
    # Order is significant: first theme with any marker present wins.
    markers_by_theme = (
        ( 'furo', ( 'furo', 'css/furo.css' ) ),
        ( 'sphinx_rtd_theme', ( 'sphinx_rtd_theme', 'css/theme.css' ) ),
        ( 'alabaster', ( 'alabaster', 'css/alabaster.css' ) ),
        ( 'pydoctheme', ( 'pydoctheme.css', 'classic.css' ) ),
        ( 'flask', ( 'flask.css', ) ),
        ( 'nature', ( 'css/nature.css', ) ),
        ( 'classic', ( 'css/default.css', ) ),
        ( 'sphinx_book_theme', ( 'sphinx_book_theme', ) ),
        ( 'pydata_sphinx_theme', ( 'pydata_sphinx_theme', ) ),
    )
    page_url = _urls.derive_html_url( source )
    try:
        # TODO: Use probe_url instead of `try`.
        page = await __.retrieve_url_as_text(
            auxdata.content_cache,
            page_url, duration_max = 10.0 )
    except __.DocumentationInaccessibility: return { }
    haystack = page.lower( )
    for theme_name, markers in markers_by_theme:
        if any( marker in haystack for marker in markers ):
            return { 'theme': theme_name }
    return { }