Coverage for sources/librovore/structures/sphinx/detection.py: 27%

46 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-10-20 18:40 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Sphinx detection and metadata extraction. ''' 

22 

23 

24from urllib.parse import ParseResult as _Url 

25 

26from . import __ 

27from . import extraction as _extraction 

28from . import urls as _urls 

29 

30 

31_scribe = __.acquire_scribe( __name__ ) 

32 

33 

class SphinxDetection( __.StructureDetection ):
    ''' Detection result for a Sphinx documentation source. '''

    source: str
    has_searchindex: bool = False
    normalized_source: str = ''
    theme: __.typx.Optional[ str ] = None

    @classmethod
    def get_capabilities( cls ) -> __.StructureProcessorCapabilities:
        ''' Declares capabilities of the Sphinx structure processor.

            Advertises support for the ``'sphinx'`` inventory type at
            confidence ``1.0``, along with the content extraction features
            enumerated below.
        '''
        features = __.ContentExtractionFeatures
        extraction_features = frozenset( (
            features.Signatures,
            features.Descriptions,
            features.Arguments,
            features.Returns,
            features.Attributes,
            features.CodeExamples,
            features.CrossReferences,
        ) )
        confidences = __.immut.Dictionary( { 'sphinx': 1.0 } )
        return __.StructureProcessorCapabilities(
            supported_inventory_types = frozenset( ( 'sphinx', ) ),
            content_extraction_features = extraction_features,
            confidence_by_inventory_type = confidences )

    @classmethod
    async def from_source(
        selfclass,
        auxdata: __.ApplicationGlobals,
        processor: __.Processor,
        source: str,
    ) -> __.typx.Self:
        ''' Produces detection by asking processor to detect source.

            The result of ``processor.detect`` is returned as-is; the cast
            only informs the type checker and performs no runtime check.
        '''
        result = await processor.detect( auxdata, source )
        return __.typx.cast( __.typx.Self, result )

    async def extract_contents(
        self,
        auxdata: __.ApplicationGlobals,
        source: str,
        objects: __.cabc.Sequence[ __.InventoryObject ], /,
    ) -> tuple[ __.ContentDocument, ... ]:
        ''' Extracts documentation content for the given objects.

            Forwards the detected theme to the extractor, substituting the
            ``absent`` sentinel when no theme was detected, and returns the
            extracted documents as a tuple.
        '''
        theme = __.absent if self.theme is None else self.theme
        extracted = await _extraction.extract_contents(
            auxdata, source, objects, theme = theme )
        return tuple( extracted )

84 

85 

86 

async def check_searchindex(
    auxdata: __.ApplicationGlobals, source: _Url
) -> bool:
    ''' Probes for ``searchindex.js``, which indicates a full Sphinx site.

        Derives the search index URL from the source URL and returns the
        outcome of probing it through the application probe cache.
    '''
    searchindex_url = _urls.derive_searchindex_url( source )
    is_present = await __.probe_url( auxdata.probe_cache, searchindex_url )
    return is_present

93 

94 

# Ordered ( theme, markers ) pairs. Order is significant: the first theme
# with any marker present in the lowercased HTML wins.
# Markers such as 'css/furo.css' and 'css/alabaster.css' are intentionally
# not listed, since the bare 'furo' and 'alabaster' markers already match
# any page which contains them.
_THEME_MARKERS: tuple[ tuple[ str, tuple[ str, ... ] ], ... ] = (
    ( 'furo', ( 'furo', ) ),
    ( 'sphinx_rtd_theme', ( 'sphinx_rtd_theme', 'css/theme.css' ) ),
    ( 'alabaster', ( 'alabaster', ) ),
    ( 'pydoctheme', ( 'pydoctheme.css', 'classic.css' ) ),
    ( 'flask', ( 'flask.css', ) ),
    ( 'nature', ( 'css/nature.css', ) ),
    ( 'classic', ( 'css/default.css', ) ),
    ( 'sphinx_book_theme', ( 'sphinx_book_theme', ) ),
    ( 'pydata_sphinx_theme', ( 'pydata_sphinx_theme', ) ),
)


def _identify_theme( html_content: str ) -> __.typx.Optional[ str ]:
    ''' Names first theme with a marker in the HTML, else ``None``.

        Matching is a case-insensitive substring search over the entire
        page text.
    '''
    haystack = html_content.lower( )
    for theme, markers in _THEME_MARKERS:
        if any( marker in haystack for marker in markers ):
            return theme
    return None


async def detect_theme(
    auxdata: __.ApplicationGlobals, source: _Url
) -> dict[ str, __.typx.Any ]:
    ''' Detects Sphinx theme and other metadata.

        Retrieves the HTML page at the URL derived from the source and
        scans it for known theme markers. Returns a dictionary with a
        ``'theme'`` entry when a theme is recognized. Returns an empty
        dictionary (never ``None``) when no theme is recognized or when
        the page is inaccessible.
    '''
    theme_metadata: dict[ str, __.typx.Any ] = { }
    html_url = _urls.derive_html_url( source )
    try:
        # TODO: Use probe_url instead of `try`.
        html_content = await __.retrieve_url_as_text(
            auxdata.content_cache,
            html_url, duration_max = 10.0 )
    # Best effort: an inaccessible page simply yields no metadata.
    except __.DocumentationInaccessibility: return theme_metadata
    theme = _identify_theme( html_content )
    if theme is not None: theme_metadata[ 'theme' ] = theme
    return theme_metadata