Coverage for sources/mimeogram/create.py: 72%

67 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-03-03 00:13 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Creation of mimeograms. ''' 

22# TODO? Use BSD sysexits. 

23 

24 

25from __future__ import annotations 

26 

27from . import __ 

28from . import interfaces as _interfaces 

29from . import tokenizers as _tokenizers 

30 

31 

# Module-level scribe, produced for this module's name. Functions below use
# it for informational messages and hand it to '__.report_exceptions'.
_scribe = __.produce_scribe( __name__ )

33 

34 

class Command(
    _interfaces.CliCommand,
    decorators = ( __.standard_dataclass, __.standard_tyro_class ),
):
    ''' Creates mimeogram from filesystem locations or URLs.

        Options which default to ``None`` are tri-state: when left unset,
        they contribute no configuration edit.
    '''

    # Positional arguments.
    sources: __.typx.Annotated[
        __.tyro.conf.Positional[ list[ str ] ],
        __.tyro.conf.arg(
            help = "Filesystem locations or URLs.",
            prefix_name = False ),
    ]
    # Options which may become configuration edits when not None.
    clip: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.tyro.conf.arg(
            aliases = ( '--clipboard', '--to-clipboard' ),
            help = "Copy mimeogram to clipboard." ),
    ] = None
    count_tokens: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.tyro.conf.arg(
            help = "Count total tokens in mimeogram." ),
    ] = None
    # Plain switches which are read directly from the command.
    edit: __.typx.Annotated[
        bool,
        __.tyro.conf.arg(
            aliases = ( '-e', '--edit-message' ),
            help = "Spawn editor to capture an introductory message." ),
    ] = False
    prepend_prompt: __.typx.Annotated[
        bool,
        __.tyro.conf.arg(
            help = "Prepend mimeogram format instructions." ),
    ] = False
    recurse: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.tyro.conf.arg(
            aliases = ( '-r', '--recurse-directories', '--recursive' ),
            help = "Recurse into directories." ),
    ] = None
    strict: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.tyro.conf.arg(
            aliases = ( '--fail-on-invalid', ),
            help = "Fail on invalid contents? True, fail. False, skip." ),
    ] = None
    tokenizer: __.typx.Annotated[
        __.typx.Optional[ _tokenizers.Tokenizers ],
        __.tyro.conf.arg(
            help = "Which tokenizer to use for counting?" ),
    ] = None
    tokenizer_variant: __.typx.Annotated[
        __.typx.Optional[ str ],
        __.tyro.conf.arg(
            help = (
                "Which tokenizer variant to use for counting?\n"
                "For 'tiktoken': 'cl100k_base', 'o200k_base', etc....\n"
                "Not all tokenizers have variants.\n"
                "If not specified, then the default variant is used." ) ),
    ] = None

    async def __call__( self, auxdata: __.Globals ) -> None:
        ''' Executes command to create mimeogram. '''
        await create( auxdata, self )

    def provide_configuration_edits( self ) -> __.DictionaryEdits:
        ''' Provides edits against configuration from options.

            One edit is produced for each option which is not ``None``.
            Edits are returned as a tuple, ordered as: clipboard, token
            counting, directory recursion, strictness, default tokenizer.
        '''
        # Ordered pairs of configuration address and option value.
        overrides = (
            ( ( 'create', 'to-clipboard' ), self.clip ),
            ( ( 'create', 'count-tokens' ), self.count_tokens ),
            ( ( 'acquire-parts', 'recurse-directories' ), self.recurse ),
            ( ( 'acquire-parts', 'fail-on-invalid' ), self.strict ),
            ( ( 'tokenizers', 'default' ), self.tokenizer ),
        )
        return tuple(
            __.SimpleDictionaryEdit( # pyright: ignore
                address = address, value = value )
            for address, value in overrides
            if value is not None )

123 

124 

async def _acquire_prompt( auxdata: __.Globals ) -> str:
    ''' Default prompter: awaits 'acquire_prompt' from sibling module. '''
    # Import is local to the call rather than at module level.
    from .prompt import acquire_prompt as _acquire
    prompt = await _acquire( auxdata )
    return prompt

128 

129 

async def _copy_to_clipboard( mimeogram: str ) -> None:
    ''' Default clipcopier: copies text via pyperclip, then logs it. '''
    # Import is deferred until a copy is actually requested.
    import pyperclip
    pyperclip.copy( mimeogram )
    _scribe.info( "Copied mimeogram to clipboard." )

134 

135 

async def _edit_message( ) -> str:
    ''' Default editor: returns result of 'edit_content' with no arguments.

        The underlying call is synchronous; this coroutine only wraps it so
        that it matches the awaitable ``editor`` parameter of ``create``.
    '''
    from .edit import edit_content as _capture
    message = _capture( )
    return message

139 

140 

async def create( # pylint: disable=too-complex,too-many-locals
    auxdata: __.Globals,
    command: Command, *,
    editor: __.cabc.Callable[
        [ ], __.cabc.Coroutine[ None, None, str ] ] = _edit_message,
    clipcopier: __.cabc.Callable[
        [ str ], __.cabc.Coroutine[ None, None, None ] ] = _copy_to_clipboard,
    prompter: __.cabc.Callable[
        [ __.Globals ],
        __.cabc.Coroutine[ None, None, str ] ] = _acquire_prompt,
) -> __.typx.Never:
    ''' Creates mimeogram.

        Steps, in order:

        * Acquires parts from ``command.sources``.
        * If ``command.edit``, awaits ``editor`` for an introductory
          message; otherwise, the message is ``None``.
        * Formats the mimeogram from parts and message.
        * If ``command.prepend_prompt``, awaits ``prompter`` and places its
          result, followed by a blank line, ahead of the mimeogram.
        * If the 'count-tokens' entry of the 'create' configuration section
          is truthy, counts tokens and logs the total.
        * If the 'to-clipboard' entry of the same section is truthy, awaits
          ``clipcopier``; otherwise, prints the mimeogram.

        Always finishes by raising ``SystemExit( 0 )``; never returns.

        Fallible stages run under ``__.report_exceptions`` with a
        stage-specific message. NOTE(review): presumably that helper logs
        and exits on failure; it is defined elsewhere, so confirm there.
    '''
    from .acquirers import acquire
    from .formatters import format_mimeogram
    with __.report_exceptions(
        _scribe, "Could not acquire mimeogram parts."
    ):
        segments = await acquire( auxdata, command.sources )
    if not command.edit: message = None
    else:
        with __.report_exceptions(
            _scribe, "Could not acquire user message."
        ):
            message = await editor( )
    mimeogram = format_mimeogram( segments, message = message )
    # TODO? Pass prompt to 'format_mimeogram'.
    if command.prepend_prompt:
        prompt = await prompter( auxdata )
        mimeogram = f"{prompt}\n\n{mimeogram}"
    settings = auxdata.configuration.get( 'create', { } )
    if settings.get( 'count-tokens', False ):
        with __.report_exceptions(
            _scribe, "Could not count mimeogram tokens."
        ):
            counter = await _tokenizer_from_command( auxdata, command )
            tokens_count = await counter.count( mimeogram )
        _scribe.info( f"Total mimeogram size is {tokens_count} tokens." )
    if not settings.get( 'to-clipboard', False ):
        print( mimeogram ) # TODO? Use output stream from configuration.
    else:
        with __.report_exceptions(
            _scribe, "Could not copy mimeogram to clipboard."
        ):
            await clipcopier( mimeogram )
    raise SystemExit( 0 )

182 

183 

184async def _tokenizer_from_command( 

185 auxdata: __.Globals, command: Command 

186) -> _tokenizers.Tokenizer: 

187 options = auxdata.configuration.get( 'tokenizers', { } ) 

188 name = ( 

189 command.tokenizer.value if command.tokenizer 

190 else options.get( 'default', 'tiktoken' ) ) 

191 variant = command.tokenizer_variant 

192 args = dict( variant = variant ) if variant else { } 

193 return await _tokenizers.Tokenizers.produce( name, **args )