Coverage for sources/mimeogram/create.py: 72%

67 statements  

« prev     ^ index     » next       coverage.py v7.7.1, created at 2025-03-29 23:11 +0000

1# vim: set filetype=python fileencoding=utf-8: 

2# -*- coding: utf-8 -*- 

3 

4#============================================================================# 

5# # 

6# Licensed under the Apache License, Version 2.0 (the "License"); # 

7# you may not use this file except in compliance with the License. # 

8# You may obtain a copy of the License at # 

9# # 

10# http://www.apache.org/licenses/LICENSE-2.0 # 

11# # 

12# Unless required by applicable law or agreed to in writing, software # 

13# distributed under the License is distributed on an "AS IS" BASIS, # 

14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # 

15# See the License for the specific language governing permissions and # 

16# limitations under the License. # 

17# # 

18#============================================================================# 

19 

20 

21''' Creation of mimeograms. ''' 

22# TODO? Use BSD sysexits. 

23 

24 

25from __future__ import annotations 

26 

27from . import __ 

28from . import interfaces as _interfaces 

29from . import tokenizers as _tokenizers 

30 

31 

# Module-level scribe, produced once per module and keyed by the module name.
# Used below for informational messages and for exception reporting.
_scribe = __.produce_scribe( __name__ )

33 

34 

class Command(
    _interfaces.CliCommand,
    decorators = ( __.standard_dataclass, __.standard_tyro_class ),
):
    ''' Creates mimeogram from filesystem locations or URLs. '''

    # Positional inputs: what to gather into the mimeogram.
    sources: __.typx.Annotated[
        __.tyro.conf.Positional[ list[ str ] ],
        __.typx.Doc( ''' Filesystem locations or URLs. ''' ),
        __.tyro.conf.arg( prefix_name = False ),
    ]
    # Tri-state options default to None so that an omitted option produces
    # no configuration edit (see 'provide_configuration_edits').
    clip: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.typx.Doc( ''' Copy mimeogram to clipboard. ''' ),
        __.tyro.conf.arg( aliases = ( '--clipboard', '--to-clipboard' ) ),
    ] = None
    count_tokens: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.typx.Doc( ''' Count total tokens in mimeogram. ''' ),
    ] = None
    edit: __.typx.Annotated[
        bool,
        __.typx.Doc( ''' Spawn editor to capture introductory message. ''' ),
        __.tyro.conf.arg( aliases = ( '-e', '--edit-message' ) ),
    ] = False
    prepend_prompt: __.typx.Annotated[
        bool,
        __.typx.Doc( ''' Prepend mimeogram format instructions. ''' ),
    ] = False
    recurse: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.typx.Doc( ''' Recurse into directories. ''' ),
        __.tyro.conf.arg(
            aliases = ( '-r', '--recurse-directories', '--recursive' ) ),
    ] = None
    strict: __.typx.Annotated[
        __.typx.Optional[ bool ],
        __.typx.Doc(
            ''' Fail on invalid contents? True, fail. False, skip. ''' ),
        __.tyro.conf.arg( aliases = ( '--fail-on-invalid', ) ),
    ] = None
    tokenizer: __.typx.Annotated[
        __.typx.Optional[ _tokenizers.Tokenizers ],
        __.typx.Doc( ''' Which tokenizer to use for counting? ''' ),
    ] = None
    # Not turned into a configuration edit; read directly from the command
    # when the tokenizer is produced.
    tokenizer_variant: __.typx.Annotated[
        __.typx.Optional[ str ],
        __.typx.Doc(
            ''' Which tokenizer variant to use for counting?

                'tiktoken': 'cl100k_base', 'o200k_base', etc....

                Not all tokenizers have variants.
                If not specified, then the default variant is used.
            ''' ),
    ] = None

    async def __call__( self, auxdata: __.Globals ) -> None:
        ''' Runs mimeogram creation with this command's options. '''
        await create( auxdata, self )

    def provide_configuration_edits( self ) -> __.DictionaryEdits:
        ''' Translates explicitly-set options into configuration edits.

            Each candidate pairs a configuration address with the option
            value which targets it. Options left as None contribute no
            edit. Edits are returned as a tuple, in declaration order.
        '''
        candidates = (
            ( ( 'create', 'to-clipboard' ), self.clip ),
            ( ( 'create', 'count-tokens' ), self.count_tokens ),
            ( ( 'acquire-parts', 'recurse-directories' ), self.recurse ),
            ( ( 'acquire-parts', 'fail-on-invalid' ), self.strict ),
            ( ( 'tokenizers', 'default' ), self.tokenizer ),
        )
        return tuple(
            __.SimpleDictionaryEdit( # pyright: ignore
                address = address, value = value )
            for address, value in candidates
            if value is not None )

119 

120 

async def _acquire_prompt( auxdata: __.Globals ) -> str:
    ''' Default prompter: defers to 'acquire_prompt' from '.prompt'.

        The import is performed at call time rather than at module import.
    '''
    from .prompt import acquire_prompt as _acquire
    prompt = await _acquire( auxdata )
    return prompt

124 

125 

async def _copy_to_clipboard( mimeogram: str ) -> None:
    ''' Default clipboard copier: copies text via 'pyperclip', then logs.

        The 'pyperclip' import is performed at call time.
    '''
    from pyperclip import copy as _clipboard_copy
    _clipboard_copy( mimeogram )
    _scribe.info( "Copied mimeogram to clipboard." )

130 

131 

async def _edit_message( ) -> str:
    ''' Default editor: returns result of 'edit_content' from '.edit'.

        'edit_content' is invoked as a plain (non-awaited) call.
        The import is performed at call time.
    '''
    from .edit import edit_content as _edit
    content = _edit( )
    return content

135 

136 

async def create( # pylint: disable=too-complex,too-many-locals
    auxdata: __.Globals,
    command: Command, *,
    editor: __.cabc.Callable[
        [ ], __.cabc.Coroutine[ None, None, str ] ] = _edit_message,
    clipcopier: __.cabc.Callable[
        [ str ], __.cabc.Coroutine[ None, None, None ] ] = _copy_to_clipboard,
    prompter: __.cabc.Callable[
        [ __.Globals ],
        __.cabc.Coroutine[ None, None, str ] ] = _acquire_prompt,
) -> __.typx.Never:
    ''' Creates mimeogram and emits it; always raises SystemExit( 0 ).

        Stages, in order:

        * Acquire parts from the command's sources.
        * If the command requests editing, capture a message via 'editor'.
        * Format parts and message into the mimeogram text.
        * If the command requests it, prepend the text from 'prompter'.
        * If the 'create' configuration section enables 'count-tokens',
          count tokens and log the total.
        * If the 'create' configuration section enables 'to-clipboard',
          hand the text to 'clipcopier'; otherwise print it.

        The 'editor', 'clipcopier', and 'prompter' arguments are
        keyword-only coroutine functions which default to this module's
        helpers. Fallible stages run inside '__.report_exceptions' with a
        stage-specific message.
    '''
    from .acquirers import acquire
    from .formatters import format_mimeogram
    with __.report_exceptions(
        _scribe, "Could not acquire mimeogram parts."
    ):
        parts = await acquire( auxdata, command.sources )
    if not command.edit: message = None
    else:
        with __.report_exceptions(
            _scribe, "Could not acquire user message."
        ):
            message = await editor( )
    document = format_mimeogram( parts, message = message )
    # TODO? Pass prompt to 'format_mimeogram'.
    if command.prepend_prompt:
        preamble = await prompter( auxdata )
        document = f"{preamble}\n\n{document}"
    # Output behavior is driven by configuration rather than by the command
    # attributes directly.
    settings = auxdata.configuration.get( 'create', { } )
    if settings.get( 'count-tokens', False ):
        with __.report_exceptions(
            _scribe, "Could not count mimeogram tokens."
        ):
            counter = await _tokenizer_from_command( auxdata, command )
            total = await counter.count( document )
            _scribe.info( f"Total mimeogram size is {total} tokens." )
    if settings.get( 'to-clipboard', False ):
        with __.report_exceptions(
            _scribe, "Could not copy mimeogram to clipboard."
        ):
            await clipcopier( document )
    else:
        # TODO? Use output stream from configuration.
        print( document )
    raise SystemExit( 0 )

178 

179 

180async def _tokenizer_from_command( 

181 auxdata: __.Globals, command: Command 

182) -> _tokenizers.Tokenizer: 

183 options = auxdata.configuration.get( 'tokenizers', { } ) 

184 name = ( 

185 command.tokenizer.value if command.tokenizer 

186 else options.get( 'default', 'tiktoken' ) ) 

187 variant = command.tokenizer_variant 

188 args = dict( variant = variant ) if variant else { } 

189 return await _tokenizers.Tokenizers.produce( name, **args )