Coverage for sources / detextive / lineseparators.py: 100%
44 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-14 04:38 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-14 04:38 +0000
1# vim: set filetype=python fileencoding=utf-8:
2# -*- coding: utf-8 -*-
4#============================================================================#
5# #
6# Licensed under the Apache License, Version 2.0 (the "License"); #
7# you may not use this file except in compliance with the License. #
8# You may obtain a copy of the License at #
9# #
10# http://www.apache.org/licenses/LICENSE-2.0 #
11# #
12# Unless required by applicable law or agreed to in writing, software #
13# distributed under the License is distributed on an "AS IS" BASIS, #
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. #
15# See the License for the specific language governing permissions and #
16# limitations under the License. #
17# #
18#============================================================================#
21''' Line separator enumeration and utilities. '''
24from . import __
class LineSeparators( __.enum.Enum ):
    ''' Line separators for cross-platform text processing.

        The value of each member is the literal separator string.
    '''

    CR = '\r'       # Classic MacOS (0xD)
    CRLF = '\r\n'   # DOS/Windows (0xD 0xA)
    LF = '\n'       # Unix/Linux (0xA)

    @classmethod
    def _detect(
        selfclass,
        content: __.cabc.Sequence[ int ] | bytes | str,
        limit: int,
        cr: int | str,
        lf: int | str,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Finds first line separator among leading elements of content.

            Shared scanner behind :meth:`detect_bytes` and
            :meth:`detect_text`. ``cr`` and ``lf`` are the carriage return
            and linefeed tokens in the element type of ``content``.

            Returns detected LineSeparators enum member or None.
        '''
        sample = content[ : limit ]
        found_cr = False
        for unit in sample:
            if unit == cr:
                # Two carriage returns in a row: the first one stood alone.
                if found_cr: return selfclass.CR
                found_cr = True
            elif unit == lf:
                return selfclass.CRLF if found_cr else selfclass.LF
            # Any other element directly after a carriage return means
            # that carriage return was a complete separator.
            elif found_cr: return selfclass.CR
        if not found_cr: return None
        # The sample ended on a carriage return. Look at the single element
        # which follows the sample, if any, so that a CRLF pair straddling
        # the limit is not misreported and a trailing lone CR is not lost.
        # The sample always starts at index 0, so its length is the index
        # of the next element, whatever the sign of the limit.
        position = len( sample )
        following = content[ position : position + 1 ]
        if len( following ) and following[ 0 ] == lf:
            return selfclass.CRLF
        return selfclass.CR

    @classmethod
    def detect_bytes(
        selfclass,
        content: __.cabc.Sequence[ int ] | bytes,
        limit: int = 1024,
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from byte content sample.

            Scans the first ``limit`` bytes and reports the first separator
            encountered. If the sample ends on a carriage return, the next
            byte (when present) is consulted to tell CRLF from CR.

            Returns detected LineSeparators enum member or None.
        '''
        return selfclass._detect( content, limit, 0xd, 0xa )

    @classmethod
    def detect_text(
        selfclass, text: str, limit: int = 1024
    ) -> __.typx.Optional[ 'LineSeparators' ]:
        ''' Detects line separator from text (Unicode string).

            Scans the first ``limit`` characters and reports the first
            separator encountered. If the sample ends on a carriage return,
            the next character (when present) is consulted to tell CRLF
            from CR.

            Returns detected LineSeparators enum member or None.
        '''
        return selfclass._detect( text, limit, '\r', '\n' )

    @classmethod
    def normalize_universal( selfclass, content: str ) -> str:
        ''' Normalizes all line separators to Unix LF format. '''
        # Collapse CRLF to CR first so that each pair becomes exactly one LF
        # when the remaining carriage returns are converted.
        return content.replace( '\r\n', '\r' ).replace( '\r', '\n' )

    def normalize( self, content: str ) -> str:
        ''' Normalizes specific line separator to Unix LF format.

            Only occurrences of this member's separator are replaced. Note
            that the CR member replaces every carriage return, including
            those which are part of a CRLF pair.
        '''
        if LineSeparators.LF is self: return content
        return content.replace( self.value, '\n' )

    def nativize( self, content: str ) -> str:
        ''' Converts Unix LF to this platform's line separator.

            Every linefeed is replaced, so content is expected to be
            LF-normalized already; a linefeed inside an existing CRLF pair
            would be converted as well.
        '''
        if LineSeparators.LF is self: return content
        return content.replace( '\n', self.value )