Content is user-generated and unverified.
"FileLineEndingDetector - Character-based implementation.
 Detects, reports on and converts the line ending convention (LF, CRLF or CR)
 used by a file or a string. Conventions are denoted by the symbols
 #lf, #crlf, #cr and #unknown."

Object subclass: #FileLineEndingDetector
	instanceVariableNames: ''
	classVariableNames: ''
	package: 'FileUtilities'


"Class-side methods - detection"

FileLineEndingDetector class >> detectLineEndingIn: aFilename
	"Answer the predominant line ending convention of the file named aFilename
	 (#lf, #crlf, #cr or #unknown). aFilename may be a path String or anything
	 else that understands #asFileReference.
	 Signals an Error when the file does not exist."

	| fileReference |
	fileReference := self existingFileReferenceFor: aFilename.
	^ self detectLineEndingInString: (self rawContentsOf: fileReference)

FileLineEndingDetector class >> detectLineEndingInString: aString
	"Answer the predominant line ending convention found in aString, or #unknown
	 when aString is empty or contains no line terminator at all.
	 The string is scanned once; a CR immediately followed by LF is counted as a
	 single CRLF terminator and never as a standalone CR plus a standalone LF."

	| crCount lfCount crlfCount index size |
	aString isEmpty ifTrue: [ ^ #unknown ].
	crCount := 0.
	lfCount := 0.
	crlfCount := 0.
	index := 1.
	size := aString size.
	[ index <= size ] whileTrue: [
		| currentChar |
		currentChar := aString at: index.
		(currentChar = Character cr
			and: [ index < size and: [ (aString at: index + 1) = Character lf ] ])
			ifTrue: [
				"CR+LF pair: consume both characters at once"
				crlfCount := crlfCount + 1.
				index := index + 2 ]
			ifFalse: [
				currentChar = Character cr ifTrue: [ crCount := crCount + 1 ].
				currentChar = Character lf ifTrue: [ lfCount := lfCount + 1 ].
				index := index + 1 ] ].
	^ self determineConvention: crCount lf: lfCount crlf: crlfCount

FileLineEndingDetector class >> detectLineEndingSimple: aFilename
	"Answer a convention based only on which terminators are present in the file,
	 without counting them: #crlf if any CRLF occurs, else #lf if any LF occurs,
	 else #cr if any CR occurs, else #unknown.
	 Signals an Error when the file does not exist."

	| fileReference |
	fileReference := self existingFileReferenceFor: aFilename.
	^ self detectByPresenceIn: (self rawContentsOf: fileReference)

FileLineEndingDetector class >> detectLineEndingPrecise: aFilename
	"Answer the predominant convention of the file by counting CRLF pairs first and
	 then counting the CR and LF characters that remain once those pairs are removed.
	 Signals an Error when the file does not exist."

	| fileReference contents withoutCRLF crlfCount lfCount crCount |
	fileReference := self existingFileReferenceFor: aFilename.
	contents := self rawContentsOf: fileReference.
	contents isEmpty ifTrue: [ ^ #unknown ].
	crlfCount := self countOccurrences: String crlf in: contents.
	"Strip CRLF pairs so their halves are not counted again as standalone CR / LF"
	withoutCRLF := contents copyReplaceAll: String crlf with: ''.
	lfCount := withoutCRLF occurrencesOf: Character lf.
	crCount := withoutCRLF occurrencesOf: Character cr.
	^ self determineConvention: crCount lf: lfCount crlf: crlfCount

FileLineEndingDetector class >> detectLineEndingQuick: aFilename
	"Simplified single-method style detection: same presence-based answer as
	 #detectLineEndingSimple: but without an explicit existence check, so a missing
	 file surfaces as whatever exception the file system raises on open."

	^ self detectByPresenceIn: (self rawContentsOf: aFilename asFileReference)

FileLineEndingDetector class >> determineConvention: crCount lf: lfCount crlf: crlfCount
	"Answer the convention symbol for the given terminator counts.
	 - #unknown when all counts are zero.
	 - The type that makes up at least 80% of all terminators, if there is one.
	 - Otherwise the first type present in the priority order CRLF, LF, CR."

	| total threshold |
	total := crCount + lfCount + crlfCount.
	total = 0 ifTrue: [ ^ #unknown ].
	"total > 0 here, so meeting the threshold implies the count itself is > 0"
	threshold := total * 0.8.
	crlfCount >= threshold ifTrue: [ ^ #crlf ].
	lfCount >= threshold ifTrue: [ ^ #lf ].
	crCount >= threshold ifTrue: [ ^ #cr ].
	"No clear winner: fall back to fixed priority"
	crlfCount > 0 ifTrue: [ ^ #crlf ].
	lfCount > 0 ifTrue: [ ^ #lf ].
	^ #cr


"Class-side methods - analysis"

FileLineEndingDetector class >> analyzeLineEndings: aFilename
	"Answer a Dictionary describing the line endings of the file:
	   #filename   -> aFilename as passed in
	   #size       -> file size as reported by the file reference
	   #crlf       -> number of CRLF pairs
	   #lf / #cr   -> number of standalone LF / CR characters
	   #convention -> result of #determineConvention:lf:crlf: on those counts
	   #lineCount  -> number of lines in the contents
	 When the file does not exist, answer a Dictionary holding only
	 #error -> 'File does not exist' instead of signalling."

	| fileReference contents withoutCRLF info |
	fileReference := aFilename asFileReference.
	fileReference exists ifFalse: [
		^ Dictionary new
			at: #error put: 'File does not exist';
			yourself ].
	contents := self rawContentsOf: fileReference.
	"Strip CRLF pairs so their halves are not counted again as standalone CR / LF"
	withoutCRLF := contents copyReplaceAll: String crlf with: ''.
	info := Dictionary new.
	info
		at: #filename put: aFilename;
		at: #size put: fileReference size;
		at: #crlf put: (self countOccurrences: String crlf in: contents);
		at: #lf put: (withoutCRLF occurrencesOf: Character lf);
		at: #cr put: (withoutCRLF occurrencesOf: Character cr).
	info
		at: #convention
		put: (self
				determineConvention: (info at: #cr)
				lf: (info at: #lf)
				crlf: (info at: #crlf)).
	info at: #lineCount put: contents lines size.
	^ info

FileLineEndingDetector class >> countOccurrences: aSubstring in: aString
	"Answer the number of non-overlapping occurrences of aSubstring in aString.
	 An empty aSubstring answers 0."

	| count index |
	aSubstring isEmpty ifTrue: [ ^ 0 ].
	count := 0.
	index := 1.
	[ index <= aString size ] whileTrue: [
		| foundIndex |
		foundIndex := aString findString: aSubstring startingAt: index.
		foundIndex > 0
			ifTrue: [
				count := count + 1.
				"Resume after the match so occurrences never overlap"
				index := foundIndex + aSubstring size ]
			ifFalse: [ index := aString size + 1 ] ].
	^ count


"Class-side methods - conversion"

FileLineEndingDetector class >> convertFile: aFilename to: aConvention
	"Rewrite the file so that every line terminator follows aConvention
	 (#lf, #crlf or #cr; anything else is treated as #lf). Answer true.
	 Signals an Error when the file does not exist."

	| fileReference converted |
	fileReference := self existingFileReferenceFor: aFilename.
	"Read everything before touching the file"
	converted := self convertLineEndingsIn: fileReference contents to: aConvention.
	"writeStreamDo: overwrites in place without truncating, so a shorter result
	 (e.g. CRLF -> LF) would leave stale bytes at the end. Start from a fresh file."
	fileReference ensureDelete.
	fileReference writeStreamDo: [ :stream | stream nextPutAll: converted ].
	^ true

FileLineEndingDetector class >> convertLineEndingsIn: aString to: aConvention
	"Answer a copy of aString in which every line terminator (CR, LF or CRLF) is
	 replaced by the terminator for aConvention. A terminator is emitted only where
	 the original had one, so a trailing newline is neither added nor dropped."

	| newLineString |
	newLineString := self lineStringFor: aConvention.
	^ String streamContents: [ :out |
		aString lineIndicesDo: [ :start :endWithoutDelimiters :end |
			out nextPutAll: (aString copyFrom: start to: endWithoutDelimiters).
			"end extends past endWithoutDelimiters only when the line had a terminator"
			end > endWithoutDelimiters ifTrue: [ out nextPutAll: newLineString ] ] ]

FileLineEndingDetector class >> lineStringFor: aConvention
	"Answer the terminator String for aConvention (#lf, #crlf or #cr).
	 Any other value answers LF."

	aConvention = #crlf ifTrue: [ ^ String crlf ].
	aConvention = #cr ifTrue: [ ^ String cr ].
	^ String lf


"Class-side methods - private helpers"

FileLineEndingDetector class >> existingFileReferenceFor: aFilename
	"Answer the file reference for aFilename, signalling an Error if no such file exists."

	| fileReference |
	fileReference := aFilename asFileReference.
	fileReference exists
		ifFalse: [ self error: 'File does not exist: ', aFilename asString ].
	^ fileReference

FileLineEndingDetector class >> rawContentsOf: aFileReference
	"Answer the bytes of the file as a String, one character per byte, so that the
	 original CR / LF bytes are seen untouched. The stream is closed on exit."

	^ aFileReference binaryReadStreamDo: [ :stream | stream upToEnd asString ]

FileLineEndingDetector class >> detectByPresenceIn: aString
	"Answer #crlf, #lf or #cr according to the first of those terminators (in that
	 priority order) that occurs anywhere in aString; #unknown if none occurs."

	aString isEmpty ifTrue: [ ^ #unknown ].
	(aString includesSubstring: String crlf) ifTrue: [ ^ #crlf ].
	(aString includes: Character lf) ifTrue: [ ^ #lf ].
	(aString includes: Character cr) ifTrue: [ ^ #cr ].
	^ #unknown


"Extension methods for FileReference"

FileReference >> detectLineEnding
	"Answer the predominant line ending convention of this file (#lf, #crlf, #cr or #unknown)."

	^ FileLineEndingDetector detectLineEndingIn: self fullName

FileReference >> lineEndingInfo
	"Answer the Dictionary produced by FileLineEndingDetector class >> analyzeLineEndings: for this file."

	^ FileLineEndingDetector analyzeLineEndings: self fullName

FileReference >> hasUnixLineEndings
	"Answer whether this file predominantly uses Unix line endings (LF)."

	^ self detectLineEnding = #lf

FileReference >> hasWindowsLineEndings
	"Answer whether this file predominantly uses Windows line endings (CRLF)."

	^ self detectLineEnding = #crlf

FileReference >> hasMacClassicLineEndings
	"Answer whether this file predominantly uses classic Mac line endings (CR)."

	^ self detectLineEnding = #cr

FileReference >> normalizeLineEndings
	"Rewrite this file using CRLF when running on Windows and LF otherwise. Answer true."

	| systemConvention |
	systemConvention := Smalltalk os isWindows
		ifTrue: [ #crlf ]
		ifFalse: [ #lf ].
	^ FileLineEndingDetector convertFile: self fullName to: systemConvention


"Usage examples:"
"
	""Quick detection""
	FileLineEndingDetector detectLineEndingQuick: '/path/to/file.txt'.

	""Detailed detection""
	FileLineEndingDetector detectLineEndingIn: '/path/to/file.txt'.

	""Simple check""
	FileLineEndingDetector detectLineEndingSimple: '/path/to/file.txt'.

	""Using FileReference extensions""
	'/path/to/file.txt' asFileReference hasUnixLineEndings.
	'/path/to/file.txt' asFileReference hasWindowsLineEndings.

	""Normalize to system convention""
	'/path/to/file.txt' asFileReference normalizeLineEndings.

	""Get detailed analysis""
	info := FileLineEndingDetector analyzeLineEndings: '/path/to/file.txt'.
	info at: #lineCount.	""Number of lines""
	info at: #convention.	""Detected convention""
"
Content is user-generated and unverified.
    Character-Based Line Ending Detection for Pharo | Claude