Work

PowerShell Size-Based Log Splitter

A mash-up of the following links:

Blog post “Split file csv by size”

Replace Text in a String

Stack Overflow Regex Remove-Comma-Between-Double-Quotes

# Splits a CSV file into several smaller "cleaned" CSV files.
#
# Parameters:
#   $path - Path of the source CSV file. Output files are written next to it
#           as "<name>_clean_<n>.csv" (n = 0, 1, 2, ...).
#   $size - Threshold, in characters of buffered text, after which the current
#           piece is written out. The check runs before each line is appended,
#           so a piece can exceed $size by roughly one line.
#
# Every output file starts with the header row of the source file. Each data
# line has commas inside double-quoted fields escaped as "\," and every pair
# of consecutive double quotes ("") removed.
param ($path, $size)

if (-not $path) {
    throw "The -path parameter is required."
}

$src = $path

# Build the output name template from the trailing ".csv" extension only
# (case-insensitive). If the source has no .csv extension, append the suffix
# instead, so the template can never collapse to the source path itself.
if ($src -match '\.csv$') {
    $SplitPath = $src -replace '\.csv$', '_clean_{0}.csv'
}
else {
    $SplitPath = "$src" + '_clean_{0}.csv'
}

# .NET resolves relative paths against the process working directory, which
# can differ from the current PowerShell location; resolve it explicitly.
$resolvedSrc = Convert-Path -LiteralPath $src -ErrorAction Stop

# Read in source file and grab header row.
$inData = New-Object -TypeName System.IO.StreamReader -ArgumentList $resolvedSrc

try {
    $header = $inData.ReadLine()

    # Create initial output object
    $outData = New-Object -TypeName System.Text.StringBuilder
    [void]$outData.Append($header)

    $i = 0

    # Compare against $null explicitly: ReadLine() returns an empty string for
    # a blank line, which is falsy and would otherwise end the loop early.
    while ($null -ne ($line = $inData.ReadLine())) {
        # If the object is longer than $size then output the content of the
        # object and create a new one.
        if ($outData.Length -gt $size) {
            Write-Output "Splitting to filename " ($SplitPath -f $i)

            $outData.ToString() | Out-File -FilePath ( $SplitPath -f $i ) -Encoding ascii

            $outData = New-Object -TypeName System.Text.StringBuilder
            [void]$outData.Append($header)

            $i++
        }

        # Escape commas within quotes. This must be the regex -replace
        # operator; String.Replace() would treat the pattern as literal text.
        $line = $line -replace ',(?!(([^"]*"){2})*[^"]*$)', '\,'

        # Remove double-double quotes
        $line = $line.Replace('""', '')

        Write-Verbose "$($SplitPath -f $i), $line"

        [void]$outData.Append("`r`n$($line)")
    }

    # Write contents of final object
    Write-Output "Splitting to filename " ($SplitPath -f $i)

    $outData.ToString() | Out-File -FilePath ( $SplitPath -f $i ) -Encoding ascii
}
finally {
    # Close StreamReader even if reading or writing failed part-way.
    $inData.Dispose()
}