# A mash-up of the approaches described in the following sources:
#   - Blog post "Split file csv by size"
#   - Stack Overflow: "Regex Remove-Comma-Between-Double-Quotes"
# Splits a CSV file into numbered "<name>_clean_<n>.csv" chunks, repeating the
# header row at the top of every chunk and cleaning each data line on the way:
#   * commas that sit inside double-quoted fields are escaped as "\,"
#   * doubled double-quotes ("") are removed
#
# Parameters:
#   $path - path of the source .csv file.
#   $size - chunk threshold in characters. A chunk is flushed once its buffered
#           length exceeds this value; the check happens before the next line
#           is appended, so a chunk can overshoot $size by up to one line.
param ($path, $size)

# Resolve to an absolute file-system path. .NET's StreamReader resolves
# relative paths against the process working directory, which can differ from
# the PowerShell location that Out-File uses; resolving up front keeps the
# input and the outputs side by side.
$src = (Resolve-Path -LiteralPath $path -ErrorAction Stop).ProviderPath
# "{0}" is a format placeholder that is filled with the chunk index via -f.
$SplitPath = $src.replace(".csv", "_clean_{0}.csv")

# Read in source file and grab header row.
$inData = New-Object -TypeName System.IO.StreamReader -ArgumentList $src
try {
    $header = $inData.ReadLine()

    # Create initial output buffer, seeded with the header.
    $outData = New-Object -TypeName System.Text.StringBuilder
    [void]$outData.Append($header)
    $i = 0

    # ReadLine() returns $null only at end of stream. Compare against $null
    # explicitly so that blank lines (empty strings, which are falsy) do not
    # terminate the loop early.
    while ( $null -ne ($line = $inData.ReadLine()) ) {
        # If the buffer is longer than $size, write it out and start a new one.
        if ( $outData.Length -gt $size ) {
            Write-Output "Splitting to filename " ($SplitPath -f $i)
            $outData.ToString() | Out-File -FilePath ( $SplitPath -f $i ) -Encoding ascii
            $outData = New-Object -TypeName System.Text.StringBuilder
            [void]$outData.Append($header)
            $i++
        }

        # Escape commas within quotes. This must use the regex-aware -replace
        # operator: String.Replace() treats its first argument as a literal
        # substring and would never match the pattern. The lookahead matches a
        # comma that is NOT followed by an even number of quotes up to the end
        # of the line, i.e. a comma inside a quoted field.
        $line = $line -replace ',(?!(([^"]*"){2})*[^"]*$)', '\,'

        # Remove double-double quotes (literal replacement is intended here).
        $line = $line.Replace('""', '')

        Write-Verbose "$($SplitPath -f $i), $line"
        [void]$outData.Append("`r`n$($line)")
    }

    # Write contents of final buffer.
    Write-Output "Splitting to filename " ($SplitPath -f $i)
    $outData.ToString() | Out-File -FilePath ( $SplitPath -f $i ) -Encoding ascii
}
finally {
    # Always release the file handle, even if reading or writing failed.
    $inData.Dispose()
}