J'ai trouvé la réponse de Chris très utile, mais je voulais l'exécuter depuis le serveur SQL en utilisant T-SQL (et non CLR), j'ai donc converti son code en code T-SQL. J'ai donc converti son code en code T-SQL. Puis j'ai fait un pas de plus en enveloppant le tout dans une procédure stockée qui fait ce qui suit :
- utiliser l'insertion en masse pour importer initialement le fichier CSV
- nettoyer les lignes en utilisant le code de Chris
- retourner les résultats sous forme de tableau
Pour mes besoins, j'ai encore nettoyé les lignes en supprimant les guillemets autour des valeurs et en convertissant deux guillemets doubles en un guillemet double (je pense que c'est la bonne méthode).
CREATE PROCEDURE SSP_CSVToTable
-- Add the parameters for the stored procedure here
@InputFile nvarchar(4000)
, @FirstLine int
AS
BEGIN
-- SET NOCOUNT ON added to prevent extra result sets from
-- interfering with SELECT statements.
SET NOCOUNT ON;
--convert the CSV file to a table
--clean up the lines so that commas are handles correctly
DECLARE @sql nvarchar(4000)
DECLARE @PH1 nvarchar(50)
DECLARE @LINECOUNT int -- This will also serve as a primary key
DECLARE @CURLINE int
DECLARE @Line nvarchar(4000)
DECLARE @starti int
DECLARE @endi int
DECLARE @FieldTerminatorFound bit
DECLARE @backChar nvarchar(4000)
DECLARE @quoteCount int
DECLARE @source nvarchar(4000)
DECLARE @COLCOUNT int
DECLARE @CURCOL int
DECLARE @ColVal nvarchar(4000)
-- new delimiter
SET @PH1 = '†'
-- create single column table to hold each line of file
CREATE TABLE [#CSVLine]([line] nvarchar(4000))
-- bulk insert into temp table
-- cannot use variable path with bulk insert
-- so we must run using dynamic sql
SET @Sql = 'BULK INSERT #CSVLine
FROM ''' + @InputFile + '''
WITH
(
FIRSTROW=' + CAST(@FirstLine as varchar) + ',
FIELDTERMINATOR = ''\n'',
ROWTERMINATOR = ''\n''
)'
-- run dynamic statement to populate temp table
EXEC(@sql)
-- get number of lines in table
SET @LINECOUNT = @@ROWCOUNT
-- add identity column to table so that we can loop through it
ALTER TABLE [#CSVLine] ADD [RowId] [int] IDENTITY(1,1) NOT NULL
IF @LINECOUNT > 0
BEGIN
-- cycle through each line, cleaning each line
SET @CURLINE = 1
WHILE @CURLINE <= @LINECOUNT
BEGIN
-- get current line
SELECT @line = line
FROM #CSVLine
WHERE [RowId] = @CURLINE
-- Replace commas with our custom-made delimiter
SET @Line = REPLACE(@Line, ',', @PH1)
-- Find a quoted part of the line, which could legitimately contain commas.
-- In that case we will need to identify the quoted section and swap commas back in for our custom placeholder.
SET @starti = CHARINDEX(@PH1 + '"' ,@Line, 0)
If CHARINDEX('"', @Line, 0) = 0 SET @starti = 0
-- loop through quoted fields
WHILE @starti > 0
BEGIN
SET @FieldTerminatorFound = 0
-- Find end quote token (originally a ",)
SET @endi = CHARINDEX('"' + @PH1, @Line, @starti) -- sLine.IndexOf("""" & PH1, starti)
IF @endi < 1
BEGIN
SET @FieldTerminatorFound = 1
If @endi < 1 SET @endi = LEN(@Line) - 1
END
WHILE @FieldTerminatorFound = 0
BEGIN
-- Find any more quotes that are part of that sequence, if any
SET @backChar = '"' -- thats one quote
SET @quoteCount = 0
WHILE @backChar = '"'
BEGIN
SET @quoteCount = @quoteCount + 1
SET @backChar = SUBSTRING(@Line, @endi-@quoteCount, 1) -- sLine.Chars(endi - quoteCount)
END
IF (@quoteCount % 2) = 1
BEGIN
-- odd number of quotes. real field terminator
SET @FieldTerminatorFound = 1
END
ELSE
BEGIN
-- keep looking
SET @endi = CHARINDEX('"' + @PH1, @Line, @endi + 1) -- sLine.IndexOf("""" & PH1, endi + 1)
END
END
-- Grab the quoted field from the line, now that we have the start and ending indices
SET @source = SUBSTRING(@Line, @starti + LEN(@PH1), @endi - @starti - LEN(@PH1) + 1)
-- sLine.Substring(starti + PH1.Length, endi - starti - PH1.Length + 1)
-- And swap the commas back in
SET @Line = REPLACE(@Line, @source, REPLACE(@source, @PH1, ','))
--sLine.Replace(source, source.Replace(PH1, ","))
-- Find the next quoted field
-- If endi >= line.Length - 1 Then endi = line.Length 'During the swap, the length of line shrinks so an endi value at the end of the line will fail
SET @starti = CHARINDEX(@PH1 + '"', @Line, @starti + LEN(@PH1))
--sLine.IndexOf(PH1 & """", starti + PH1.Length)
END
-- get table based on current line
IF OBJECT_ID('tempdb..#Line') IS NOT NULL
DROP TABLE #Line
-- converts a delimited list into a table
SELECT *
INTO #Line
FROM dbo.iter_charlist_to_table(@Line,@PH1)
-- get number of columns in line
SET @COLCOUNT = @@ROWCOUNT
-- dynamically create CSV temp table to hold CSV columns and lines
-- only need to create once
IF OBJECT_ID('tempdb..#CSV') IS NULL
BEGIN
-- create initial structure of CSV table
CREATE TABLE [#CSV]([Col1] nvarchar(100))
-- dynamically add a column for each column found in the first line
SET @CURCOL = 1
WHILE @CURCOL <= @COLCOUNT
BEGIN
-- first column already exists, don't need to add
IF @CURCOL > 1
BEGIN
-- add field
SET @sql = 'ALTER TABLE [#CSV] ADD [Col' + Cast(@CURCOL as varchar) + '] nvarchar(100)'
--print @sql
-- this adds the fields to the temp table
EXEC(@sql)
END
-- go to next column
SET @CURCOL = @CURCOL + 1
END
END
-- build dynamic sql to insert current line into CSV table
SET @sql = 'INSERT INTO [#CSV] VALUES('
-- loop through line table, dynamically adding each column value
SET @CURCOL = 1
WHILE @CURCOL <= @COLCOUNT
BEGIN
-- get current column
Select @ColVal = str
From #Line
Where listpos = @CURCOL
IF LEN(@ColVal) > 0
BEGIN
-- remove quotes from beginning if exist
IF LEFT(@ColVal,1) = '"'
SET @ColVal = RIGHT(@ColVal, LEN(@ColVal) - 1)
-- remove quotes from end if exist
IF RIGHT(@ColVal,1) = '"'
SET @ColVal = LEFT(@ColVal, LEN(@ColVal) - 1)
END
-- write column value
-- make value sql safe by replacing single quotes with two single quotes
-- also, replace two double quotes with a single double quote
SET @sql = @sql + '''' + REPLACE(REPLACE(@ColVal, '''',''''''), '""', '"') + ''''
-- add comma separater except for the last record
IF @CURCOL <> @COLCOUNT
SET @sql = @sql + ','
-- go to next column
SET @CURCOL = @CURCOL + 1
END
-- close sql statement
SET @sql = @sql + ')'
--print @sql
-- run sql to add line to table
EXEC(@sql)
-- move to next line
SET @CURLINE = @CURLINE + 1
END
END
-- return CSV table
SELECT * FROM [#CSV]
END
GO
La procédure stockée utilise cette fonction d'aide qui analyse une chaîne de caractères dans un tableau (merci Erland Sommarskog !):
CREATE FUNCTION [dbo].[iter_charlist_to_table]
(@list ntext,
@delimiter nchar(1) = N',')
RETURNS @tbl TABLE (listpos int IDENTITY(1, 1) NOT NULL,
str varchar(4000),
nstr nvarchar(2000)) AS
BEGIN
DECLARE @pos int,
@textpos int,
@chunklen smallint,
@tmpstr nvarchar(4000),
@leftover nvarchar(4000),
@tmpval nvarchar(4000)
SET @textpos = 1
SET @leftover = ''
WHILE @textpos <= datalength(@list) / 2
BEGIN
SET @chunklen = 4000 - datalength(@leftover) / 2
SET @tmpstr = @leftover + substring(@list, @textpos, @chunklen)
SET @textpos = @textpos + @chunklen
SET @pos = charindex(@delimiter, @tmpstr)
WHILE @pos > 0
BEGIN
SET @tmpval = ltrim(rtrim(left(@tmpstr, @pos - 1)))
INSERT @tbl (str, nstr) VALUES(@tmpval, @tmpval)
SET @tmpstr = substring(@tmpstr, @pos + 1, len(@tmpstr))
SET @pos = charindex(@delimiter, @tmpstr)
END
SET @leftover = @tmpstr
END
INSERT @tbl(str, nstr) VALUES (ltrim(rtrim(@leftover)), ltrim(rtrim(@leftover)))
RETURN
END
Voici comment je l'appelle en T-SQL. Dans ce cas, j'insère les résultats dans une table temporaire, donc je crée d'abord la table temporaire :
-- create temp table for file import
CREATE TABLE #temp
(
CustomerCode nvarchar(100) NULL,
Name nvarchar(100) NULL,
[Address] nvarchar(100) NULL,
City nvarchar(100) NULL,
[State] nvarchar(100) NULL,
Zip nvarchar(100) NULL,
OrderNumber nvarchar(100) NULL,
TimeWindow nvarchar(100) NULL,
OrderType nvarchar(100) NULL,
Duration nvarchar(100) NULL,
[Weight] nvarchar(100) NULL,
Volume nvarchar(100) NULL
)
-- convert the CSV file into a table
INSERT #temp
EXEC [dbo].[SSP_CSVToTable]
@InputFile = @FileLocation
,@FirstLine = @FirstImportRow
Je n'ai pas beaucoup testé les performances, mais cela fonctionne bien pour ce dont j'ai besoin - importer des fichiers CSV de moins de 1000 lignes. Cependant, il pourrait s'étouffer sur des fichiers très volumineux.
J'espère que quelqu'un d'autre y trouvera également son compte.
A la vôtre !