最好的办法可能是重新导入该文件,不过这次要导入到一个结构正确、包含更多列的表中。
不过,下面还是直接回答这个问题。
下面是一些适用于 SQL Server 2017 的测试代码。
设置测试数据:
-- Staging table for the demo: each source record is stored whole, as one
-- space-separated string, in a single wide text column.
create table bad_import (
    id int identity(1,1) primary key  -- surrogate key generated on insert
  , ID_Year_Birth_Education_Marital_Status_Income_Kidhome_Teenhome_D varchar(300) not null
);
-- Seed two sample records; every value is one complete space-separated line.
insert into bad_import
    (ID_Year_Birth_Education_Marital_Status_Income_Kidhome_Teenhome_D)
values
    ('100 1969 Basic Married 99999 1 1 25-12-2020 1 2 3 4 5 6 7 8 9 10 11')
  , ('200 1972 PhD Single 12345 0 0 12-06-2021 1 2 3 4 5 6 7 8 9 10 11');
-- Show the raw imported rows. Columns are listed explicitly and the rows are
-- ordered by the identity key so the output is deterministic.
select
    id
  , ID_Year_Birth_Education_Marital_Status_Income_Kidhome_Teenhome_D
from bad_import
order by id;
id | ID_Year_Birth_Education_Marital_Status_Income_Kidhome_Teenhome_D
-: | :------------------------------------------------------------------
1 | 100 1969 Basic Married 99999 1 1 25-12-2020 1 2 3 4 5 6 7 8 9 10 11
2 | 200 1972 PhD Single 12345 0 0 12-06-2021 1 2 3 4 5 6 7 8 9 10 11
创建临时表
-- Temporary target table: one typed column per field parsed out of the raw line.
-- The statement is terminated with ';' because T-SQL requires the statement
-- preceding a `with` common table expression to be terminated.
create table #tmp_targets (
    ID int primary key
  , Year_Birth decimal(4,0)
  , Education varchar(30)
  , Marital_Status varchar(30)
  , Income int
  , Kidhome int
  , Teenhome int
  , Dt date
);
转换数据并将字段插入临时表
-- Split every raw import line into its space-separated fields, pivot fields
-- 2..8 into columns, and load the typed result into #tmp_targets.
--
-- Field positions are taken from the OPENJSON array index ([key]) instead of
-- row_number() over string_split(): string_split() does not guarantee the
-- order of the rows it returns, so numbering its output could assign values
-- to the wrong columns. OPENJSON and STRING_ESCAPE are available from
-- SQL Server 2016 onwards.
with DATA (txt) as (
    select ID_Year_Birth_Education_Marital_Status_Income_Kidhome_Teenhome_D as txt
    from bad_import
)
, UNFOLDED as (
    select
        -- First token of the line (everything up to and including the first space).
        cast(left(d.txt, charindex(' ', d.txt)) as int) as ID
      , ca.value
      , ca.rn
    from DATA as d
    cross apply (
        -- Rewrite 'a b c' as the JSON array ["a","b","c"]; string_escape()
        -- keeps quotes and backslashes in the data from breaking the JSON.
        select
            cast(j.value as varchar(300)) as value
          , cast(j.[key] as int) + 1 as rn  -- [key] is the 0-based array index
        from openjson(
            '["' + replace(string_escape(d.txt, 'json'), ' ', '","') + '"]'
        ) as j
    ) as ca
)
insert into #tmp_targets
    (ID, Year_Birth, Education, Marital_Status,
     Income, Kidhome, Teenhome, Dt)
select
    pvt.ID
  , cast(pvt.[2] as decimal(4,0)) as Year_Birth
  , pvt.[3] as Education
  , pvt.[4] as Marital_Status
  , cast(pvt.[5] as int) as Income
  , cast(pvt.[6] as int) as Kidhome
  , cast(pvt.[7] as int) as Teenhome
  , convert(date, pvt.[8], 105) as Dt  -- style 105 = dd-mm-yyyy
from UNFOLDED
pivot (
    max(value)
    for rn in ([2],[3],[4],[5],[6],[7],[8])
) as pvt;
查询临时表:
-- Inspect the parsed rows, lowest ID first.
select
    t.ID
  , t.Year_Birth
  , t.Education
  , t.Marital_Status
  , t.Income
  , t.Kidhome
  , t.Teenhome
  , t.Dt
from #tmp_targets as t
order by t.ID;
ID | Year_Birth | Education | Marital_Status | Income | Kidhome | Teenhome | Dt
--: | ---------: | :-------- | :------------- | -----: | ------: | --------: | :---------
100 | 1969 | Basic | Married | 99999 | 1 | 1 | 2020-12-25
200 | 1972 | PhD | Single | 12345 | 0 | 0 | 2021-06-12
db<>fiddle 演示见此处