【发布时间】:2019-12-07 14:41:48
【问题描述】:
我有一个 Azure 数据工厂(ADF)管道,需要通过它从 Blob 存储容器中提取所有 CSV 文件,并将这些文件存储到 Azure Data Lake 容器中。在将文件存入数据湖之前,我需要先对文件中的数据做一些处理。
现在我需要让这个过程按顺序执行,而不是并行执行,因此我在 ForEach 活动的 Settings 中勾选了 Sequential(对应 JSON 中的 "isSequential":true)。
但实际运行时它并没有按顺序执行,而是仍然并行运行。以下是该管道的 JSON 定义:
{
"name":"PN_obfuscate_and_move",
"properties":{
"description":"move PN blob csv to adlgen2(obfuscated)",
"activities":[
{
"name":"GetBlobFileName",
"type":"GetMetadata",
"dependsOn":[
],
"policy":{
"timeout":"7.00:00:00",
"retry":0,
"retryIntervalInSeconds":30,
"secureOutput":false,
"secureInput":false
},
"userProperties":[
],
"typeProperties":{
"dataset":{
"referenceName":"PN_Getblobfilename_Dataset",
"type":"DatasetReference"
},
"fieldList":[
"childItems"
],
"storeSettings":{
"type":"AzureBlobStorageReadSetting",
"recursive":true
},
"formatSettings":{
"type":"DelimitedTextReadSetting"
}
}
},
{
"name":"ForEachBlobFile",
"type":"ForEach",
"dependsOn":[
{
"activity":"GetBlobFileName",
"dependencyConditions":[
"Succeeded"
]
}
],
"userProperties":[
],
"typeProperties":{
"items":{
"value":"@activity('GetBlobFileName').output.childItems",
"type":"Expression"
},
"isSequential":true,
"activities":[
{
"name":"Blob_to_SQLServer",
"description":"Copy PN blob files to sql server table",
"type":"Copy",
"dependsOn":[
],
"policy":{
"timeout":"7.00:00:00",
"retry":0,
"retryIntervalInSeconds":30,
"secureOutput":false,
"secureInput":false
},
"userProperties":[
{
"name":"Source",
"value":"PNemailattachment//"
},
{
"name":"Destination",
"value":"[dbo].[PN]"
}
],
"typeProperties":{
"source":{
"type":"DelimitedTextSource",
"storeSettings":{
"type":"AzureBlobStorageReadSetting",
"recursive":false,
"wildcardFileName":"*.*",
"enablePartitionDiscovery":false
},
"formatSettings":{
"type":"DelimitedTextReadSetting"
}
},
"sink":{
"type":"AzureSqlSink"
},
"enableStaging":false
},
"inputs":[
{
"referenceName":"PNBlob",
"type":"DatasetReference"
}
],
"outputs":[
{
"referenceName":"PN_SQLServer",
"type":"DatasetReference"
}
]
},
{
"name":"Obfuscate_PN_SQLData",
"description":"mask specific columns",
"type":"SqlServerStoredProcedure",
"dependsOn":[
{
"activity":"Blob_to_SQLServer",
"dependencyConditions":[
"Succeeded"
]
}
],
"policy":{
"timeout":"7.00:00:00",
"retry":0,
"retryIntervalInSeconds":30,
"secureOutput":false,
"secureInput":false
},
"userProperties":[
],
"typeProperties":{
"storedProcedureName":"[dbo].[Obfuscate_PN_Data]"
},
"linkedServiceName":{
"referenceName":"PN_SQLServer",
"type":"LinkedServiceReference"
}
},
{
"name":"SQLServer_to_ADLSGen2",
"description":"move PN obfuscated data to azure data lake gen2",
"type":"Copy",
"dependsOn":[
{
"activity":"Obfuscate_PN_SQLData",
"dependencyConditions":[
"Succeeded"
]
}
],
"policy":{
"timeout":"7.00:00:00",
"retry":0,
"retryIntervalInSeconds":30,
"secureOutput":false,
"secureInput":false
},
"userProperties":[
],
"typeProperties":{
"source":{
"type":"AzureSqlSource"
},
"sink":{
"type":"DelimitedTextSink",
"storeSettings":{
"type":"AzureBlobFSWriteSetting"
},
"formatSettings":{
"type":"DelimitedTextWriteSetting",
"quoteAllText":true,
"fileExtension":".csv"
}
},
"enableStaging":false
},
"inputs":[
{
"referenceName":"PN_SQLServer",
"type":"DatasetReference"
}
],
"outputs":[
{
"referenceName":"PNADLSGen2",
"type":"DatasetReference"
}
]
},
{
"name":"Delete_PN_SQLData",
"description":"delete all data from table",
"type":"SqlServerStoredProcedure",
"dependsOn":[
{
"activity":"SQLServer_to_ADLSGen2",
"dependencyConditions":[
"Succeeded"
]
}
],
"policy":{
"timeout":"7.00:00:00",
"retry":0,
"retryIntervalInSeconds":30,
"secureOutput":false,
"secureInput":false
},
"userProperties":[
],
"typeProperties":{
"storedProcedureName":"[dbo].[Delete_PN_Data]"
},
"linkedServiceName":{
"referenceName":"PN_SQLServer",
"type":"LinkedServiceReference"
}
}
]
}
}
],
"folder":{
"name":"PN"
},
"annotations":[
]
},
"type":"Microsoft.DataFactory/factories/pipelines"
}
【问题讨论】:
-
你能解释一下复制活动中的源数据集吗?
-
1. “Blob_to_SQLServer”复制活动的源是 Blob 存储容器中的 CSV 文件路径,其中文件名使用 @item().name。 2. “SQLServer_to_ADLSGen2”复制活动的源是 SQL Server 中的一张特定表。
-
嗨 Manish,您的问题解决了吗?如果解决了,请分享一下解决方法,因为我也遇到了同样的问题。
-
是的,上面提到的这套流程本身是可行的。当时 ADF 发布的更新中存在一个错误,微软后来修复了该问题,仅此而已。
标签: foreach pipeline azure-data-factory sequential