Skip to main content

流式读取Excel SDK

1. 配置

alt text

1.1 File URL

File URL 必须是Rest Flow上传的文件,通常情况下Rest Flow的Trigger节点需要如下配置 alt text

然后File URL的input框中,只需要填入$1.restTrigger.requestBody.file这个值即可。

1.2 Start row

Start row指的是Excel中值开始的行,通常情况下如果第一行是Header,那么Start Row就是2,Excel样式如下 alt text

1.3 Sheet Index(Starts with 1)

数据所在Sheet的第几页,一版情况下是第一页

1.4 Batch Size

每次读取多少行,对于大数据而言可以设置到5000行左右

1.5 Process Data Flow

流式读取数据的时候,每读取一批(Batch Size)数据,就会把数据交给Process Data Flow执行。

Process Data Flow是以子流程的方式运行的,所以Process Data Flow的第一个Trigger节点是父流程触发,该子流程接受4个固定参数

  • excelData - 每一批次传给Process Data Flow的数据
  • startRowNum - 这一批次数据在Excel中的开始行数
  • endRowNum - 这一批数据在Excel中的结束行数
  • uniqueId - 唯一ID,用于并发运行时断点续传

如果Running Concurrently设置为false,则只接受一个excelData也可。

1.6 Running Concurrently

是否并发运行Process Data Flow。

  • false:如果设置为false,程序可以保证读取的所有数据都在一个事务中运行,也就是所有数据可以一起回滚
  • true:如果设置为true,就是并发运行,程序只能报错一个Batch(即调用一次Process Data Flow)的数据在一个事务中。

1.7 Unique ID for Running Concurrently

如果Running Concurrently设置为true,我们提供一种机制断点续传续传

2. 案例分享

2.1 非并发导入大量数据

简单的导入大量数据,即Stream Read Excel SDK中,设置Running Concurrently为false

  • 主流程

alt text

{
"nodeList": [
{
"scheduleType": "ONE_TIME",
"triggerType": "REST",
"restTrigger": {
"url": "/flow/api/flow-rest/warehouse/create-single-thread",
"method": "POST",
"requestParams": [
{
"name": "uniqueId",
"type": "string",
"optional": false
}
],
"requestBody": [
{
"name": "file",
"label": "file",
"dataType": "string",
"optional": false
}
],
"eligibleForAgent": false
},
"startTime": 1742384792641,
"periodStart": "00:00:00",
"version": "2.0",
"periodEnd": "23:59:59",
"interval": 60,
"inMemory": true,
"inMemoryTransaction": true,
"id": 1,
"type": "TRIGGER",
"enabled": true,
"nodeName": "TRIGGER",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 33,
"uuid": "8aaaf737-5d25-4223-9029-5c0660c5ae23",
"name": "流式读取Excel数据",
"inputFields": [
{
"name": "fileUrl",
"label": "Rest Flow File Url",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "startRow",
"label": "Start Row",
"controlType": "input",
"optional": false
},
{
"name": "sheetIndex",
"label": "Sheet Index(Starts with 1)",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "batchSize",
"label": "Batch Size",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "flowToken",
"label": "Process Data Flow",
"dataType": "string",
"optional": true
},
{
"name": "concurrent",
"label": "Running Concurrently",
"dataType": "string",
"controlType": "select",
"pickList": [
"true",
"false"
],
"optional": true
},
{
"name": "uniqueId",
"label": "Unique ID for Running Concurrently",
"dataType": "string",
"controlType": "input",
"optional": true
}
],
"outputFields": [
{
"name": "results",
"label": "查询结果"
},
{
"name": "count",
"label": "结果数量",
"dataType": "string"
}
],
"sdkType": "STREAM_READ_EXCEL",
"input": {
"fileUrl": "$1.restTrigger.requestBody.file",
"sheetIndex": "1",
"batchSize": "4000",
"startRow": "2",
"concurrent": "false",
"flowToken": "batch-create-sub-flow-single-thread",
"flowPbcToken": "master-data-management",
"uniqueId": ""
}
},
"version": "1.0",
"id": 2,
"type": "APP",
"enabled": true,
"nodeName": "APP - Stream Read Excel",
"runAsCurrentUser": false
},
{
"result": "Successful",
"id": 3,
"type": "STOP_JOB",
"enabled": true,
"nodeName": "STOP_JOB",
"runAsCurrentUser": false
}
]
}
  • 子流程

alt text

{
"nodeList": [
{
"scheduleType": "ONE_TIME",
"triggerType": "PARENT",
"startTime": 1742384884888,
"periodStart": "00:00:00",
"version": "2.0",
"periodEnd": "23:59:59",
"interval": 60,
"inMemory": false,
"inMemoryTransaction": false,
"parentTrigger": {
"inputFields": [
{
"name": "excelData",
"type": "string",
"optional": false
}
]
},
"id": 1,
"type": "TRIGGER",
"enabled": true,
"nodeName": "TRIGGER",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 26,
"uuid": "8abc1654-b307-4fe9-b41a-72f9d7481e79",
"name": "批量新建表单数据",
"inputFields": [
{
"name": "batchInput",
"label": "batchInput",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "formEntityToken",
"label": "Form Entity",
"controlType": "entity",
"optional": false
},
{
"name": "needReturnValue",
"label": "Need Return Created Value?",
"dataType": "string",
"controlType": "select",
"pickList": [
"true",
"false"
],
"optional": true
},
{
"name": "batchSize",
"label": "Batch Size",
"dataType": "string",
"controlType": "input",
"optional": true
}
],
"outputFields": [
{
"name": "results",
"label": "返回结果"
},
{
"name": "count",
"label": "返回数量",
"dataType": "string"
}
],
"sdkType": "BATCH_CREATE_ENTITY",
"input": {
"needReturnValue": "false",
"formEntityToken": "warehouse-info-table-form",
"pbcToken": "master-data-management",
"batchInput": "$CURRENT_FLOW.excelData",
"batchSize": "500"
},
"formEntityId": ""
},
"version": "1.0",
"id": 3,
"type": "APP",
"enabled": true,
"nodeName": "Batch Create Warehouse information data",
"runAsCurrentUser": false
},
{
"result": "Successful",
"id": 4,
"type": "STOP_JOB",
"enabled": true,
"nodeName": "STOP_JOB",
"runAsCurrentUser": false
}
]
}

[!WARNING]
在Copy此段json时,需要重新选择主流程中的Process Data Flow

2.1 并发可断点续传导入大量数据

可断点续传的重要参数时uniqueId,如果需要两次导入同一个Excel,则两次传输的 uniqueId 必须相同

  • 主流程
{
"nodeList": [
{
"scheduleType": "ONE_TIME",
"triggerType": "REST",
"restTrigger": {
"url": "/flow/api/flow-rest/warehouse/create",
"method": "POST",
"requestParams": [
{
"name": "uniqueId",
"type": "string",
"optional": false
}
],
"requestBody": [
{
"name": "file",
"label": "file",
"dataType": "string",
"optional": false
}
],
"eligibleForAgent": false
},
"startTime": 1742384792641,
"periodStart": "00:00:00",
"version": "2.0",
"periodEnd": "23:59:59",
"interval": 60,
"inMemory": true,
"inMemoryTransaction": true,
"id": 1,
"type": "TRIGGER",
"enabled": true,
"nodeName": "TRIGGER",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 33,
"uuid": "8aaaf737-5d25-4223-9029-5c0660c5ae23",
"name": "流式读取Excel数据",
"inputFields": [
{
"name": "fileUrl",
"label": "Rest Flow File Url",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "startRow",
"label": "Start Row",
"controlType": "input",
"optional": false
},
{
"name": "sheetIndex",
"label": "Sheet Index(Starts with 1)",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "batchSize",
"label": "Batch Size",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "flowToken",
"label": "Process Data Flow",
"dataType": "string",
"optional": true
},
{
"name": "concurrent",
"label": "Running Concurrently",
"dataType": "string",
"controlType": "select",
"pickList": [
"true",
"false"
],
"optional": true
},
{
"name": "uniqueId",
"label": "Unique ID for Running Concurrently",
"dataType": "string",
"controlType": "input",
"optional": true
}
],
"outputFields": [
{
"name": "results",
"label": "查询结果"
},
{
"name": "count",
"label": "结果数量",
"dataType": "string"
}
],
"sdkType": "STREAM_READ_EXCEL",
"input": {
"fileUrl": "$1.restTrigger.requestBody.file",
"sheetIndex": "1",
"batchSize": "4000",
"startRow": "2",
"concurrent": "true",
"flowToken": "batch-create-sub-flow",
"flowPbcToken": "master-data-management",
"uniqueId": "$1.restTrigger.requestParams.uniqueId"
}
},
"version": "1.0",
"id": 2,
"type": "APP",
"enabled": true,
"nodeName": "APP - Stream Read Excel",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 720,
"uuid": "c64c0109-9870-461e-835f-1d4414b60a85",
"name": "Redis retrieves the all fields from the hash table",
"inputFields": [
{
"name": "key",
"label": "Hash table key",
"dataType": "string",
"controlType": "input",
"optional": false
}
],
"outputFields": [
{
"name": "status",
"label": "Execution result status",
"dataType": "string"
},
{
"name": "value",
"label": "results of execution",
"dataType": "string"
}
],
"sdkType": "REDIS_HGETALL",
"input": {
"key": "$1.restTrigger.requestParams.uniqueId"
}
},
"version": "1.0",
"id": 5,
"type": "APP",
"enabled": true,
"nodeName": "APP - Redis retrieves the all fields from the hash table",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 7,
"uuid": "492f8c00-14de-4837-a1f6-ed6ff0a2870e",
"name": "Groovy script",
"inputFields": [
{
"name": "hashTableValues",
"label": "hashTableValues",
"dataType": "string",
"optional": false
}
],
"outputFields": [
{
"name": "faliedRows",
"label": "faliedRows",
"dataType": "string"
}
],
"sdkType": "GENERAL_GROOVY",
"code": "\ndef keysNotSucceedList = hashTableValues.findAll { key, value -> value != \"succeed\" }.keySet().toList()\n\ndef faliedRows = keysNotSucceedList.collect { key ->\n key.split(\"_\").collect { it.toInteger() }\n}\n\nreturn ['faliedRows': String.valueOf(faliedRows)]\n",
"input": {
"hashTableValues": "$5.value"
}
},
"version": "1.0",
"id": 6,
"type": "APP",
"enabled": true,
"nodeName": "APP - Groovy script",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 11,
"uuid": "f63bae3b-4003-4b29-aedf-4626216fc12a",
"name": "Set workflow output value",
"inputFields": [
{
"name": "faliedRows",
"label": "Falied rows",
"dataType": "string",
"optional": false
}
],
"sdkType": "SET_VALUE_TO_INSTANCE"
},
"version": "2.0",
"input": {
"faliedRows": "$6.faliedRows"
},
"id": 4,
"type": "APP",
"enabled": true,
"nodeName": "APP - Set workflow output value",
"runAsCurrentUser": false
},
{
"result": "Successful",
"id": 3,
"type": "STOP_JOB",
"enabled": true,
"nodeName": "STOP_JOB",
"runAsCurrentUser": false
}
]
}
  • 子流程
{
"nodeList": [
{
"scheduleType": "ONE_TIME",
"triggerType": "PARENT",
"startTime": 1742384884888,
"periodStart": "00:00:00",
"version": "2.0",
"periodEnd": "23:59:59",
"interval": 60,
"inMemory": false,
"inMemoryTransaction": false,
"parentTrigger": {
"inputFields": [
{
"name": "excelData",
"type": "string",
"optional": false
},
{
"name": "startRowNum",
"type": "string",
"optional": false
},
{
"name": "endRowNum",
"type": "string",
"optional": false
},
{
"name": "uniqueId",
"type": "string",
"optional": false
}
]
},
"id": 1,
"type": "TRIGGER",
"enabled": true,
"nodeName": "TRIGGER",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 7,
"uuid": "492f8c00-14de-4837-a1f6-ed6ff0a2870e",
"name": "Groovy script",
"inputFields": [
{
"name": "startRowNum",
"label": "startRowNum",
"dataType": "string",
"optional": false
},
{
"name": "endRowNum",
"label": "endRowNum",
"dataType": "string",
"optional": false
}
],
"outputFields": [
{
"name": "key",
"label": "key",
"dataType": "string"
}
],
"sdkType": "GENERAL_GROOVY",
"code": "def key = startRowNum + '_' + endRowNum\nreturn ['key': key]",
"input": {
"startRowNum": "$CURRENT_FLOW.startRowNum",
"endRowNum": "$CURRENT_FLOW.endRowNum"
}
},
"version": "1.0",
"id": 7,
"type": "APP",
"enabled": true,
"nodeName": "Get redis hash table key",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 732,
"uuid": "c64c0109-9870-461e-835f-1d4414b60a85",
"name": "Redis retrieves the specified fields from the hash table",
"inputFields": [
{
"name": "key",
"label": "Redis操作的哈希表",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "field",
"label": "Redis操作的键",
"dataType": "string",
"controlType": "input",
"optional": false
}
],
"outputFields": [
{
"name": "status",
"label": "Execution result status",
"dataType": "number"
},
{
"name": "value",
"label": "results of execution",
"dataType": "string"
}
],
"sdkType": "REDIS_HGET",
"code": "",
"input": {
"key": "$CURRENT_FLOW.uniqueId",
"field": "$7.key"
}
},
"version": "1.0",
"id": 14,
"type": "APP",
"enabled": true,
"nodeName": "APP - Redis retrieves the specified fields from the hash table",
"runAsCurrentUser": false
},
{
"conditions": [
{
"id": 1277,
"dataField": "$14.value",
"condition": "does_not_equal",
"value": "succeed"
}
],
"subFlow": [
{
"subFlow": [
{
"sdk": {
"id": 26,
"uuid": "8abc1654-b307-4fe9-b41a-72f9d7481e79",
"name": "批量新建表单数据",
"inputFields": [
{
"name": "batchInput",
"label": "batchInput",
"dataType": "string",
"controlType": "input",
"optional": true
},
{
"name": "formEntityToken",
"label": "Form Entity",
"controlType": "entity",
"optional": false
},
{
"name": "needReturnValue",
"label": "是否需要返回创建的值",
"dataType": "string",
"controlType": "select",
"pickList": [
"true",
"false"
],
"optional": true
},
{
"name": "batchSize",
"label": "Batch Size",
"dataType": "string",
"controlType": "input",
"optional": true
}
],
"outputFields": [
{
"name": "results",
"label": "返回结果"
},
{
"name": "count",
"label": "返回数量",
"dataType": "string"
}
],
"sdkType": "BATCH_CREATE_ENTITY",
"input": {
"needReturnValue": "false",
"formEntityToken": "warehouse-info-table-form",
"pbcToken": "master-data-management",
"batchInput": "$CURRENT_FLOW.excelData",
"batchSize": "50"
},
"formEntityId": ""
},
"version": "1.0",
"id": 3,
"type": "APP",
"enabled": true,
"nodeName": "Bulk insert data into Warehouse Information Table",
"runAsCurrentUser": false
},
{
"sdk": {
"id": 729,
"uuid": "ad1bac32-fe45-4ca8-894e-0d559e760520",
"name": "Redis sets the specified fields in the hash table",
"inputFields": [
{
"name": "key",
"label": "Redis操作的哈希表",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "field",
"label": "Redis操作的键",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "value",
"label": "Redis操作的键值",
"dataType": "string",
"controlType": "input",
"optional": false
}
],
"outputFields": [
{
"name": "status",
"label": "Execution result status",
"dataType": "number"
}
],
"sdkType": "REDIS_HSET",
"code": "",
"input": {
"key": "$CURRENT_FLOW.uniqueId",
"field": "$7.key",
"value": "succeed"
}
},
"version": "1.0",
"id": 15,
"type": "APP",
"enabled": true,
"nodeName": "APP - Redis sets the specified fields in the hash table",
"runAsCurrentUser": false
}
],
"retryTimesInMonitorBlock": 0,
"retryInterval": 0,
"isRetryIf": false,
"onErrorSubFlow": [
{
"sdk": {
"id": 729,
"uuid": "ad1bac32-fe45-4ca8-894e-0d559e760520",
"name": "Redis sets the specified fields in the hash table",
"inputFields": [
{
"name": "key",
"label": "Redis操作的哈希表",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "field",
"label": "Redis操作的键",
"dataType": "string",
"controlType": "input",
"optional": false
},
{
"name": "value",
"label": "Redis操作的键值",
"dataType": "string",
"controlType": "input",
"optional": false
}
],
"outputFields": [
{
"name": "status",
"label": "Execution result status",
"dataType": "number"
}
],
"sdkType": "REDIS_HSET",
"code": "",
"input": {
"key": "$CURRENT_FLOW.uniqueId",
"field": "$7.key",
"value": "falied"
}
},
"version": "1.0",
"id": 13,
"type": "APP",
"enabled": true,
"nodeName": "APP - Redis sets the specified fields in the hash table",
"runAsCurrentUser": false
}
],
"id": 10,
"type": "HANDLE_ERRORS",
"enabled": true,
"nodeName": "HANDLE_ERRORS",
"runAsCurrentUser": false
}
],
"id": 9,
"type": "IF",
"enabled": true,
"nodeName": "IF",
"runAsCurrentUser": false
},
{
"result": "Successful",
"id": 4,
"type": "STOP_JOB",
"enabled": true,
"nodeName": "STOP_JOB",
"runAsCurrentUser": false
}
]
}

[!WARNING]
在Copy此段json时,需要重新选择主流程中的Process Data Flow