Skip to content

Commit

Permalink
Datafusion assets (#21518)
Browse files: browse the repository at this point in the history
  • Loading branch information
wojsamjan committed Feb 15, 2022
1 parent 56365b1 commit dc03000
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 56 deletions.
64 changes: 30 additions & 34 deletions airflow/providers/google/cloud/example_dags/example_datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,30 +55,31 @@

# Pipeline name, taken from the GCP_DATAFUSION_PIPELINE_NAME environment
# variable; falls back to "airflow_test" when the variable is unset.
PIPELINE_NAME = os.getenv("GCP_DATAFUSION_PIPELINE_NAME", "airflow_test")
PIPELINE = {
"name": "test-pipe",
"artifact": {
"name": "cdap-data-pipeline",
"version": "6.5.1",
"scope": "SYSTEM",
"label": "Data Pipeline - System Test",
},
"description": "Data Pipeline Application",
"artifact": {"name": "cdap-data-pipeline", "version": "6.4.1", "scope": "SYSTEM"},
"name": "test-pipe",
"config": {
"resources": {"memoryMB": 2048, "virtualCores": 1},
"driverResources": {"memoryMB": 2048, "virtualCores": 1},
"connections": [{"from": "GCS", "to": "GCS2"}],
"comments": [],
"postActions": [],
"properties": {},
"processTimingEnabled": True,
"stageLoggingEnabled": False,
"processTimingEnabled": "true",
"stageLoggingEnabled": "false",
"stages": [
{
"name": "GCS",
"plugin": {
"name": "GCSFile",
"type": "batchsource",
"label": "GCS",
"artifact": {
"name": "google-cloud",
"version": "0.17.3",
"scope": "SYSTEM",
},
"artifact": {"name": "google-cloud", "version": "0.18.1", "scope": "SYSTEM"},
"properties": {
"project": "auto-detect",
"format": "text",
Expand All @@ -87,62 +88,57 @@
"filenameOnly": "false",
"recursive": "false",
"encrypted": "false",
"schema": '{"type":"record","name":"etlSchemaBody","fields":'
'[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}',
"schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\
:\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"path": BUCKET_1_URI,
"referenceName": "foo_bucket",
"useConnection": "false",
"serviceAccountType": "filePath",
"sampleSize": "1000",
"fileEncoding": "UTF-8",
},
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": '{"type":"record","name":"etlSchemaBody","fields":'
'[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}',
}
],
"outputSchema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\"\
:[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"id": "GCS",
},
{
"name": "GCS2",
"plugin": {
"name": "GCS",
"type": "batchsink",
"label": "GCS2",
"artifact": {
"name": "google-cloud",
"version": "0.17.3",
"scope": "SYSTEM",
},
"artifact": {"name": "google-cloud", "version": "0.18.1", "scope": "SYSTEM"},
"properties": {
"project": "auto-detect",
"suffix": "yyyy-MM-dd-HH-mm",
"format": "json",
"serviceFilePath": "auto-detect",
"location": "us",
"schema": '{"type":"record","name":"etlSchemaBody","fields":'
'[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}',
"schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\
:\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"referenceName": "bar",
"path": BUCKET_2_URI,
"serviceAccountType": "filePath",
"contentType": "application/octet-stream",
},
},
"outputSchema": [
{
"name": "etlSchemaBody",
"schema": '{"type":"record","name":"etlSchemaBody","fields":'
'[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}',
}
],
"outputSchema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\"\
:[{\"name\":\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}",
"inputSchema": [
{
"name": "GCS",
"schema": '{"type":"record","name":"etlSchemaBody","fields":'
'[{"name":"offset","type":"long"},{"name":"body","type":"string"}]}',
"schema": "{\"type\":\"record\",\"name\":\"textfile\",\"fields\":[{\"name\"\
:\"offset\",\"type\":\"long\"},{\"name\":\"body\",\"type\":\"string\"}]}",
}
],
"id": "GCS2",
},
],
"schedule": "0 * * * *",
"engine": "spark",
"numOfRecordsPreview": 100,
"description": "Data Pipeline Application",
"maxConcurrentRuns": 1,
},
}
Expand Down

0 comments on commit dc03000

Please sign in to comment.