Skip to content

Commit

Permalink
fix: AlphaFold dataset - add accession_ids.csv to the bucket (#451)
Browse files Browse the repository at this point in the history
  • Loading branch information
adlersantos committed Aug 25, 2022
1 parent 4333fca commit cacd9f1
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 9 deletions.
12 changes: 7 additions & 5 deletions datasets/deepmind/pipelines/alphafold/alphafold_dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,17 +35,19 @@
default_view="graph",
) as dag:

- # Copy JSON metadata and FASTA to public bucket
- copy_json_metadata_and_fasta_to_public_bucket = (
+ # Copy JSON metadata, accession IDs, and FASTA to public bucket
+ copy_json_metadata_accession_and_fasta_to_public_bucket = (
cloud_storage_transfer_service.CloudDataTransferServiceGCSToGCSOperator(
- task_id="copy_json_metadata_and_fasta_to_public_bucket",
+ task_id="copy_json_metadata_accession_and_fasta_to_public_bucket",
timeout=43200,
retries=0,
wait=True,
project_id="bigquery-public-data",
source_bucket="{{ var.json.deepmind.alphafold.source_bucket }}",
destination_bucket="{{ var.json.deepmind.alphafold.destination_bucket }}",
- object_conditions={"includePrefixes": ["metadata", "sequences.fasta"]},
+ object_conditions={
+ "includePrefixes": ["metadata", "accession_ids.csv", "sequences.fasta"]
+ },
transfer_options={
"overwriteWhen": "DIFFERENT",
"deleteObjectsUniqueInSink": True,
Expand Down Expand Up @@ -311,7 +313,7 @@
)

[
- copy_json_metadata_and_fasta_to_public_bucket,
+ copy_json_metadata_accession_and_fasta_to_public_bucket,
copy_proteomes_to_public_bucket,
] >> load_json_metadata_to_bq
(
Expand Down
8 changes: 4 additions & 4 deletions datasets/deepmind/pipelines/alphafold/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,17 @@ dag:

tasks:
- operator: "CloudDataTransferServiceGCSToGCSOperator"
- description: "Copy JSON metadata and FASTA to public bucket"
+ description: "Copy JSON metadata, accession IDs, and FASTA to public bucket"
args:
- task_id: copy_json_metadata_and_fasta_to_public_bucket
+ task_id: copy_json_metadata_accession_and_fasta_to_public_bucket
timeout: 43200 # 12 hours
retries: 0
wait: True
project_id: bigquery-public-data
source_bucket: "{{ var.json.deepmind.alphafold.source_bucket }}"
destination_bucket: "{{ var.json.deepmind.alphafold.destination_bucket }}"
object_conditions:
- includePrefixes: ["metadata", "sequences.fasta"]
+ includePrefixes: ["metadata", "accession_ids.csv", "sequences.fasta"]
transfer_options:
overwriteWhen: DIFFERENT
deleteObjectsUniqueInSink: True
Expand Down Expand Up @@ -279,7 +279,7 @@ dag:
type: STRING

graph_paths:
- - "[copy_json_metadata_and_fasta_to_public_bucket, copy_proteomes_to_public_bucket] >> load_json_metadata_to_bq"
+ - "[copy_json_metadata_accession_and_fasta_to_public_bucket, copy_proteomes_to_public_bucket] >> load_json_metadata_to_bq"
- "download_accession_ids_to_composer_bucket >> generate_manifests >> [suffix_confidence_v3_json, suffix_model_v3_cif, suffix_predicted_aligned_error_v3_json]"
- "[suffix_confidence_v3_json, suffix_model_v3_cif, suffix_predicted_aligned_error_v3_json] >> create_and_run_sts_jobs_using_manifests"
- "create_and_run_sts_jobs_using_manifests >> [copy_manifests_to_public_bucket, load_json_metadata_to_bq]"

0 comments on commit cacd9f1

Please sign in to comment.