Skip to content

Commit

Permalink
feat: Update IDC dataset with new views and v6 version (#266)
Browse files: browse the repository at this point in the history
* feat: New views for v1-v6

* feat: bootstrap idc_v6 dataset

* fix: Add back impersonating account

* fix: Regenerate DAG with v6 dataset

* fix: Trailing semi-colon and CURRENT_VERSION env var
  • Loading branch information
adlersantos committed Jan 13, 2022
1 parent 445577c commit 02cae2b
Show file tree
Hide file tree
Showing 35 changed files with 1,093 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.analysis_results_metadata`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.analysis_results_metadata`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.auxiliary_metadata`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.auxiliary_metadata`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.dicom_all`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.dicom_all`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.dicom_metadata`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.dicom_metadata`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.dicom_metadata_curated`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.dicom_metadata_curated`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.measurement_groups`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.measurement_groups`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.nlst_canc`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.nlst_canc`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.nlst_ctab`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.nlst_ctab`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.nlst_ctabc`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.nlst_ctabc`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.nlst_prsn`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.nlst_prsn`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.nlst_screen`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.nlst_screen`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.original_collections_metadata`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.original_collections_metadata`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.qualitative_measurements`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.qualitative_measurements`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.quantitative_measurements`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.quantitative_measurements`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.segmentations`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.segmentations`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.tcga_biospecimen_rel9`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.tcga_biospecimen_rel9`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.tcga_clinical_rel9`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.tcga_clinical_rel9`
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-- Diff hunk (-1 +1): the first statement is the line before the change,
-- which read from the idc_v5 dataset.
select * from `PROJECT.idc_v5.version_metadata`
-- The second statement is the line after the change. CURRENT_VERSION and
-- PROJECT look like placeholders substituted before execution -- TODO confirm.
select * from `PROJECT.idc_CURRENT_VERSION.version_metadata`
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
-- Joins dicom_derived_all (aliased as pivot) to dicom_all on SOPInstanceUID
-- and projects a fixed, ordered column list.
-- PROJECT and DATASET look like placeholders filled in before execution
-- -- TODO confirm.
-- NOTE(review): columns written without a table prefix are left unqualified
-- exactly as authored; which of the two joined tables supplies each one is
-- not visible here, so verify the owning table before adding a prefix.
SELECT
    pivot.PatientID,
    pivot.BodyPartExamined,
    pivot.SeriesInstanceUID,
    pivot.SliceThickness,
    pivot.SeriesNumber,
    pivot.SeriesDescription,
    pivot.StudyInstanceUID,
    pivot.StudyDescription,
    pivot.StudyDate,
    pivot.SOPInstanceUID,
    pivot.Modality,
    pivot.SOPClassUID,
    pivot.collection_id,
    Internal_structure,
    Sphericity,
    Calcification,
    Lobular_Pattern,
    Spiculation,
    Margin,
    Texture,
    Subtlety_score,
    Malignancy,
    SUVbw,
    Volume,
    Diameter,
    Surface_area_of_mesh,
    Total_Lesion_Glycolysis,
    Standardized_Added_Metabolic_Activity,
    Percent_Within_First_Quarter_of_Intensity_Range,
    Percent_Within_Third_Quarter_of_Intensity_Range,
    Percent_Within_Fourth_Quarter_of_Intensity_Range,
    Percent_Within_Second_Quarter_of_Intensity_Range,
    Standardized_Added_Metabolic_Activity_Background,
    Glycolysis_Within_First_Quarter_of_Intensity_Range,
    Glycolysis_Within_Third_Quarter_of_Intensity_Range,
    Glycolysis_Within_Fourth_Quarter_of_Intensity_Range,
    Glycolysis_Within_Second_Quarter_of_Intensity_Range,
    pivot.AnatomicRegionSequence,
    SegmentedPropertyCategoryCodeSequence,
    SegmentedPropertyTypeCodeSequence,
    pivot.FrameOfReferenceUID,
    SegmentNumber,
    SegmentAlgorithmType,
    pivot.crdc_study_uuid,
    pivot.crdc_series_uuid,
    pivot.crdc_instance_uuid,
    Program,
    pivot.tcia_tumorLocation,
    pivot.source_DOI,
    gcs_url,
    pivot.tcia_species
FROM `PROJECT.DATASET.dicom_derived_all` AS pivot
-- Inner join: only rows whose SOPInstanceUID appears in both tables are kept.
INNER JOIN `PROJECT.DATASET.dicom_all` AS dicom_all
    ON pivot.SOPInstanceUID = dicom_all.SOPInstanceUID
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
-- Joins dicom_derived_all (aliased as pivot) to dicom_all on SOPInstanceUID
-- and projects a fixed, ordered column list.
-- PROJECT and DATASET look like placeholders filled in before execution
-- -- TODO confirm.
-- NOTE(review): columns written without a table prefix are left unqualified
-- exactly as authored; which of the two joined tables supplies each one is
-- not visible here, so verify the owning table before adding a prefix.
SELECT
    pivot.PatientID,
    pivot.BodyPartExamined,
    pivot.SeriesInstanceUID,
    pivot.SliceThickness,
    pivot.SeriesNumber,
    pivot.SeriesDescription,
    pivot.StudyInstanceUID,
    pivot.StudyDescription,
    pivot.StudyDate,
    pivot.SOPInstanceUID,
    pivot.Modality,
    pivot.SOPClassUID,
    pivot.collection_id,
    Apparent_Diffusion_Coefficient,
    Internal_structure,
    Sphericity,
    Calcification,
    Lobular_Pattern,
    Spiculation,
    Margin,
    Texture,
    Subtlety_score,
    Malignancy,
    SUVbw,
    Volume,
    Diameter,
    Surface_area_of_mesh,
    Total_Lesion_Glycolysis,
    Standardized_Added_Metabolic_Activity,
    Percent_Within_First_Quarter_of_Intensity_Range,
    Percent_Within_Third_Quarter_of_Intensity_Range,
    Percent_Within_Fourth_Quarter_of_Intensity_Range,
    Percent_Within_Second_Quarter_of_Intensity_Range,
    Standardized_Added_Metabolic_Activity_Background,
    Glycolysis_Within_First_Quarter_of_Intensity_Range,
    Glycolysis_Within_Third_Quarter_of_Intensity_Range,
    Glycolysis_Within_Fourth_Quarter_of_Intensity_Range,
    Glycolysis_Within_Second_Quarter_of_Intensity_Range,
    pivot.AnatomicRegionSequence,
    SegmentedPropertyCategoryCodeSequence,
    SegmentedPropertyTypeCodeSequence,
    pivot.FrameOfReferenceUID,
    SegmentNumber,
    SegmentAlgorithmType,
    pivot.crdc_study_uuid,
    pivot.crdc_series_uuid,
    pivot.crdc_instance_uuid,
    Program,
    pivot.tcia_tumorLocation,
    pivot.source_DOI,
    gcs_url,
    AdditionalPatientHistory,
    Allergies,
    ImageType,
    LastMenstrualDate,
    MedicalAlerts,
    EthnicGroup,
    Occupation,
    PatientAge,
    PatientComments,
    PatientSize,
    PatientWeight,
    PregnancyStatus,
    ReasonForStudy,
    RequestedProcedureComments,
    SmokingStatus,
    pivot.tcia_species
FROM `PROJECT.DATASET.dicom_derived_all` AS pivot
-- Inner join: only rows whose SOPInstanceUID appears in both tables are kept.
INNER JOIN `PROJECT.DATASET.dicom_all` AS dicom_all
    ON pivot.SOPInstanceUID = dicom_all.SOPInstanceUID
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
-- Joins dicom_derived_all (aliased as pivot) to dicom_all on SOPInstanceUID
-- and projects a fixed, ordered column list.
-- PROJECT and DATASET look like placeholders filled in before execution
-- -- TODO confirm.
-- NOTE(review): columns written without a table prefix are left unqualified
-- exactly as authored; which of the two joined tables supplies each one is
-- not visible here, so verify the owning table before adding a prefix.
SELECT
    pivot.PatientID,
    pivot.BodyPartExamined,
    pivot.SeriesInstanceUID,
    pivot.SliceThickness,
    pivot.SeriesNumber,
    pivot.SeriesDescription,
    pivot.StudyInstanceUID,
    pivot.StudyDescription,
    pivot.StudyDate,
    pivot.SOPInstanceUID,
    pivot.Modality,
    pivot.SOPClassUID,
    pivot.collection_id,
    Apparent_Diffusion_Coefficient,
    Internal_structure,
    Sphericity,
    Calcification,
    Lobular_Pattern,
    Spiculation,
    Margin,
    Texture,
    Subtlety_score,
    Malignancy,
    SUVbw,
    Volume,
    Diameter,
    Surface_area_of_mesh,
    Total_Lesion_Glycolysis,
    Standardized_Added_Metabolic_Activity,
    Percent_Within_First_Quarter_of_Intensity_Range,
    Percent_Within_Third_Quarter_of_Intensity_Range,
    Percent_Within_Fourth_Quarter_of_Intensity_Range,
    Percent_Within_Second_Quarter_of_Intensity_Range,
    Standardized_Added_Metabolic_Activity_Background,
    Glycolysis_Within_First_Quarter_of_Intensity_Range,
    Glycolysis_Within_Third_Quarter_of_Intensity_Range,
    Glycolysis_Within_Fourth_Quarter_of_Intensity_Range,
    Glycolysis_Within_Second_Quarter_of_Intensity_Range,
    pivot.AnatomicRegionSequence,
    SegmentedPropertyCategoryCodeSequence,
    SegmentedPropertyTypeCodeSequence,
    pivot.FrameOfReferenceUID,
    SegmentNumber,
    SegmentAlgorithmType,
    pivot.crdc_study_uuid,
    pivot.crdc_series_uuid,
    pivot.crdc_instance_uuid,
    Program,
    pivot.tcia_tumorLocation,
    pivot.source_DOI,
    gcs_url,
    AdditionalPatientHistory,
    Allergies,
    ImageType,
    LastMenstrualDate,
    MedicalAlerts,
    EthnicGroup,
    Occupation,
    PatientAge,
    PatientComments,
    PatientSize,
    PatientWeight,
    PregnancyStatus,
    ReasonForStudy,
    RequestedProcedureComments,
    SmokingStatus,
    pivot.tcia_species
FROM `PROJECT.DATASET.dicom_derived_all` AS pivot
-- Inner join: only rows whose SOPInstanceUID appears in both tables are kept.
INNER JOIN `PROJECT.DATASET.dicom_all` AS dicom_all
    ON pivot.SOPInstanceUID = dicom_all.SOPInstanceUID
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
-- Joins dicom_derived_all (aliased as pivot) to dicom_all on SOPInstanceUID
-- and projects a fixed, ordered column list.
-- PROJECT and DATASET look like placeholders filled in before execution
-- -- TODO confirm.
SELECT
    -- Columns read explicitly through the pivot alias.
    pivot.PatientID,
    pivot.BodyPartExamined,
    pivot.SeriesInstanceUID,
    pivot.SliceThickness,
    pivot.SeriesNumber,
    pivot.SeriesDescription,
    pivot.StudyInstanceUID,
    pivot.StudyDescription,
    pivot.StudyDate,
    pivot.SOPInstanceUID,
    pivot.Modality,
    pivot.SOPClassUID,
    pivot.collection_id,
    pivot.AnatomicRegionSequence,
    pivot.FrameOfReferenceUID,
    pivot.crdc_study_uuid,
    pivot.crdc_series_uuid,
    pivot.crdc_instance_uuid,
    pivot.program,
    pivot.tcia_tumorLocation,
    pivot.source_DOI,
    pivot.tcia_species,
    pivot.license_short_name,
    pivot.gcs_url,
    pivot.Manufacturer,
    pivot.ManufacturerModelName,
    -- NOTE(review): the remaining columns carry no table prefix, exactly as
    -- authored; which joined table supplies each one is not visible here, so
    -- verify the owning table before adding a prefix.
    Apparent_Diffusion_Coefficient,
    Internal_structure,
    Sphericity,
    Calcification,
    Lobular_Pattern,
    Spiculation,
    Margin,
    Texture,
    Subtlety_score,
    Malignancy,
    SUVbw,
    Volume,
    Diameter,
    Surface_area_of_mesh,
    Total_Lesion_Glycolysis,
    Standardized_Added_Metabolic_Activity,
    Percent_Within_First_Quarter_of_Intensity_Range,
    Percent_Within_Third_Quarter_of_Intensity_Range,
    Percent_Within_Fourth_Quarter_of_Intensity_Range,
    Percent_Within_Second_Quarter_of_Intensity_Range,
    Standardized_Added_Metabolic_Activity_Background,
    Glycolysis_Within_First_Quarter_of_Intensity_Range,
    Glycolysis_Within_Third_Quarter_of_Intensity_Range,
    Glycolysis_Within_Fourth_Quarter_of_Intensity_Range,
    Glycolysis_Within_Second_Quarter_of_Intensity_Range,
    SegmentedPropertyCategoryCodeSequence,
    SegmentedPropertyTypeCodeSequence,
    SegmentNumber,
    SegmentAlgorithmType,
    AdditionalPatientHistory,
    Allergies,
    ImageType,
    LastMenstrualDate,
    MedicalAlerts,
    EthnicGroup,
    Occupation,
    PatientAge,
    PatientComments,
    PatientSize,
    PatientWeight,
    PregnancyStatus,
    ReasonForStudy,
    RequestedProcedureComments,
    SmokingStatus
FROM `PROJECT.DATASET.dicom_derived_all` AS pivot
-- Inner join: only rows whose SOPInstanceUID appears in both tables are kept.
INNER JOIN `PROJECT.DATASET.dicom_all` AS dicom_all
    ON pivot.SOPInstanceUID = dicom_all.SOPInstanceUID

0 comments on commit 02cae2b

Please sign in to comment.