Skip to content

Commit

Permalink
Fix: Change cms_medicare tables with column provider_zipcode from…
Browse files Browse the repository at this point in the history
… integer to string type (#417)
  • Loading branch information
nlarge-google committed Jul 18, 2022
1 parent 2785234 commit 27b0a9b
Show file tree
Hide file tree
Showing 9 changed files with 53 additions and 110 deletions.
11 changes: 3 additions & 8 deletions datasets/cms_medicare/infra/hospital_general_info_pipeline.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@


resource "google_bigquery_table" "cms_medicare_hospital_general_info" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "hospital_general_info"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "hospital_general_info"
description = "CMS Medicare Hospital General Info"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand Down
55 changes: 15 additions & 40 deletions datasets/cms_medicare/infra/inpatient_charges_pipeline.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@


resource "google_bigquery_table" "cms_medicare_inpatient_charges_2011" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2011"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2011"
description = "CMS Medicare Inpatient Charges 2011"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -39,15 +34,10 @@ output "bigquery_table-cms_medicare_inpatient_charges_2011-id" {
}

resource "google_bigquery_table" "cms_medicare_inpatient_charges_2012" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2012"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2012"
description = "CMS Medicare Inpatient Charges 2012"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -62,15 +52,10 @@ output "bigquery_table-cms_medicare_inpatient_charges_2012-id" {
}

resource "google_bigquery_table" "cms_medicare_inpatient_charges_2013" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2013"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2013"
description = "CMS Medicare Inpatient Charges 2013"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -85,15 +70,10 @@ output "bigquery_table-cms_medicare_inpatient_charges_2013-id" {
}

resource "google_bigquery_table" "cms_medicare_inpatient_charges_2014" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2014"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2014"
description = "CMS Medicare Inpatient Charges 2014"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -108,15 +88,10 @@ output "bigquery_table-cms_medicare_inpatient_charges_2014-id" {
}

resource "google_bigquery_table" "cms_medicare_inpatient_charges_2015" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2015"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "inpatient_charges_2015"
description = "CMS Medicare Inpatient Charges 2015"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand Down
44 changes: 12 additions & 32 deletions datasets/cms_medicare/infra/outpatient_charges_pipeline.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,10 @@


resource "google_bigquery_table" "cms_medicare_outpatient_charges_2011" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2011"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2011"
description = "CMS Medicare Outpatient Charges 2011"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -39,15 +34,10 @@ output "bigquery_table-cms_medicare_outpatient_charges_2011-id" {
}

resource "google_bigquery_table" "cms_medicare_outpatient_charges_2012" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2012"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2012"
description = "CMS Medicare Outpatient Charges 2012"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -62,15 +52,10 @@ output "bigquery_table-cms_medicare_outpatient_charges_2012-id" {
}

resource "google_bigquery_table" "cms_medicare_outpatient_charges_2013" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2013"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2013"
description = "CMS Medicare Outpatient Charges 2013"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand All @@ -85,15 +70,10 @@ output "bigquery_table-cms_medicare_outpatient_charges_2013-id" {
}

resource "google_bigquery_table" "cms_medicare_outpatient_charges_2014" {
project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2014"

project = var.project_id
dataset_id = "cms_medicare"
table_id = "outpatient_charges_2014"
description = "CMS Medicare Outpatient Charges 2014"




depends_on = [
google_bigquery_dataset.cms_medicare
]
Expand Down
3 changes: 3 additions & 0 deletions datasets/cms_medicare/infra/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ variable "bucket_name_prefix" {}
variable "impersonating_acct" {}
variable "region" {}
variable "env" {}
variable "iam_policies" {
default = {}
}

Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,9 @@ def main(

logging.info("Creating 'files' folder")
pathlib.Path("./files").mkdir(parents=True, exist_ok=True)

logging.info(f"Downloading file {source_url}")
download_file(source_url, source_file)

logging.info(f"Opening file {source_file}")

if pipeline_name in (PIPELINES_NAME_INPATIENT + PIPELINES_NAME_OUTPATIENT):
with ZipFile(source_file) as zipped_files:
file_list = zipped_files.namelist()
Expand All @@ -67,31 +64,24 @@ def main(
df = pd.read_csv(data)
else:
df = pd.read_csv(str(source_file))

logging.info(f"Transformation Process Starting.. {source_file}")

rename_headers(df, rename_mappings)

filter_null_rows(
df, PIPELINES_NAME_INPATIENT, PIPELINES_NAME_OUTPATIENT, pipeline_name
)

if pipeline_name in (PIPELINES_NAME_INPATIENT + PIPELINES_NAME_OUTPATIENT):
df["provider_zipcode"] = df["provider_zipcode"].apply(lambda x: str(x).zfill(5))
df = df[headers]

logging.info(f"Transformation Process complete .. {source_file}")

logging.info(f"Saving to output file.. {target_file}")

try:
save_to_new_file(df, file_path=str(target_file))
except Exception as e:
logging.error(f"Error saving output file: {e}.")

logging.info(
f"Uploading output file to.. gs://{target_gcs_bucket}/{target_gcs_path}"
)
upload_file_to_gcs(target_file, target_gcs_bucket, target_gcs_path)

logging.info(
"CMS Medicare process completed at "
+ str(datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -271,7 +271,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -356,7 +356,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -441,7 +441,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -526,7 +526,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down
10 changes: 5 additions & 5 deletions datasets/cms_medicare/pipelines/inpatient_charges/pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ dag:
mode: "NULLABLE"
- description: "The zip code in which the provider is physically located"
name: "provider_zipcode"
type: "INTEGER"
type: "STRING"
mode: "NULLABLE"
- description: "The code and description identifying the MS-DRG. MS-DRGs are a classification system that groups similar clinical conditions (diagnoses) and the procedures furnished by the hospital during the stay"
name: "drg_definition"
Expand Down Expand Up @@ -267,7 +267,7 @@ dag:
mode: "NULLABLE"
- description: "The zip code in which the provider is physically located"
name: "provider_zipcode"
type: "INTEGER"
type: "STRING"
mode: "NULLABLE"
- description: "The code and description identifying the MS-DRG. MS-DRGs are a classification system that groups similar clinical conditions (diagnoses) and the procedures furnished by the hospital during the stay"
name: "drg_definition"
Expand Down Expand Up @@ -327,7 +327,7 @@ dag:
mode: "NULLABLE"
- description: "The zip code in which the provider is physically located"
name: "provider_zipcode"
type: "INTEGER"
type: "STRING"
mode: "NULLABLE"
- description: "The code and description identifying the MS-DRG. MS-DRGs are a classification system that groups similar clinical conditions (diagnoses) and the procedures furnished by the hospital during the stay"
name: "drg_definition"
Expand Down Expand Up @@ -387,7 +387,7 @@ dag:
mode: "NULLABLE"
- description: "The zip code in which the provider is physically located"
name: "provider_zipcode"
type: "INTEGER"
type: "STRING"
mode: "NULLABLE"
- description: "The code and description identifying the MS-DRG. MS-DRGs are a classification system that groups similar clinical conditions (diagnoses) and the procedures furnished by the hospital during the stay"
name: "drg_definition"
Expand Down Expand Up @@ -447,7 +447,7 @@ dag:
mode: "NULLABLE"
- description: "The zip code in which the provider is physically located"
name: "provider_zipcode"
type: "INTEGER"
type: "STRING"
mode: "NULLABLE"
- description: "The code and description identifying the MS-DRG. MS-DRGs are a classification system that groups similar clinical conditions (diagnoses) and the procedures furnished by the hospital during the stay"
name: "drg_definition"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -240,7 +240,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -319,7 +319,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down Expand Up @@ -398,7 +398,7 @@
{
"description": "The zip code in which the provider is physically located",
"name": "provider_zipcode",
"type": "INTEGER",
"type": "STRING",
"mode": "NULLABLE",
},
{
Expand Down

0 comments on commit 27b0a9b

Please sign in to comment.