Hi everyone! I have created my tag template in Dataplex and implemented it in several tables. I need to remove the whole tag from some tables. How can I do it? I know that this exists:
from google.cloud import datacatalog_v1
def sample_delete_tag():
    """Delete one Data Catalog tag through ``DataCatalogClient.delete_tag``.

    The string ``"name_value"`` is a placeholder for the name of the tag
    that should be deleted.
    """
    catalog_client = datacatalog_v1.DataCatalogClient()

    # The request carries only the name of the tag to delete.
    delete_request = datacatalog_v1.DeleteTagRequest(name="name_value")

    catalog_client.delete_tag(request=delete_request)
What should I indicate in the request? My table and the tag I want to delete? How? Thank you!
Solved! Go to Solution.
Hi @Wolfmarel ,
You're on the right track with the `datacatalog_v1.DataCatalogClient` and the `delete_tag` method. Here's the key concept:
Tags are associated with entries: In Data Catalog (which Dataplex uses for metadata), tags are linked to specific entries. These entries represent your tables, views, files, etc.
The `name` argument: The `name` in your `DeleteTagRequest` must uniquely identify the specific tag you want to remove. This name includes both the entry's name and the tag's unique identifier.
Use the Data Catalog API or the Google Cloud console to find the fully qualified names of the entries representing the tables you want to modify. These names will have a format similar to:
projects/{project_id}/locations/{region}/entryGroups/{entry_group_id}/entries/{entry_id}
Construct the Tag Names:
Combine the entry name with the tag's unique identifier in this format:
{entry_name}/tags/{tag_id}
Delete Tags in a Loop:
Iterate over the tables where you want to remove the tag. For each table, perform these steps:
Replace `name_value` in your code with the fully qualified tag name you constructed.
Call the `client.delete_tag(request=request)` function.
from google.cloud import datacatalog_v1
def delete_tags_from_tables(project_id, region, entry_group_id, table_names, tag_template_name):
    """Delete, from each listed table entry, the first tag that uses a given template.

    Args:
        project_id: Project segment of each entry's resource name.
        region: Location segment of each entry's resource name.
        entry_group_id: Entry group segment of each entry's resource name.
        table_names: Iterable of values used as the entry ID of each table.
        tag_template_name: Value compared against each tag's ``template``
            field to decide which tag to delete.

    One line is printed per table, reporting either the deletion or that no
    matching tag was found.

    NOTE(review): each table name is used verbatim as the entry ID; confirm
    this matches how the entries are actually identified in the catalog.
    """
    catalog = datacatalog_v1.DataCatalogClient()

    for table_name in table_names:
        entry_name = f"projects/{project_id}/locations/{region}/entryGroups/{entry_group_id}/entries/{table_name}"
        entry_tags = catalog.list_tags(parent=entry_name)

        # Only the first tag whose template matches is selected; iteration
        # stops there, so any later matching tags are left in place.
        matching_tag_name = next(
            (tag.name for tag in entry_tags if tag.template == tag_template_name),
            None,
        )

        if not matching_tag_name:
            print(f"No tag with template '{tag_template_name}' found on table '{table_name}'")
            continue

        catalog.delete_tag(
            request=datacatalog_v1.DeleteTagRequest(name=matching_tag_name)
        )
        print(f"Deleted tag '{tag_template_name}' from table '{table_name}'")
# Example invocation: every value below is a placeholder and must be
# replaced with real identifiers before running.
project_id = "your-project-id"
region = "your-region"
entry_group_id = "your-entry-group-id"
table_names = ["table1", "table2", "table3"]
tag_template_name = "projects/your-project-id/locations/your-region/tagTemplates/your-tag-template"

delete_tags_from_tables(
    project_id=project_id,
    region=region,
    entry_group_id=entry_group_id,
    table_names=table_names,
    tag_template_name=tag_template_name,
)
Hi @Wolfmarel ,
You're on the right track with the `datacatalog_v1.DataCatalogClient` and the `delete_tag` method. Here's the key concept:
Tags are associated with entries: In Data Catalog (which Dataplex uses for metadata), tags are linked to specific entries. These entries represent your tables, views, files, etc.
The `name` argument: The `name` in your `DeleteTagRequest` must uniquely identify the specific tag you want to remove. This name includes both the entry's name and the tag's unique identifier.
Use the Data Catalog API or the Google Cloud console to find the fully qualified names of the entries representing the tables you want to modify. These names will have a format similar to:
projects/{project_id}/locations/{region}/entryGroups/{entry_group_id}/entries/{entry_id}
Construct the Tag Names:
Combine the entry name with the tag's unique identifier in this format:
{entry_name}/tags/{tag_id}
Delete Tags in a Loop:
Iterate over the tables where you want to remove the tag. For each table, perform these steps:
Replace `name_value` in your code with the fully qualified tag name you constructed.
Call the `client.delete_tag(request=request)` function.
from google.cloud import datacatalog_v1
def delete_tags_from_tables(project_id, region, entry_group_id, table_names, tag_template_name):
    """Delete, from each listed table entry, the first tag that uses a given template.

    Args:
        project_id: Project segment of each entry's resource name.
        region: Location segment of each entry's resource name.
        entry_group_id: Entry group segment of each entry's resource name.
        table_names: Iterable of values used as the entry ID of each table.
        tag_template_name: Value compared against each tag's ``template``
            field to decide which tag to delete.

    One line is printed per table, reporting either the deletion or that no
    matching tag was found.

    NOTE(review): each table name is used verbatim as the entry ID; confirm
    this matches how the entries are actually identified in the catalog.
    """
    catalog = datacatalog_v1.DataCatalogClient()

    for table_name in table_names:
        entry_name = f"projects/{project_id}/locations/{region}/entryGroups/{entry_group_id}/entries/{table_name}"
        entry_tags = catalog.list_tags(parent=entry_name)

        # Only the first tag whose template matches is selected; iteration
        # stops there, so any later matching tags are left in place.
        matching_tag_name = next(
            (tag.name for tag in entry_tags if tag.template == tag_template_name),
            None,
        )

        if not matching_tag_name:
            print(f"No tag with template '{tag_template_name}' found on table '{table_name}'")
            continue

        catalog.delete_tag(
            request=datacatalog_v1.DeleteTagRequest(name=matching_tag_name)
        )
        print(f"Deleted tag '{tag_template_name}' from table '{table_name}'")
# Example invocation: every value below is a placeholder and must be
# replaced with real identifiers before running.
project_id = "your-project-id"
region = "your-region"
entry_group_id = "your-entry-group-id"
table_names = ["table1", "table2", "table3"]
tag_template_name = "projects/your-project-id/locations/your-region/tagTemplates/your-tag-template"

delete_tags_from_tables(
    project_id=project_id,
    region=region,
    entry_group_id=entry_group_id,
    table_names=table_names,
    tag_template_name=tag_template_name,
)
Great, ms4446! Thanks. I solved it as follows:
# Look up the Data Catalog entry for the BigQuery table via its
# linked-resource name; project_id, dataset_id, table_id and dc_client are
# expected to be defined earlier.
linked_resource = f"//bigquery.googleapis.com/projects/{project_id}/datasets/{dataset_id}/tables/{table_id}"
lookup_request = datacatalog_v1.LookupEntryRequest(linked_resource=linked_resource)
tabla_entry = dc_client.lookup_entry(request=lookup_request)

# List the tags attached to that entry.
list_request = datacatalog_v1.ListTagsRequest(parent=tabla_entry.name)
page_result = dc_client.list_tags(request=list_request)

for tag in page_result:
    # Read the fields directly from the returned tag message rather than
    # converting it to a dict through the private `_pb` attribute: this
    # avoids a KeyError when a field is omitted from the dict form and drops
    # the dependency on MessageToDict.
    if tag.template == tag_table_filtro:
        # Every tag created from the wanted template is deleted (no early
        # exit), so several matching tags on one entry are all removed.
        delete_request = datacatalog_v1.DeleteTagRequest(name=tag.name)
        dc_client.delete_tag(request=delete_request)