Problem:
There is currently no default method to manipulate tags within Workflows despite the potential with the GetMetadata and CreateDatasetTag Dataset functions in code engine.
Solution:
We've adopted a rough custom function for extending, removing, or replacing tags that works around this. It avoids the default behaviour of erasing all existing tags when updating them, and it includes a validation step that re-fetches the tags after the update:
def manipulate_dataset_tags(dataset_id, input_tags, action):
    """
    Extend, remove, or replace the tags on a dataset, then verify the change.

    The current tags are fetched first so that 'extend' and 'remove' operate
    on the existing tag list rather than overwriting it. After the update is
    posted, the tags are fetched again and compared with the expected list.

    Returns:
        str: A comma-separated status string of the form
        "<successful|unsuccessful>,<timestamp>,<input_tags>,<detail>".
        Request failures are reported through this string, not raised.
    Params:
        dataset_id(str) The dataset id - not using DATASET type to handle
            data types programmatically.
        input_tags(list | str) The tags to apply, e.g. ['tag1','tag2','tag3'].
            A single string is treated as a one-element list.
        action(str) The type of action: 'extend', 'remove', or 'replace'.
    Raises:
        ValueError: If input_tags is neither a list nor a string, or if
            action is not one of the supported action strings.
    """
    import json
    from datetime import datetime

    # Validate inputs. A bare string is wrapped so that it is handled as one
    # tag instead of being rejected (the error message below allows it).
    if isinstance(input_tags, str):
        input_tags = [input_tags]
    if not isinstance(input_tags, list):
        raise ValueError("tags must be a list of strings or a single string")
    if not isinstance(action, str):
        raise ValueError("action must be a string: 'extend', 'remove', or 'replace'")
    if action not in ('extend', 'remove', 'replace'):
        raise ValueError("Action must be either 'extend', 'remove', or 'replace'")

    def _read_tags(response):
        """Return the 'tags' entry of a response as a list ([] if unusable)."""
        try:
            raw_tags = response.get('tags', '[]')
            parsed = json.loads(raw_tags) if isinstance(raw_tags, str) else raw_tags
        except (AttributeError, TypeError, ValueError):
            # Response is not dict-like, or the tags payload is not valid JSON.
            return []
        # Copy so later edits never mutate the response object itself.
        return list(parsed) if isinstance(parsed, list) else []

    get_url = f"/api/data/v3/datasources/{dataset_id}"
    post_url = f"/api/data/ui/v3/datasources/{dataset_id}/tags"
    current_dateTime = datetime.now()

    # Connect to Dataset.
    # NOTE(review): `codeengine` is not defined in this block; it is assumed
    # to be supplied by the hosting runtime - confirm.
    try:
        response = codeengine.send_request('GET', get_url)
    except Exception as e:
        return f"unsuccessful,{current_dateTime},{str(input_tags)}, error connecting to dataset: {e}"

    # Retrieve the tags currently on the dataset.
    # NOTE(review): an unreadable tags payload is treated as "no tags", so a
    # following 'extend' would post only the new tags - confirm acceptable.
    tags = _read_tags(response)

    if action == 'remove':
        # If none of the requested tags are present there is nothing to do.
        if not any(tag in tags for tag in input_tags):
            return f"unsuccessful,{current_dateTime},{str(input_tags)},none of the specified tags exist: {tags}"
        tags = [tag for tag in tags if tag not in input_tags]
    elif action == 'extend':
        # Append only tags that are not already present, keeping order and
        # avoiding duplicate entries in the posted list.
        for tag in input_tags:
            if tag not in tags:
                tags.append(tag)
    else:  # 'replace'
        tags = list(input_tags)

    # Implement change
    try:
        update_response = codeengine.send_request('POST', post_url, tags)
    except Exception as e:
        return f"unsuccessful,{current_dateTime},{str(input_tags)},POST request failed: {e}"
    post_failed = (
        not update_response
        or 'error' in update_response
        or ('status' in update_response and update_response['status'] != 200)
    )
    if post_failed:
        return f"unsuccessful,{current_dateTime},{str(input_tags)},POST request may have failed: {update_response}"

    # Validate by re-fetching the tags
    try:
        response = codeengine.send_request('GET', get_url)
    except Exception as e:
        return f"unsuccessful,{current_dateTime},{str(input_tags)}, error re-fetching dataset for validation: {e}"
    existing_tags = _read_tags(response)

    # Validation status: order-insensitive comparison of expected vs. actual.
    if set(existing_tags) == set(tags):
        return f"successful,{current_dateTime},{str(input_tags)},validation passed"
    return f"unsuccessful,{current_dateTime},{str(input_tags)}, validation may have failed"