Extend Dataset Tags Function - Code Engine

NateBI
NateBI Contributor
edited 10:50AM in Code Engine Ideas

Problem:

There is currently no built-in way to manipulate dataset tags within Workflows, even though the GetMetadata and CreateDatasetTag Dataset functions in Code Engine show that it is possible.

Solution:

We've adopted a rough custom function for extending tags that works around this. It avoids the default behaviour of erasing all existing tags when updating them, and it includes a validation step:

def manipulate_dataset_tags(dataset_id, input_tags, action):
  """
  Extend, remove, or replace the tags on a dataset.

  The current tags are read from the dataset, the requested action is
  applied locally, the resulting tag list is POSTed back, and the dataset is
  then re-fetched to confirm that the stored tags match what was sent.

  Params:
  dataset_id(str) The dataset id - not using DATASET type to handle data types programmatically.
  input_tags(list) The list of tags e.g. ['tag1','tag2','tag3'] to add, remove, or set.
  action(str) The type of action: 'extend', 'remove', or 'replace'.

  Returns:
  A comma-separated status string(str) of the form
  "<successful|unsuccessful>,<timestamp>,<input_tags>,<detail>".

  Raises:
  ValueError if input_tags is not a list or action is not one of the
  supported strings.
  """
  import json
  from datetime import datetime

  def _parse_tags(dataset_response):
    # Pull the tag list out of a dataset response. The 'tags' field may be a
    # JSON-encoded string or an actual list; anything else counts as no tags.
    try:
      raw_tags = dataset_response.get('tags', '[]')
      parsed = json.loads(raw_tags) if isinstance(raw_tags, str) else raw_tags
    except (AttributeError, TypeError, ValueError):
      return []
    # Return a copy so later edits never mutate the response object.
    return list(parsed) if isinstance(parsed, list) else []

  # Validate inputs
  if not isinstance(input_tags, list):
    raise ValueError("tags must be a list of strings")

  if not isinstance(action, str):
    raise ValueError("action must be a string: 'extend', 'remove', or 'replace'")

  if action not in ('extend', 'remove', 'replace'):
    raise ValueError("Action must be either 'extend', 'remove', or 'replace'")

  get_url = f"/api/data/v3/datasources/{dataset_id}"
  post_url = f"/api/data/ui/v3/datasources/{dataset_id}/tags"

  current_dateTime = datetime.now()

  # Connect to Dataset
  try:
    response = codeengine.send_request('GET', get_url)
  except Exception as e:
    return f"unsuccessful,{current_dateTime},{str(input_tags)}, error connecting to dataset: {e}"

  tags = _parse_tags(response)

  if action == 'remove':
    # If none of the requested tags are present there is nothing to do
    if not any(tag in tags for tag in input_tags):
      return f"unsuccessful,{current_dateTime},{str(input_tags)},none of the specified tags exist: {tags}"

    # Remove unwanted tags
    tags = [tag for tag in tags if tag not in input_tags]

  elif action == 'extend':
    # Add only tags that are not already present, so repeated calls (or
    # repeated entries in input_tags) do not create duplicate tags.
    for tag in input_tags:
      if tag not in tags:
        tags.append(tag)

  else:
    # Replace tags; copy so the caller's list is never aliased
    tags = list(input_tags)

  # Implement change
  try:
    update_response = codeengine.send_request('POST', post_url, tags)
  except Exception as e:
    return f"unsuccessful,{current_dateTime},{str(input_tags)},POST request may have failed: {e}"

  if not update_response or (
      'error' in update_response
      or ('status' in update_response and update_response['status'] != 200)
  ):
    return f"unsuccessful,{current_dateTime},{str(input_tags)},POST request may have failed: {update_response}"

  # Validate by re-fetching the tags
  try:
    response = codeengine.send_request('GET', get_url)
  except Exception as e:
    return f"unsuccessful,{current_dateTime},{str(input_tags)}, validation may have failed: {e}"

  existing_tags = _parse_tags(response)

  # Validation status: the stored tags must match what was sent
  if set(existing_tags) == set(tags):
    return f"successful,{current_dateTime},{str(input_tags)},validation passed"
  return f"unsuccessful,{current_dateTime},{str(input_tags)}, validation may have failed"
Tagged:
2
2 votes