Source code for cloudos_cli.utils.array_job
import re
import sys
from cloudos_cli.utils.errors import BadRequestException
[docs]
def is_valid_regex(s):
    """
    Report whether a string compiles as a regular expression.

    Parameters
    ----------
    s : str
        Candidate pattern, handed unchanged to ``re.compile``.

    Returns
    -------
    bool
        True when ``re.compile`` accepts the pattern, False when it
        raises ``re.error``.
    """
    try:
        re.compile(s)
    except re.error:
        # Only pattern syntax errors are treated as "not a regex".
        return False
    else:
        return True
[docs]
def is_glob_pattern(s):
    """
    Tell whether a string contains any glob wildcard character.

    The characters looked for are '*', '?' and '[', the special
    characters used by glob-style filename matching.

    Parameters
    ----------
    s : str
        The string to inspect.

    Returns
    -------
    bool
        True as soon as one of the wildcard characters is found in ``s``,
        False when none of them is present.
    """
    for wildcard in ("*", "?", "["):
        if wildcard in s:
            return True
    return False
[docs]
def is_probably_regex(s):
    """
    Guess whether a string was written as a regular expression.

    The string must first compile as a regex (checked through
    `is_valid_regex`). It is then scanned for constructs that are typical
    of regexes but unusual in plain file names: ``.*`` / ``.+``, the
    escapes ``\\d`` / ``\\w`` / ``\\s``, bracket expressions, parenthesised
    groups, ``{n}`` / ``{n,m}`` quantifiers, the anchors ``^`` / ``$`` and
    alternation with ``|``.

    Parameters
    ----------
    s : str
        The string to evaluate.

    Returns
    -------
    bool
        True when ``s`` is a valid regex and contains at least one of the
        indicator constructs, False otherwise.
    """
    if not is_valid_regex(s):
        return False

    # Each entry is itself a regex that detects one "real regex" construct.
    indicator_patterns = (
        r"\.\*",            # ".*"
        r"\.\+",            # ".+"
        r"\\[dws]",         # \d, \w or \s
        r"\[[^\]]+\]",      # non-empty bracket expression
        r"\([^\)]+\)",      # non-empty parenthesised group
        r"\{\d+(,\d*)?\}",  # {n}, {n,} or {n,m}
        r"\^",              # start anchor
        r"\$",              # end anchor
        r"\|",              # alternation
    )
    for indicator in indicator_patterns:
        if re.search(indicator, s):
            return True
    return False
[docs]
def classify_pattern(s):
    """
    Label a pattern string as "regex", "glob" or "exact".

    The regex check runs first, so a string that satisfies both
    `is_probably_regex` and `is_glob_pattern` is reported as "regex".

    Parameters
    ----------
    s : str
        The string pattern to classify.

    Returns
    -------
    str
        - "regex" when `is_probably_regex` accepts the pattern.
        - "glob" when it is not a probable regex but `is_glob_pattern`
          accepts it.
        - "exact" in every other case.
    """
    if is_probably_regex(s):
        return "regex"
    return "glob" if is_glob_pattern(s) else "exact"
[docs]
def generate_datasets_for_project(cloudos_url, apikey, workspace_id, project_name, verify_ssl):
    """
    Build a `Datasets` object bound to one project of a CloudOS workspace.

    Two failures of the `Datasets` constructor are reported to the user
    with a printed message followed by ``sys.exit(1)`` instead of being
    propagated: a ``ValueError``, and a ``BadRequestException`` whose text
    contains 'Forbidden'.

    Parameters
    ----------
    cloudos_url : str
        The URL of the CloudOS instance.
    apikey : str
        The API key for authentication.
    workspace_id : str
        The ID of the workspace where the project resides.
    project_name : str
        The name of the project the `Datasets` object is created for.
    verify_ssl : bool
        Forwarded to `Datasets` as its ``verify`` argument.

    Returns
    -------
    Datasets
        An instance of the `Datasets` class initialized for the project.

    Raises
    ------
    SystemExit
        With exit code 1 when `Datasets` raises ``ValueError``, or raises
        a ``BadRequestException`` mentioning 'Forbidden'.
    BadRequestException
        Re-raised unchanged when its text does not contain 'Forbidden'.
    """
    # Imported here rather than at module level to avoid a circular import.
    from cloudos_cli.datasets import Datasets

    settings = {
        "cloudos_url": cloudos_url,
        "apikey": apikey,
        "workspace_id": workspace_id,
        "project_name": project_name,
        "verify": verify_ssl,
        "cromwell_token": None,
    }
    try:
        project_datasets = Datasets(**settings)
    except ValueError:
        print(f"No {project_name} element in projects was found")
        sys.exit(1)
    except BadRequestException as error:
        # Anything other than an authorisation failure is the caller's problem.
        if 'Forbidden' not in str(error):
            raise error
        print('It seems your call is not authorised. Please check if '
              'your workspace is restricted by Airlock and if your API key is valid.')
        sys.exit(1)
    return project_datasets
[docs]
def get_file_or_folder_id(cloudos_url, apikey, workspace_id, project_name, verify_ssl, command_dir, command_name, is_file=True):
    """Retrieve the ID of a specific file or folder within a CloudOS workspace.

    Parameters
    ----------
    cloudos_url : str
        The base URL of the CloudOS API.
    apikey : str
        The API key for authenticating requests to the CloudOS API.
    workspace_id : str
        The ID of the workspace containing the project.
    project_name : str
        The name of the project within the workspace.
    verify_ssl : bool
        Whether to verify SSL certificates for the API requests.
    command_dir : str
        Slash-separated folder path inside the project. When looking up a
        file it is the folder that contains the file; when looking up a
        folder it is the path of the folder itself.
    command_name : str
        The name of the file whose ID is to be retrieved. Only used when
        ``is_file`` is True.
    is_file : bool, optional
        Whether to retrieve a file ID (True) or folder ID (False). Default is True.

    Returns
    -------
    str
        The value of the ``"_id"`` field of the matching entry, or an
        empty string when the entry has no ``"_id"`` field.

    Raises
    ------
    ValueError
        If the specified file or folder is not found.

    Notes
    -----
    - The `Datasets` object comes from `generate_datasets_for_project`,
      which may terminate the process or raise on API errors.
    - Files are searched in ``list_folder_content(command_dir)``.
    - A top-level folder is searched in ``list_project_content()``; a nested
      folder is searched in the listing of its parent path.
    """
    ds = generate_datasets_for_project(cloudos_url, apikey, workspace_id, project_name, verify_ssl)

    if is_file:
        # Look for the file among the entries of its containing folder.
        content = ds.list_folder_content(command_dir)
        for file in content['files']:
            if file.get("name") == command_name:
                return file.get("_id", '')
        raise ValueError(f"File '{command_name}' not found in directory '{command_dir}'.")

    if "/" in command_dir:
        # Nested folder: it is listed in its immediate parent, not in the
        # first path component, so "a/b/c" must be searched for in "a/b".
        # NOTE(review): assumes list_folder_content accepts a nested
        # slash-separated path, as the file branch above already passes
        # command_dir to it unmodified -- confirm.
        parent_path, folder_to_search = command_dir.rsplit("/", 1)
        folders = ds.list_folder_content(parent_path)
    else:
        # Folder sitting directly below the project.
        folders = ds.list_project_content()
        folder_to_search = command_dir

    for folder in folders['folders']:
        if folder.get("name") == folder_to_search:
            return folder.get("_id", '')
    raise ValueError(f"Folder '{folder_to_search}' not found in project.")