Source code for dynamo_consistency.checkphedex
# pylint: disable=import-error
"""
A module that provides functions to check the comparison results against
the list of files and deletions in PhEDEx.
:author: Daniel Abercrombie <dabercro@mit.edu>
"""
import time
import logging
from cmstoolbox.webtools import get_json
from . import config
LOG = logging.getLogger(__name__)
def set_of_deletions(site):
    """
    Get the set of datasets with approved deletion requests at a given site that were created
    within the number of days matching the **IgnoreAge** configuration parameter
    (treated as 0 days when the parameter is not set).
    This request is done via the PhEDEx ``deleterequests`` API.

    Block-level entries of a request are reduced to their dataset name by
    dropping everything from the first ``#`` onwards, so blocks and datasets
    end up in the same set.

    :param str site: The site that we want the list of deletion requests for.
    :returns: Datasets that are in deletion requests.
              The set is empty when the API call gives back an empty response.
    :rtype: set
    """
    # Earliest creation time (seconds since the epoch) of requests to ask for
    ignore_age_days = float(config.config_dict().get('IgnoreAge', 0))
    created_since = int(time.time() - ignore_age_days * 24 * 3600)

    # Get deletion requests in PhEDEx
    deletion_request = get_json(
        'cmsweb.cern.ch', '/phedex/datasvc/json/prod/deleterequests',
        {'node': site, 'approval': 'approved', 'create_since': created_since},
        use_https=True)

    datasets_for_deletion = set()
    if not deletion_request:
        return datasets_for_deletion

    # Every request lists both blocks and datasets; collect the dataset names
    # of both directly into the set instead of concatenating temporary lists.
    for request in deletion_request['phedex']['request']:
        dbs = request['data']['dbs']
        datasets_for_deletion.update(
            block['name'].split('#')[0] for block in dbs['block'])
        datasets_for_deletion.update(
            dataset['name'] for dataset in dbs['dataset'])

    return datasets_for_deletion
def check_for_datasets(site, orphan_list_file):
    """
    Checks PhEDEx exhaustively to see if a dataset should exist at a site,
    according to PhEDEx, but has files marked as orphans according to our check.
    This is done via the PhEDEx ``filereplicas`` API.
    The number of filereplicas for each dataset is printed to the terminal.
    Datasets that contain any filereplicas are returned by this function.

    Each dataset is only looked up once, no matter how many orphan files belong to it.
    Blank lines in the input are ignored, and lines with too few ``/``-separated
    fields to build a dataset name are skipped with a warning.

    :param str site: The name of the site to check
    :param str orphan_list_file: Path of a file holding the LFNs that are listed
                                 as orphans at the site, one LFN per line
    :returns: The number of files and the dataset name for each dataset that is
              supposed to have at least 1 file at the site.
    :rtype: list of tuples
    """
    datasets = set()
    output = []

    with open(orphan_list_file) as orphans:
        for line in orphans:
            # Drop the newline so it cannot leak into the dataset name
            lfn = line.strip()
            if not lfn:
                continue

            split_name = lfn.split('/')
            # Fields 3 to 6 are needed below
            # NOTE(review): presumably LFNs look like
            # /store/<type>/<era>/<primary>/<tier>/<version>/... -- confirm
            if len(split_name) < 7:
                LOG.warning('Cannot determine the dataset of orphan %s, skipping', lfn)
                continue

            dataset = '/%s/%s-%s/%s' % (split_name[4], split_name[3], split_name[6], split_name[5])
            if dataset in datasets:
                continue

            phedex_response = get_json(
                'cmsweb.cern.ch', '/phedex/datasvc/json/prod/filereplicas',
                {'node': site, 'dataset': dataset},
                use_https=True)

            num_files = sum(len(block['file']) for block in phedex_response['phedex']['block'])
            datasets.add(dataset)

            # Single-argument form prints the same text under Python 2 and 3
            print('%s %s' % (num_files, dataset))

            if num_files:
                output.append((num_files, dataset))

    return output