# Source code for dynamo_consistency.cms.filedumps

"""
A module to handle file dumps from sites
"""


import os
import logging
import time
import datetime
import subprocess

from .. import config
from .. import opts

LOG = logging.getLogger(__name__)


class LineReader(object):  # pylint:disable=too-few-public-methods
    """
    A callable object that translates lines from a file dump.
    It tracks the time that it was initialized.
    """

    def __init__(self):
        # Whole seconds since the epoch at construction time.
        self.now = int(time.time())

    def __call__(self, line):
        """
        Split a dump line on whitespace and convert its first three columns.

        :param str line: Single line from a file dump
        :returns: The useful information from a line: the first column
                  unchanged, the second column as an integer, and the third
                  column converted from days since epoch to seconds
        :rtype: tuple
        :raises IndexError: if the line has fewer than three columns
        :raises ValueError: if the second or third column is not an integer
        """
        contents = line.split()

        # The last column is time in *days* since epoch,
        # so scale it to seconds before handing it back.
        seconds = int(contents[2]) * 3600 * 24

        return contents[0], int(contents[1]), seconds
def read_ral_dump(endpoint, datestring=None):
    """
    Copies a file dump from a remote site and sorts it into a local listing.

    The dump name is ``unmerged`` when that string is in the ``DirectoryList``
    entry of the configuration, and ``consistency`` otherwise. The remote
    file is copied with ``gfal-copy`` to ``<inputfile>.raw`` in the scratch
    directory and then sorted with ``sort`` into ``<inputfile>``.

    Both commands are run as argument lists without a shell, so the endpoint,
    the date string and the local paths are passed through literally (no
    word splitting, globbing or interpretation of shell metacharacters).

    :param str endpoint: The SE to copy the file dump from
    :param str datestring: An optional datestring to force source file name.
                           If not given, ``opts.DATESTRING`` is used, and
                           otherwise today's UTC date as ``YYYYMMDD``
    :returns: A tuple of the filename and translator
    :rtype: tuple
    :raises subprocess.CalledProcessError: if ``gfal-copy`` or ``sort``
                                           exits with a non-zero status
    """
    directories = config.config_dict().get('DirectoryList', [])
    dump = 'unmerged' if 'unmerged' in directories else 'consistency'

    inputfile = os.path.join(
        config.vardir('scratch'),
        '%s_%s' % (dump, config.SITE or opts.SITE_PATTERN)
    )
    raw_file = '%s.raw' % inputfile

    # Remove any raw copy left behind by a previous run before copying anew.
    if os.path.exists(raw_file):
        os.remove(raw_file)

    # Datestring can be set as a parameter in the function
    # or in the cmdline options. Otherwise, just use today.
    date = (datestring or opts.DATESTRING or
            datetime.datetime.utcnow().strftime('%Y%m%d'))

    source = '{endpoint}/store/accounting/{dump}-{date}.tsv'.format(
        endpoint=endpoint, dump=dump, date=date)

    commands = (
        ['gfal-copy', source, raw_file],
        ['sort', '-o', inputfile, raw_file],
    )

    for command in commands:
        LOG.info('About to call: %s', ' '.join(command))
        # Argument list and no shell: every element reaches the program
        # exactly as written here.
        subprocess.check_call(command)

    return inputfile, LineReader()