Source code for dynamo_consistency.cms.unmerged

"""
A module for handling the listing and cleaning of /store/unmerged
"""

import os
import sqlite3
import shutil
import logging

from cmstoolbox.unmergedcleaner import listdeletable

from .. import config
from .. import datatypes
from .. import history
from .. import opts
from .. import remotelister
from .. import summary
from ..backend import registry
from ..backend import listers
from ..emptyremover import EmptyRemover


# Module-level logger, named after this module's import path
LOG = logging.getLogger(__name__)


def report_contents(timestamp, site, files):
    """
    Creates a SQLite3 database that contains all of the files
    and directories in a list.
    This database is then moved into the **WebDir**,
    keeping the name ``SITE_protected.db``.

    The database holds three tables: ``timestamp`` (a single row),
    ``directories`` (``id``, ``dirname``) and ``files`` (``dir``, ``file``),
    where ``files.dir`` refers to ``directories.id``.
    Every file is stored under its exact parent directory.

    :param int timestamp: Time that the listing was done (seconds since epoch)
    :param str site: Used mostly for naming the database
    :param list files: List of files to put in the database
    """

    db_name = os.path.join(config.vardir('work'), '%s_protected.db' % site)

    # Always start from a fresh database
    if os.path.exists(db_name):
        os.remove(db_name)

    # Dump undeleted unmerged files into an SQL database
    conn = sqlite3.connect(db_name)
    try:
        curs = conn.cursor()

        curs.execute('CREATE TABLE timestamp (timestamp DATETIME);')
        curs.execute('CREATE TABLE directories (id INT PRIMARY KEY, dirname VARCHAR(511));')
        curs.execute("""
            CREATE TABLE files (dir INT, file CHAR(63), FOREIGN KEY(dir) REFERENCES directories(id));
            """)

        # Bind the timestamp as a parameter instead of formatting it into the SQL
        curs.execute("""
            INSERT INTO timestamp (`timestamp`) VALUES (DATETIME(?, 'unixepoch'));
            """, (timestamp,))

        # Group by the exact parent directory. A plain string-prefix test would
        # wrongly merge subdirectories and siblings sharing a prefix
        # (e.g. ``/a/b`` and ``/a/bc``) into one directory entry.
        dir_ids = {}
        file_rows = []

        for fname in files:
            dirname, basename = os.path.split(fname)

            if dirname not in dir_ids:
                dir_ids[dirname] = len(dir_ids) + 1
                curs.execute('INSERT INTO directories (`id`, `dirname`) VALUES (?, ?);',
                             (dir_ids[dirname], dirname))

            file_rows.append((dir_ids[dirname], basename))

        curs.executemany('INSERT INTO files (`dir`, `file`) VALUES (?, ?);', file_rows)

        conn.commit()
    finally:
        # Release the database handle even if one of the statements failed
        conn.close()

    webdir = summary.webdir()
    db_dest = os.path.join(webdir, os.path.basename(db_name))

    # Remove any previous report so that the move does not collide with it
    if os.path.exists(db_dest):
        os.remove(db_dest)

    # Move this over to the web directory
    shutil.move(db_name, webdir)
def _refresh_protected_list():
    """
    Fetches the protected list into ``listdeletable.PROTECTED_LIST`` and sorts it.

    :returns: False if fetching raised ``SuspiciousConditions`` or the list
              came back empty, True otherwise
    :rtype: bool
    """

    try:
        listdeletable.PROTECTED_LIST = listdeletable.get_protected()
    except listdeletable.SuspiciousConditions:
        LOG.error('Problem with protected list, returning nothing')
        return False

    # If list is empty, bail out because something is wrong
    if not listdeletable.PROTECTED_LIST:
        LOG.error('No protected list was found... Not cleaning /store/unmerged')
        return False

    # check_protected relies on this ordering to stop its scan early
    listdeletable.PROTECTED_LIST.sort()
    return True


def clean_unmerged(site):  # pylint: disable=too-complex, too-many-branches
    """
    Lists the /store/unmerged area of a site, and then uses
    :py:mod:`cmstoolbox.unmergedcleaner.listdeletable`
    to list files to delete and adds them to the registry.

    .. Warning::

      This function has a number of side effects to various module configurations.
      Definitely call this after running the main site consistency.

    If the protected list cannot be fetched or is empty, either before
    or after the listing, nothing is deleted and ``(0, 0)`` is returned.

    :param str site: The site to run the check over
    :returns: The result of entering the files into the registry
              and the number of those files that end in ``.tar.gz`` (log files)
    :rtype: int, int
    """

    # First, we do a bunch of hacky configuration changes for /store/unmerged
    listdeletable.set_config(config.LOCATION, 'ListDeletable')

    # Set the directory list to unmerged only
    config.DIRECTORYLIST = ['unmerged']
    logsdirs = ['unmerged/logs']

    if opts.MORELOGS:
        morelogs = config.CONFIG.get('AdditionalLogDeletions', {}).get(site, [])
        LOG.debug('Adding more log directories: %s', morelogs)
        config.DIRECTORYLIST.extend(morelogs)
        logsdirs.extend(morelogs)

    # Set the IgnoreAge for directories to match the listdeletable config
    datatypes.DirectoryInfo.ignore_age = listdeletable.config.MIN_AGE/(24 * 3600)

    # Get the list of protected directories
    if not _refresh_protected_list():
        return 0, 0

    with open(os.path.join(config.vardir('cache/%s' % site), 'protectedLFNs.txt'),
              'w') as protected:
        protected.write('\n'.join(listdeletable.PROTECTED_LIST) + '\n')

    # Create a tree structure that will hold the protected directories
    protected_tree = datatypes.DirectoryInfo()

    for directory in listdeletable.PROTECTED_LIST:
        protected_tree.get_node(directory)

    def check_protected(path):
        """
        Determine if the path should be protected or not

        :param str path: full path of directory
        :returns: If the path should be protected
        :rtype: bool
        """

        # If the directory is explicitly protected, of course don't delete it
        if protected_tree.get_node(path, make_new=False):
            LOG.debug('%s is protected', path)
            return True

        for protected in listdeletable.PROTECTED_LIST:
            # If a subdirectory, don't delete
            if path.startswith(protected):
                LOG.debug('%s is protected', path)
                return True

            # We sorted the protected list, so we don't have to check all of them
            if path < protected:
                break

        LOG.debug('%s is NOT protected', path)
        return False

    # Turn on the filtering of ignored directories
    listers.FILTER_IGNORED = True

    # Overwrite the RootPath, if needed
    new_root = config.CONFIG['ListDeletable'].get('AlternateRoot', {}).get(site)
    if new_root:
        config.CONFIG['RootPath'] = new_root
        listdeletable.config.UNMERGED_DIR_LOCATION = os.path.join(new_root, 'unmerged')

    # And do a listing of unmerged
    site_tree = remotelister.listing(  # pylint: disable=unexpected-keyword-arg
        site, cache='unmerged',        # cache keyword comes from cache decorator
        callback=EmptyRemover(site, check_protected))

    # Setup the config a bit more
    deletion_file = os.path.join(config.vardir('work'), '%s_unmerged.txt' % site)
    listdeletable.config.DELETION_FILE = deletion_file

    # Reset the protected list in case the listing took a long time.
    # The same sanity checks as the first fetch apply: an empty list here
    # would otherwise let the cleaner run with nothing protected.
    if not _refresh_protected_list():
        return 0, 0

    # Only consider things older than the configured minimum age
    # Only look inside unmerged for the cleaning
    rootpath = config.CONFIG['RootPath']

    listdeletable.get_unmerged_files = lambda: [
        os.path.join(rootpath, fil) for fil in
        site_tree.get_node('unmerged').get_files(listdeletable.config.MIN_AGE)
    ]

    # Do the cleaning
    listdeletable.main()

    config_dict = config.config_dict()

    # Append the old contents of the log directories to the deletion file
    with open(deletion_file, 'a') as d_file:
        for logs in logsdirs:
            node = site_tree.get_node(logs, make_new=False)
            if node:
                d_file.write('\n' + '\n'.join(
                    node.get_files(
                        min_age=(int(config_dict['UnmergedLogsAge']) * 24 * 3600),
                        path=os.path.join(rootpath, os.path.dirname(logs))
                    )))

    # Read back everything that is slated for deletion, skipping blank lines
    to_delete = set()

    with open(deletion_file, 'r') as d_file:
        to_delete.update([l.strip() for l in d_file if l.strip()])

    # Everything that is not being deleted goes into the web report
    report_contents(site_tree.timestamp, site,
                    [f for f in site_tree.get_files() if f not in to_delete])

    history.report_unmerged([(name, site_tree.get_file(name)) for name in to_delete])

    n_registered = registry.delete(site, to_delete)
    # Entries were stripped when read, so the suffix can be tested directly
    n_logs = sum(1 for name in to_delete if name.endswith('.tar.gz'))

    return n_registered, n_logs