Source code for mkShapesRDF.lib.search_files

import fnmatch
import subprocess
import glob
import sys


class SearchFiles:
    """
    Class to search for files in a folder or DAS
    """

    def __init__(self):
        # Cache of raw listings so repeated searches do not re-run
        # `glob.glob(folder)`, `xrdfs redirector ls folder` or `dasgoclient`.
        # Folder listings are keyed by ``(redirector, folder)``; DAS listings
        # are keyed by the dataset name (plus the instance when one is given).
        self.cached_list_of_files = {}

    @staticmethod
    def _part_sort_key(path):
        """Return a sort key ordering ``...__part<N>.root`` paths by ``N``.

        The part number is first parsed from the text before the first ``.``
        of the file name, then from the text before the last extension, so
        process names containing dots are handled too. Names for which no
        integer can be parsed sort after all numbered parts instead of
        raising ``ValueError``.
        """
        name = path.split("/")[-1]
        candidates = (
            name.split(".")[0].split("__part")[-1],
            name.rsplit(".", 1)[0].rsplit("__part", 1)[-1],
        )
        for text in candidates:
            try:
                return (0, int(text))
            except ValueError:
                continue
        return (1, 0)

    def searchFiles(
        self, folder, process, redirector="root://eoscms.cern.ch/", isLatino=True
    ):
        r"""Search for files in a folder.

        If redirector is specified, it will use xrdfs to query the redirector,
        otherwise the local filesystem is searched with ``glob``.

        Parameters
        ----------
        folder : str
            the folder to search in. A trailing ``/`` is appended if missing.
            The folder is used verbatim as the prefix of the ``fnmatch``
            pattern (and of the local ``glob`` pattern), so glob
            metacharacters in it are interpreted as wildcards.
        process : str
            the name of the process to search for
        redirector : str, optional, default: ``"root://eoscms.cern.ch/"``
            redirector to use. An empty string selects a local ``glob`` search.
        isLatino : bool, optional
            if the process is a latino process. Defaults to True.
            The process to search for will be
            ``"nanoLatino_" + process + "__part*.root"``; otherwise it is
            ``process + "*.root"``.

        Returns
        -------
        `list of str`
            list of files found including the redirector. Latino files are
            sorted by part number, other files alphabetically.
        """
        if not folder.endswith("/"):
            folder += "/"

        # Key on the redirector as well: the same path listed through another
        # redirector (or locally) is a different listing.
        cache_key = (redirector, folder)
        listOfFiles = self.cached_list_of_files.get(cache_key, [])
        if len(listOfFiles) == 0:
            # Empty listings are deliberately not treated as cached.
            print("Need to query for files for folder", folder)
            if redirector != "":
                print("with redirector", redirector)
                try:
                    # argv list (no shell): paths with spaces or shell
                    # metacharacters are passed to xrdfs untouched.
                    result = subprocess.run(
                        ["xrdfs", redirector, "ls", folder],
                        stdout=subprocess.PIPE,
                    )
                    listOfFiles = result.stdout.decode("utf-8").split("\n")
                except OSError as exc:
                    # e.g. xrdfs is not installed: stay best-effort and
                    # report an empty listing, as the shell form did.
                    print("Could not run xrdfs:", exc, file=sys.stderr)
                    listOfFiles = []
            else:
                listOfFiles = glob.glob(folder + "*")
            self.cached_list_of_files[cache_key] = listOfFiles

        if isLatino:
            pattern = "nanoLatino_" + process + "__part*.root"
        else:
            pattern = process + "*.root"
        pattern = folder + pattern

        files = [k for k in listOfFiles if fnmatch.fnmatch(k, pattern)]
        if isLatino:
            # Numeric order, so that part10 comes after part2.
            files.sort(key=self._part_sort_key)
        else:
            files.sort()

        if redirector != "":
            files = [redirector + k for k in files]
        return files

    def searchFilesDAS(
        self, process, redirector="root://cms-xrd-global.cern.ch/", instance=""
    ):
        r"""Search for files given a DAS query.

        If instance is specified, it will search for files with the provided
        instance. Successful query results are cached per dataset (and
        instance), so repeated calls do not re-run ``dasgoclient``.

        Parameters
        ----------
        process : str
            the name of the process (DAS dataset) to search for
        redirector : str, optional, default: ``"root://cms-xrd-global.cern.ch/"``.
            redirector to use.
        instance : str, optional
            instance to use. Defaults to "". instance="prod/phys03" will
            search for files generated with crab.

        Returns
        -------
        `list of str`
            list of files found including the redirector. The first entry of
            the listing is dropped.

        Raises
        ------
        SystemExit
            with exit status 1 if ``dasgoclient`` cannot be run or writes
            anything to stderr.
        """
        # Results for different instances must not share a cache entry.
        if instance == "":
            cache_key = process
        else:
            cache_key = f"{process} instance={instance}"

        files = self.cached_list_of_files.get(cache_key, [])
        if len(files) == 0:
            query = f"file dataset={process}"
            if instance != "":
                query += " instance=" + instance
            try:
                # argv list (no shell): the query is passed as one argument.
                result = subprocess.run(
                    ["dasgoclient", f"--query={query}"],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                )
                out = result.stdout.decode("utf-8")
                err = result.stderr.decode("utf-8")
            except OSError as exc:
                # e.g. dasgoclient is not installed: report it like any
                # other retrieval error below.
                out, err = "", str(exc)

            files = [k for k in out.split("\n") if k.strip() != ""]
            print(files, len(files))
            if len(err) != 0:
                print("There were some errors in retrieving file:")
                print(err)
                # Non-zero status so callers and shells see the failure.
                sys.exit(1)
            # Populate the cache; without this the lookup above never hits.
            self.cached_list_of_files[cache_key] = files

        files = [redirector + k for k in files]
        # NOTE(review): the first file is dropped here, as in the original
        # code. This looks unintended (dasgoclient output is filtered only
        # for blank lines above) - confirm with callers before removing.
        return files[1:]