A configuration folder for shapeAnalysis#

configuration.py#

Configuration file for mkShapesRDF script.

It is the only required Python configuration file; all the other files are imported and defined by it.

configuration.tag = 'new_vbf_16'#

tag used to identify the configuration folder version

configuration.runnerFile = 'default'#

file to use as the runner script: 'default' uses mkShapesRDF.shapeAnalysis.runner; otherwise, specify the path to the script

configuration.outputFile = 'mkShapes__new_vbf_16.root'#

output file name

configuration.outputFolder = 'rootFiles'#

path to output folder

configuration.batchFolder = 'condor'#

path to batch folder (used for condor submission)

configuration.configsFolder = 'configs'#

path to configuration folder (will contain all the compiled configuration files)

configuration.lumi = 36.33#

luminosity to normalize to (in 1/fb)

configuration.aliasesFile = 'aliases.py'#

file with dict of aliases to define

configuration.variablesFile = 'variables.py'#

file with dict of variables

configuration.cutsFile = 'cuts.py'#

file with dict of cuts

configuration.samplesFile = 'samples.py'#

file with dict of samples

configuration.plotFile = 'plot.py'#

file with the plot configuration (the plot, groupPlot and legend objects)

configuration.structureFile = 'structure.py'#

file with dict of structure (used to define combine processes)

configuration.plotPath = 'plots'#

path to folder where to save plots

configuration.mountEOS = []#

these lines are executed right before the runner on the condor node

configuration.imports = ['os', 'glob', ('collections', 'OrderedDict'), 'ROOT']#

list of modules to import when compiling the whole configuration folder; it should not contain imports used by configuration.py

configuration.filesToExec = ['samples.py', 'aliases.py', 'variables.py', 'cuts.py', 'plot.py', 'nuisances.py', 'structure.py']#

list of files to compile

configuration.varsToKeep = ['batchVars', 'outputFolder', 'batchFolder', 'configsFolder', 'outputFile', 'runnerFile', 'tag', 'samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'plot': 'plot', 'groupPlot': 'groupPlot', 'legend': 'legend'}), 'nuisances', 'structure', 'lumi', 'mountEOS', 'plotPath']#

list of variables to keep in the compiled configuration folder

configuration.batchVars = ['samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'plot': 'plot', 'groupPlot': 'groupPlot', 'legend': 'legend'}), 'nuisances', 'structure', 'lumi', 'mountEOS']#

list of variables to keep in the batch submission script (script.py)


samples.py#

Defines the samples and the list of files together with the weights to use for them.

Examples#

>>> from mkShapesRDF.lib.search_files import SearchFiles
>>> searchFiles = SearchFiles()
>>> redirector = ""
>>> mcProduction = "Summer16_102X_nAODv7_Full2016v7"
>>> dataReco = "Run2016_102X_nAODv7_Full2016v7"
>>> mcSteps = "MCl1loose2016v7__MCCorr2016v7__l2loose__l2tightOR2016v7"
>>> fakeSteps = "DATAl1loose2016v7__l2loose__fakeW"
>>> dataSteps = "DATAl1loose2016v7__l2loose__l2tightOR2016v7"
>>> ##############################################
>>> ###### Tree base directory for the site ######
>>> ##############################################
>>> treeBaseDir = "/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano"
>>> limitFiles = -1
>>> def makeMCDirectory(var=""):
>>>     _treeBaseDir = treeBaseDir + ""
>>>     if redirector != "":
>>>         _treeBaseDir = redirector + treeBaseDir
>>>     if var == "":
>>>         return "/".join([_treeBaseDir, mcProduction, mcSteps])
>>>     else:
>>>         return "/".join([_treeBaseDir, mcProduction, mcSteps + "__" + var])
>>>
>>>
>>> mcDirectory = makeMCDirectory()
>>> fakeDirectory = os.path.join(treeBaseDir, dataReco, fakeSteps)
>>> dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps)
>>> samples = {}
>>> DataRun = [
>>>     ["B", "Run2016B-02Apr2020_ver2-v1"],
>>>     ["C", "Run2016C-02Apr2020-v1"],
>>>     ["D", "Run2016D-02Apr2020-v1"],
>>>     ["E", "Run2016E-02Apr2020-v1"],
>>>     ["F", "Run2016F-02Apr2020-v1"],
>>>     ["G", "Run2016G-02Apr2020-v1"],
>>>     ["H", "Run2016H-02Apr2020-v1"],
>>> ]
>>> DataSets = ["MuonEG", "SingleMuon", "SingleElectron", "DoubleMuon", "DoubleEG"]
>>> DataTrig = {
>>>     "MuonEG": " Trigger_ElMu",
>>>     "SingleMuon": "!Trigger_ElMu && Trigger_sngMu",
>>>     "SingleElectron": "!Trigger_ElMu && !Trigger_sngMu && Trigger_sngEl",
>>>     "DoubleMuon": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && Trigger_dblMu",
>>>     "DoubleEG": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && !Trigger_dblMu && Trigger_dblEl",
>>> }
>>> mcCommonWeightNoMatch = "XSWeight*SFweight*METFilter_MC"
>>> mcCommonWeight = "XSWeight*SFweight*PromptGenLepMatch2l*METFilter_MC"
>>> 
>>> 
>>> ###### Zjj EWK #######
>>> 
>>> files = nanoGetSampleFiles(mcDirectory, "EWK_LLJJ_MLL-50_MJJ-120")
>>> 
>>> samples["Zjj"] = {
>>>     "name": files,
>>>     "weight": mcCommonWeight,
>>>     "FilesPerJob": 1,
>>> }
>>> 
>>> 
>>> ###### DY MC ######
>>> dys = {
>>>     "DY_hardJets": "hardJets",
>>>     "DY_PUJets": "PUJets",
>>> }
>>> 
>>> files = nanoGetSampleFiles(mcDirectory, "DYJetsToLL_M-50_ext2")
>>> 
>>> samples["DY"] = {
>>>     "name": files,
>>>     "weight": mcCommonWeight
>>>     + "*( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0)) * ewknloW",
>>>     "FilesPerJob": 5,
>>>     "subsamples": dys,
>>> }
>>> 
>>> 
>>> ###########################################
>>> ################## DATA ###################
>>> ###########################################
>>> 
>>> samples["DATA"] = {
>>>     "name": [],
>>>     "weight": "METFilter_DATA*LepWPCut",
>>>     "weights": [],
>>>     "isData": ["all"],
>>>     "FilesPerJob": 50,
>>> }
>>> 
>>> for _, sd in DataRun:
>>>     for pd in DataSets:
>>>         files = nanoGetSampleFiles(dataDirectory, pd + "_" + sd)
>>> 
>>>         samples["DATA"]["name"].extend(files)
>>>         addSampleWeight(samples, "DATA", pd + "_" + sd, DataTrig[pd])
>>>         # samples['DATA']['weights'].extend([DataTrig[pd]] * len(files))
samples.nanoGetSampleFiles(path, name)[source][source]#

Retrieve files given path and name

Parameters:
path : str

path to folder where to look for files

name : str

name of the file to look for

Returns:
list of tuple

list of tuples in the form of (name, list of files)

Notes

This function uses SearchFiles (the object searchFiles) to retrieve the files, and the Latino naming convention is assumed. The redirector defined above is also used.

samples.CombineBaseW(samples, proc, samplelist)[source][source]#

Combine baseW for a given process.

If two samples (with different names) enter the same phase space, the new baseW will take into account the XS and the sum of all genEventSumw across all the files.

Parameters:
samples : dict

dictionary of samples

proc : str

the samples key for the process

samplelist : list of str

list of sample names inside samples[proc] to combine

Notes

Will call addSampleWeight for each sample in samplelist.

samples.addSampleWeight(samples, sampleName, sampleNameType, weight)[source][source]#

Add weight to a sample

Parameters:
samples : dict

dictionary of samples

sampleName : str

the samples key for the process

sampleNameType : str

the sample name inside samples[sampleName] to add the weight to

weight : str

the weight to add