A configuration folder for shapeAnalysis#

configuration.py#

Configuration file for mkShapesRDF script.

It’s the only required Python configuration file; all the other files are imported and defined by this one.

configuration.tag = 'new_vbf_16'#

tag used to identify the configuration folder version

configuration.runnerFile = 'default'#

file to use as runner script; 'default' uses mkShapesRDF.shapeAnalysis.runner, otherwise specify the relative path to the script

configuration.outputFile = 'mkShapes__new_vbf_16.root'#

output file name

configuration.outputFolder = 'rootFiles'#

path to output folder

configuration.batchFolder = 'condor'#

path to batch folder (used for condor submission)

configuration.configsFolder = 'configs'#

path to configuration folder (will contain all the compiled configuration files)

configuration.lumi = 36.33#

luminosity to normalize to (in 1/fb)

configuration.aliasesFile = 'aliases.py'#

file with dict of aliases to define

configuration.variablesFile = 'variables.py'#

file with dict of variables

configuration.cutsFile = 'cuts.py'#

file with dict of cuts

configuration.samplesFile = 'samples.py'#

file with dict of samples

configuration.plotFile = 'plot.py'#

file with the plot configuration dicts (groupPlot, legend, plot)

configuration.structureFile = 'structure.py'#

file with dict of structure (used to define combine processes)

configuration.condorConfig = 'jdl_dict.py'#

dictionary containing the configurations to be passed to condor

configuration.plotPath = 'plots'#

path to folder where to save plots

configuration.mountEOS = []#

these lines are executed right before the runner on the condor node

configuration.imports = ['os', 'glob', ('collections', 'OrderedDict'), 'ROOT']#

list of imports to import when compiling the whole configuration folder, it should not contain imports used by configuration.py

configuration.filesToExec = ['samples.py', 'aliases.py', 'variables.py', 'cuts.py', 'plot.py', 'nuisances.py', 'structure.py']#

list of files to compile

configuration.varsToKeep = ['batchVars', 'outputFolder', 'batchFolder', 'configsFolder', 'outputFile', 'runnerFile', 'tag', 'samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'groupPlot': 'groupPlot', 'legend': 'legend', 'plot': 'plot'}), 'nuisances', 'structure', 'lumi', 'mountEOS', 'plotPath']#

list of variables to keep in the compiled configuration folder

configuration.batchVars = ['samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'groupPlot': 'groupPlot', 'legend': 'legend', 'plot': 'plot'}), 'nuisances', 'structure', 'lumi', 'mountEOS']#

list of variables to keep in the batch submission script (script.py)


samples.py#

Defines the samples and the list of files together with the weights to use for them.

Examples#

>>> from mkShapesRDF.lib.search_files import SearchFiles
>>> searchFiles = SearchFiles()
>>> redirector = ""
>>> mcProduction = "Summer16_102X_nAODv7_Full2016v7"
>>> dataReco = "Run2016_102X_nAODv7_Full2016v7"
>>> mcSteps = "MCl1loose2016v7__MCCorr2016v7__l2loose__l2tightOR2016v7"
>>> fakeSteps = "DATAl1loose2016v7__l2loose__fakeW"
>>> dataSteps = "DATAl1loose2016v7__l2loose__l2tightOR2016v7"
>>> ##############################################
>>> ###### Tree base directory for the site ######
>>> ##############################################
>>> treeBaseDir = "/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano"
>>> limitFiles = -1
>>> def makeMCDirectory(var=""):
>>>     _treeBaseDir = treeBaseDir + ""
>>>     if redirector != "":
>>>         _treeBaseDir = redirector + treeBaseDir
>>>     if var == "":
>>>         return "/".join([_treeBaseDir, mcProduction, mcSteps])
>>>     else:
>>>         return "/".join([_treeBaseDir, mcProduction, mcSteps + "__" + var])
>>>
>>>
>>> mcDirectory = makeMCDirectory()
>>> fakeDirectory = os.path.join(treeBaseDir, dataReco, fakeSteps)
>>> dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps)
>>> samples = {}
>>> DataRun = [
>>>     ["B", "Run2016B-02Apr2020_ver2-v1"],
>>>     ["C", "Run2016C-02Apr2020-v1"],
>>>     ["D", "Run2016D-02Apr2020-v1"],
>>>     ["E", "Run2016E-02Apr2020-v1"],
>>>     ["F", "Run2016F-02Apr2020-v1"],
>>>     ["G", "Run2016G-02Apr2020-v1"],
>>>     ["H", "Run2016H-02Apr2020-v1"],
>>> ]
>>> DataSets = ["MuonEG", "SingleMuon", "SingleElectron", "DoubleMuon", "DoubleEG"]
>>> DataTrig = {
>>>     "MuonEG": " Trigger_ElMu",
>>>     "SingleMuon": "!Trigger_ElMu && Trigger_sngMu",
>>>     "SingleElectron": "!Trigger_ElMu && !Trigger_sngMu && Trigger_sngEl",
>>>     "DoubleMuon": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && Trigger_dblMu",
>>>     "DoubleEG": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && !Trigger_dblMu && Trigger_dblEl",
>>> }
>>> mcCommonWeightNoMatch = "XSWeight*SFweight*METFilter_MC"
>>> mcCommonWeight = "XSWeight*SFweight*PromptGenLepMatch2l*METFilter_MC"
>>> 
>>> 
>>> ###### Zjj EWK #######
>>> 
>>> files = nanoGetSampleFiles(mcDirectory, "EWK_LLJJ_MLL-50_MJJ-120")
>>> 
>>> samples["Zjj"] = {
>>>     "name": files,
>>>     "weight": mcCommonWeight,
>>>     "FilesPerJob": 1,
>>> }
>>> 
>>> ###### DY MC ######
>>> # example of subsamples
>>> dys = {
>>>     "DY_hardJets": "hardJets",
>>>     "DY_PUJets": "PUJets",
>>>     "DY_inclusive_rwgt": ("true", "0.8"), 
>>>     # passing as a value a tuple/list of len 2 one can 
>>>     # assign a custom multiplicative weight to this subsample
>>>     # aliases can be used inside subsampleCut or subsampleWeight
>>> }
>>> 
>>> files = nanoGetSampleFiles(mcDirectory, "DYJetsToLL_M-50_ext2")
>>> 
>>> samples["DY"] = {
>>>     "name": files,
>>>     "weight": mcCommonWeight
>>>     + "*( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0)) * ewknloW",
>>>     "FilesPerJob": 5,
>>>     "subsamples": dys,
>>>     "flatten_samples_map": lambda sname, sub: "%s" % (sub) 
>>>     # this way the flattened samples are simply "DY_hardJets", "DY_PUJets",
>>>     # and "DY_inclusive_rwgt"
>>>     # default flatten_samples_map is lambda sname, sub: '%s_%s' % (sname, sub)
>>> }
>>> ###########################################
>>> ################## DATA ###################
>>> ###########################################
>>> 
>>> samples["DATA"] = {
>>>     "name": [],
>>>     "weight": "METFilter_DATA*LepWPCut",
>>>     "weights": [],
>>>     "isData": ["all"],
>>>     "FilesPerJob": 50,
>>> }
>>> 
>>> for _, sd in DataRun:
>>>     for pd in DataSets:
>>>         files = nanoGetSampleFiles(dataDirectory, pd + "_" + sd)
>>> 
>>>         samples["DATA"]["name"].extend(files)
>>>         addSampleWeight(samples, "DATA", pd + "_" + sd, DataTrig[pd])
>>>         # samples['DATA']['weights'].extend([DataTrig[pd]] * len(files))
samples.nanoGetSampleFiles(path, name)[source][source]#

Retrieve files given path and name

Parameters:
path : str

path to folder where to look for files

name : str

name of the file to look for

Returns:
list of tuple

list of tuples in the form of (name, list of files)

Notes

This function uses SearchFiles (the object searchFiles) to retrieve the files, and the Latino naming convention is assumed. The redirector defined above is also used.

samples.CombineBaseW(samples, proc, samplelist)[source][source]#

Combine baseW for a given process.

If two samples (different names) enter the same phase space the new baseW will consider the XS and the sum of all genEventSumw across all the files.

Parameters:
samples : dict

dictionary of samples

proc : str

the samples key for the process

samplelist : list of str

list of sample names inside samples[proc] to combine

Notes

Will call addSampleWeight for each sample in samplelist.

samples.addSampleWeight(samples, sampleName, sampleNameType, weight)[source][source]#

Add weight to a sample

Parameters:
samples : dict

dictionary of samples

sampleName : str

the samples key for the process

sampleNameType : str

the sample name inside samples[proc] to add the weight to

weight : str

the weight to add


variables.py#

Defines the variables in variables.py.

Examples#

First define an empty variables dict:

>>> variables = {}
>>> variables['mll'] = {
>>>     'name': 'mll',            #   variable name
>>>     'range' : (20,80,100),    #   variable range as (nbins, xmin, xmax)
>>>     'xaxis' : 'm_{ll} [GeV]',  #   x axis name
>>>     'fold' : 0
>>> }
>>> variables['ptll'] = {
>>>     'name': 'ptll',            #   variable name
>>>     'range' : ([20, 50, 100, 400], ),  
>>>     # variable range as (list_of_bin_edges,)
>>>     'xaxis' : 'p^T_{ll} [GeV]',  #   x axis name
>>>     'fold' : 0
>>> }
>>> variables['ptll_detall'] = {
>>>     'name': 'ptll:detall',            #   variable name
>>>     'range' : (5, 20, 400, 6, 0, 5),    
>>>     # variable range as (nbins_x, xmin, xmax, nbins_y, ymin, ymax)
>>>     'xaxis' : 'p^T_{ll}:eta_{ll}',  #   x axis name
>>>     'fold' : 0
>>> }
>>> variables['ptll_detall'] = {
>>>     'name': 'ptll:detall',            #   variable name
>>>     'range' : ([20, 50, 100, 400], [0, 0.5, 1.5, 2.5, 5.0],),   
>>>     # variable range as (list_of_bin_edges_var1,list_of_bin_edges_var2)
>>>     'xaxis' : 'p^T_{ll}:eta_{ll}',  #   x axis name
>>>     'fold' : 0
>>> }

Save ntuple branches; the special column weight is always saved.

>>> dnn_branches = {
>>>     # single leptons
>>>     "ptl1": "Lepton_pt[0]",
>>>     "ptl2": "Lepton_pt[1]",
>>>     "etal1": "Lepton_eta[0]",
>>>     "etal2": "Lepton_eta[1]",
>>>     "phil1": "Lepton_phi[0]",
>>>     "phil2": "Lepton_phi[1]",
>>> }
>>> 
>>> variables['test_ntuples'] = {
>>>     'tree': dnn_branches, # dictionary of branches to be saved
>>>     'cuts': ['sr'] # specify cut after which the events will be saved
>>> }
>>> 

nuisances.py#

Defines the nuisances.

Examples#

>>> nuisances = {}

Example of simple lnN (no processing needed)

>>> nuisances['lumi_Uncorrelated'] = {
>>>     'name': 'lumi_13TeV_2018',
>>>     'type': 'lnN',
>>>     'samples': dict((skey, '1.015') for skey in mc if skey not in ['WW', 'top', 'dyll', 'dytt'])
>>> }

Example of suffix nuisance

>>> nuisances['electronpt'] = {
>>>     'name': 'CMS_scale_e_2018',
>>>     'kind': 'suffix',
>>>     'type': 'shape',
>>>     'mapUp': 'ElepTup',
>>>     'mapDown': 'ElepTdo',
>>>     'samples': dict((skey, ['1', '1']) for skey in mc),
>>>     'folderUp': makeMCDirectory('ElepTup_suffix'),
>>>     'folderDown': makeMCDirectory('ElepTdo_suffix'),
>>>     'separator': '_', # used as Nominal + separator + variation
>>>     'AsLnN': '1'
>>> }

Example of weight nuisance

>>> nuisances['eff_e'] = {
>>>     'name': 'CMS_eff_e_2018',
>>>     'kind': 'weight',
>>>     'type': 'shape',
>>>     'samples': dict((skey, ['SFweightEleUp', 'SFweightEleDown']) for skey in mc)
>>> }

jdl_dict.py#

jdl configuration file.

It defines three objects:
  • jdl_dict: all the optional variables that will be set in submit.jdl

  • executable: all the lines that will be written to run.sh

  • condor_config: all the options passed to condor_submit

Examples#

>>> 
>>> import os
>>> 
>>> # Get the current directory
>>> current_directory = os.getcwd()
>>> 
>>> mkshapesrdf_path = "/code/mkShapesRDF"
>>> 
>>> 
>>> project_path = current_directory
>>> 
>>> proxydir = "/builds/cms-analysis/smp/wpwmjj_polarizations/analysis_code"
>>> 
>>> singularity_container = os.getenv("SINGULARITY_CONTAINER")
>>> 
>>> jdl_dict = {
>>>     "transfer_input_files": f"{proxydir}/myproxy,$(Folder)/script.py, {mkshapesrdf_path}/include/headers.hh, {mkshapesrdf_path}/shapeAnalysis/runner.py, {project_path}/DR_lj.cc, {project_path}/bjets.cc,{project_path}/dnn_LLVsOther.cc,{project_path}/dnn_SigVsBkg.cc,{project_path}/dnn_TTVsOther.cc,{project_path}/m_lj.cc,{project_path}/proxyW.cc,{project_path}/generated_code_dnn_emu_LLVsOther.h,{project_path}/generated_code_dnn_emu_SigVsBkg.h,{project_path}/generated_code_dnn_emu_TTVsOther.h",
>>>     "transfer_output_files": "mkShapes__RDF_2017_v9_emu_DNN__ALL__$(Folder).root",
>>>     "+SingularityImage": f'"{singularity_container}/"',
>>>     "num_retries": "3",
>>>     "periodic_remove": "(JobStatus == 2) && (time() - EnteredCurrentStatus) > (0.25 * 3600) || (JobStatus == 1) && (time() - EnteredCurrentStatus) >(0.25 * 3600) || (JobStatus == 5) && (time() - EnteredCurrentStatus) > (0.1 * 3600)",
>>> }
>>> 
>>> 
>>> executable = [
>>>     "#!/bin/bash",
>>>     "source /code/start.sh",
>>>     "export X509_USER_PROXY=myproxy",
>>>     "export X509_CERT_DIR=/cvmfs/cms.cern.ch/grid/etc/grid-security/certificates",
>>>     "time python runner.py",
>>>     "mv output.root mkShapes__RDF_2017_v9_emu_DNN__ALL__${1}.root",
>>> ]
>>> 
>>> condor_config = ["-spool"]