A configuration folder for shapeAnalysis#
configuration.py#
Configuration file for mkShapesRDF script.
It is the only required Python configuration file; all the other files are imported and defined by this one.
- configuration.tag = 'new_vbf_16'#
tag used to identify the configuration folder version
- configuration.runnerFile = 'default'#
file to use as runner script, default uses mkShapesRDF.shapeAnalysis.runner otherwise specify relative path to script
- configuration.outputFile = 'mkShapes__new_vbf_16.root'#
output file name
- configuration.outputFolder = 'rootFiles'#
path to output folder
- configuration.batchFolder = 'condor'#
path to batch folder (used for condor submission)
- configuration.configsFolder = 'configs'#
path to configuration folder (will contain all the compiled configuration files)
- configuration.lumi = 36.33#
luminosity to normalize to (in 1/fb)
- configuration.aliasesFile = 'aliases.py'#
file with dict of aliases to define
- configuration.variablesFile = 'variables.py'#
file with dict of variables
- configuration.cutsFile = 'cuts.py'#
file with dict of cuts
- configuration.samplesFile = 'samples.py'#
file with dict of samples
- configuration.plotFile = 'plot.py'#
file with dicts used for plotting (groupPlot, legend, plot)
- configuration.structureFile = 'structure.py'#
file with dict of structure (used to define combine processes)
- configuration.condorConfig = 'jdl_dict.py'#
dictionary containing the configurations to be passed to condor
- configuration.plotPath = 'plots'#
path to folder where to save plots
- configuration.mountEOS = []#
these lines are executed right before the runner on the condor node
- configuration.imports = ['os', 'glob', ('collections', 'OrderedDict'), 'ROOT']#
list of modules to import when compiling the whole configuration folder; it should not contain imports used by configuration.py
- configuration.filesToExec = ['samples.py', 'aliases.py', 'variables.py', 'cuts.py', 'plot.py', 'nuisances.py', 'structure.py']#
list of files to compile
- configuration.varsToKeep = ['batchVars', 'outputFolder', 'batchFolder', 'configsFolder', 'outputFile', 'runnerFile', 'tag', 'samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'groupPlot': 'groupPlot', 'legend': 'legend', 'plot': 'plot'}), 'nuisances', 'structure', 'lumi', 'mountEOS', 'plotPath']#
list of variables to keep in the compiled configuration folder
- configuration.batchVars = ['samples', 'aliases', 'variables', ('cuts', {'cuts': 'cuts', 'preselections': 'preselections'}), ('plot', {'groupPlot': 'groupPlot', 'legend': 'legend', 'plot': 'plot'}), 'nuisances', 'structure', 'lumi', 'mountEOS']#
list of variables to keep in the batch submission script (script.py)
samples.py#
Defines the samples and the list of files together with the weights to use for them.
Examples#
>>> from mkShapesRDF.lib.search_files import SearchFiles
>>> searchFiles = SearchFiles()
>>> redirector = ""
>>> mcProduction = "Summer16_102X_nAODv7_Full2016v7"
>>> dataReco = "Run2016_102X_nAODv7_Full2016v7"
>>> mcSteps = "MCl1loose2016v7__MCCorr2016v7__l2loose__l2tightOR2016v7"
>>> fakeSteps = "DATAl1loose2016v7__l2loose__fakeW"
>>> dataSteps = "DATAl1loose2016v7__l2loose__l2tightOR2016v7"
>>> ##############################################
>>> ###### Tree base directory for the site ######
>>> ##############################################
>>> treeBaseDir = "/eos/cms/store/group/phys_higgs/cmshww/amassiro/HWWNano"
>>> limitFiles = -1
>>> def makeMCDirectory(var=""):
>>> _treeBaseDir = treeBaseDir + ""
>>> if redirector != "":
>>> _treeBaseDir = redirector + treeBaseDir
>>> if var == "":
>>> return "/".join([_treeBaseDir, mcProduction, mcSteps])
>>> else:
>>> return "/".join([_treeBaseDir, mcProduction, mcSteps + "__" + var])
>>>
>>>
>>> mcDirectory = makeMCDirectory()
>>> fakeDirectory = os.path.join(treeBaseDir, dataReco, fakeSteps)
>>> dataDirectory = os.path.join(treeBaseDir, dataReco, dataSteps)
>>> samples = {}
>>> DataRun = [
>>> ["B", "Run2016B-02Apr2020_ver2-v1"],
>>> ["C", "Run2016C-02Apr2020-v1"],
>>> ["D", "Run2016D-02Apr2020-v1"],
>>> ["E", "Run2016E-02Apr2020-v1"],
>>> ["F", "Run2016F-02Apr2020-v1"],
>>> ["G", "Run2016G-02Apr2020-v1"],
>>> ["H", "Run2016H-02Apr2020-v1"],
>>> ]
>>> DataSets = ["MuonEG", "SingleMuon", "SingleElectron", "DoubleMuon", "DoubleEG"]
>>> DataTrig = {
>>> "MuonEG": " Trigger_ElMu",
>>> "SingleMuon": "!Trigger_ElMu && Trigger_sngMu",
>>> "SingleElectron": "!Trigger_ElMu && !Trigger_sngMu && Trigger_sngEl",
>>> "DoubleMuon": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && Trigger_dblMu",
>>> "DoubleEG": "!Trigger_ElMu && !Trigger_sngMu && !Trigger_sngEl && !Trigger_dblMu && Trigger_dblEl",
>>> }
>>> mcCommonWeightNoMatch = "XSWeight*SFweight*METFilter_MC"
>>> mcCommonWeight = "XSWeight*SFweight*PromptGenLepMatch2l*METFilter_MC"
>>>
>>>
>>> ###### Zjj EWK #######
>>>
>>> files = nanoGetSampleFiles(mcDirectory, "EWK_LLJJ_MLL-50_MJJ-120")
>>>
>>> samples["Zjj"] = {
>>> "name": files,
>>> "weight": mcCommonWeight,
>>> "FilesPerJob": 1,
>>> }
>>>
>>> ###### DY MC ######
>>> # example of subsamples
>>> dys = {
>>> "DY_hardJets": "hardJets",
>>> "DY_PUJets": "PUJets",
>>> "DY_inclusive_rwgt": ("true", "0.8"),
>>> # passing as a value a tuple/list of len 2 one can
>>> # assign a custom multiplicative weight to this subsample
>>> # aliases can be used inside subsampleCut or subsampleWeight
>>> }
>>>
>>> files = nanoGetSampleFiles(mcDirectory, "DYJetsToLL_M-50_ext2")
>>>
>>> samples["DY"] = {
>>> "name": files,
>>> "weight": mcCommonWeight
>>> + "*( !(Sum(PhotonGen_isPrompt==1 && PhotonGen_pt>15 && abs(PhotonGen_eta)<2.6) > 0)) * ewknloW",
>>> "FilesPerJob": 5,
>>> "subsamples": dys,
>>> "flatten_samples_map": lambda sname, sub: "%s" % (sub)
>>> # in this way the flattened samples are simply "DY_hardJets", "DY_PUJets",
>>> # and "DY_inclusive_rwgt"
>>> # default flatten_samples_map is lambda sname, sub: '%s_%s' % (sname, sub)
>>> }
>>> ###########################################
>>> ################## DATA ###################
>>> ###########################################
>>>
>>> samples["DATA"] = {
>>> "name": [],
>>> "weight": "METFilter_DATA*LepWPCut",
>>> "weights": [],
>>> "isData": ["all"],
>>> "FilesPerJob": 50,
>>> }
>>>
>>> for _, sd in DataRun:
>>> for pd in DataSets:
>>> files = nanoGetSampleFiles(dataDirectory, pd + "_" + sd)
>>>
>>> samples["DATA"]["name"].extend(files)
>>> addSampleWeight(samples, "DATA", pd + "_" + sd, DataTrig[pd])
>>> # samples['DATA']['weights'].extend([DataTrig[pd]] * len(files))
- samples.nanoGetSampleFiles(path, name)[source][source]#
Retrieve files given path and name
- Parameters:
- Returns:
- list of tuple
list of tuples in the form of
(name, list of files)
Notes
This function uses SearchFiles (the object
searchFiles) to retrieve the files and the Latino naming convention is assumed. The redirector defined above is also used.
- samples.CombineBaseW(samples, proc, samplelist)[source][source]#
Combine baseW for a given process.
If two samples (different names) enter the same phase space, the new baseW will consider the XS and the sum of all
genEventSumw across all the files.
- Parameters:
Notes
Will call
addSampleWeight for each sample in samplelist.
variables.py#
Defines the variables in variables.py.
Examples#
First define an empty variables dict:
>>> variables = {}
>>> variables['mll'] = {
>>> 'name': 'mll', # variable name
>>> 'range' : (20,80,100), # variable range as (nbins, xmin, xmax)
>>> 'xaxis' : 'm_{ll} [GeV]', # x axis name
>>> 'fold' : 0
>>> }
>>> variables['ptll'] = {
>>> 'name': 'ptll', # variable name
>>> 'range' : ([20, 50, 100, 400], ),
>>> # variable range as (list_of_bin_edges,)
>>> 'xaxis' : 'p^T_{ll} [GeV]', # x axis name
>>> 'fold' : 0
>>> }
>>> variables['ptll_detall'] = {
>>> 'name': 'ptll:detall', # variable name
>>> 'range' : (5, 20, 400, 6, 0, 5),
>>> # variable range as (nbins_x, xmin, xmax, nbins_y, ymin, ymax)
>>> 'xaxis' : 'p^T_{ll}:eta_{ll}', # x axis name
>>> 'fold' : 0
>>> }
>>> variables['ptll_detall'] = {
>>> 'name': 'ptll:detall', # variable name
>>> 'range' : ([20, 50, 100, 400], [0, 0.5, 1.5, 2.5, 5.0],),
>>> # variable range as (list_of_bin_edges_var1,list_of_bin_edges_var2)
>>> 'xaxis' : 'p^T_{ll}:eta_{ll}', # x axis name
>>> 'fold' : 0
>>> }
Save ntuple branches; the special column weight is always saved:
>>> dnn_branches = {
>>> # single leptons
>>> "ptl1": "Lepton_pt[0]",
>>> "ptl2": "Lepton_pt[1]",
>>> "etal1": "Lepton_eta[0]",
>>> "etal2": "Lepton_eta[1]",
>>> "phil1": "Lepton_phi[0]",
>>> "phil2": "Lepton_phi[1]",
>>> }
>>>
>>> variables['test_ntuples'] = {
>>> 'tree': dnn_branches, # dictionary of branches to be saved
>>> 'cuts': ['sr'] # specify cut after which the events will be saved
>>> }
>>>
nuisances.py#
Defines the nuisances.
Examples#
>>> nuisances = {}
Example of simple lnN (no processing needed)
>>> nuisances['lumi_Uncorrelated'] = {
>>> 'name': 'lumi_13TeV_2018',
>>> 'type': 'lnN',
>>> 'samples': dict((skey, '1.015') for skey in mc if skey not in ['WW', 'top', 'dyll', 'dytt'])
>>> }
Example of suffix nuisance
>>> nuisances['electronpt'] = {
>>> 'name': 'CMS_scale_e_2018',
>>> 'kind': 'suffix',
>>> 'type': 'shape',
>>> 'mapUp': 'ElepTup',
>>> 'mapDown': 'ElepTdo',
>>> 'samples': dict((skey, ['1', '1']) for skey in mc),
>>> 'folderUp': makeMCDirectory('ElepTup_suffix'),
>>> 'folderDown': makeMCDirectory('ElepTdo_suffix'),
>>> 'separator': '_', # used as Nominal + separator + variation
>>> 'AsLnN': '1'
>>> }
Example of weight nuisance
>>> nuisances['eff_e'] = {
>>> 'name': 'CMS_eff_e_2018',
>>> 'kind': 'weight',
>>> 'type': 'shape',
>>> 'samples': dict((skey, ['SFweightEleUp', 'SFweightEleDown']) for skey in mc)
>>> }
jdl_dict.py#
jdl configuration file.
- It defines the following objects:
jdl_dict: all the optional variables that will be set in submit.jdl
executable: all the lines that will be written to run.sh
condor_config: all the options passed to condor_submit
Examples#
>>>
>>> import os
>>>
>>> # Get the current directory
>>> current_directory = os.getcwd()
>>>
>>> mkshapesrdf_path = "/code/mkShapesRDF"
>>>
>>>
>>> project_path = current_directory
>>>
>>> proxydir = "/builds/cms-analysis/smp/wpwmjj_polarizations/analysis_code"
>>>
>>> singularity_container = os.getenv("SINGULARITY_CONTAINER")
>>>
>>> jdl_dict = {
>>> "transfer_input_files": f"{proxydir}/myproxy,$(Folder)/script.py, {mkshapesrdf_path}/include/headers.hh, {mkshapesrdf_path}/shapeAnalysis/runner.py, {project_path}/DR_lj.cc, {project_path}/bjets.cc,{project_path}/dnn_LLVsOther.cc,{project_path}/dnn_SigVsBkg.cc,{project_path}/dnn_TTVsOther.cc,{project_path}/m_lj.cc,{project_path}/proxyW.cc,{project_path}/generated_code_dnn_emu_LLVsOther.h,{project_path}/generated_code_dnn_emu_SigVsBkg.h,{project_path}/generated_code_dnn_emu_TTVsOther.h",
>>> "transfer_output_files": "mkShapes__RDF_2017_v9_emu_DNN__ALL__$(Folder).root",
>>> "+SingularityImage": f'"{singularity_container}/"',
>>> "num_retries": "3",
>>> "periodic_remove": "(JobStatus == 2) && (time() - EnteredCurrentStatus) > (0.25 * 3600) || (JobStatus == 1) && (time() - EnteredCurrentStatus) >(0.25 * 3600) || (JobStatus == 5) && (time() - EnteredCurrentStatus) > (0.1 * 3600)",
>>> }
>>>
>>>
>>> executable = [
>>> "#!/bin/bash",
>>> "source /code/start.sh",
>>> "export X509_USER_PROXY=myproxy",
>>> "export X509_CERT_DIR=/cvmfs/cms.cern.ch/grid/etc/grid-security/certificates",
>>> "time python runner.py",
>>> "mv output.root mkShapes__RDF_2017_v9_emu_DNN__ALL__${1}.root",
>>> ]
>>>
>>> condor_config = ["-spool"]