Source code for mkShapesRDF.shapeAnalysis.latinos.mkDatacards

#!/usr/bin/env python
import sys

import ROOT

import argparse
import collections
import os.path
import shutil
from textwrap import dedent


ROOT.gROOT.SetBatch(True)

argv = sys.argv
sys.argv = argv[:1]


# ----------------------------------------------------- DatacardFactory --------------------------------------


[docs] class DatacardFactory: # _logger = logging.getLogger('DatacardFactory') # _____________________________________________________________________________ def __init__(self): self._fileIn = None self._skipMissingNuisance = False self.onlyVariables = None self.onlyCuts = None # _____________________________________________________________________________ # a datacard for each "cut" and each "variable" will be produced, in separate sub-folders, names after "cut/variable" # _____________________________________________________________________________
[docs] def makeDatacards( self, inputFile, outputDirDatacard, variables, cuts, samples, structureFile, nuisances, ): print("=======================") print("==== makeDatacards ====") print("=======================") if os.path.isdir(inputFile): # ONLY COMPATIBLE WITH OUTPUTS MERGED TO SAMPLE LEVEL!! self._fileIn = {} for sampleName in samples: self._fileIn[sampleName] = ROOT.TFile.Open( inputFile + "/plots_%s_ALL_%s.root" % (self._tag, sampleName) ) if not self._fileIn[sampleName]: raise RuntimeError( "Input file for sample " + sampleName + " missing" ) else: self._fileIn = ROOT.TFile(inputFile, "READ") ROOT.gROOT.GetListOfFiles().Remove(self._fileIn) # Speed up # categorize the sample names signal_ids = ( collections.OrderedDict() ) # id map of alternative_signals + cumulative_signals alternative_signals = [""] cumulative_signals = [] data = [] background_ids = collections.OrderedDict() # divide the list of samples among signal, background and data isig = 0 ibkg = 1 for sampleName in samples: if sampleName not in list(structureFile.keys()): print("Sample", sampleName, "is not included in structure file") continue if structureFile[sampleName]["isSignal"] != 0: signal_ids[sampleName] = isig isig -= 1 if structureFile[sampleName]["isSignal"] == 2: alternative_signals.append(sampleName) if structureFile[sampleName]["isSignal"] == 1: cumulative_signals.append(sampleName) if structureFile[sampleName]["isData"] == 1: data.append(sampleName) if ( structureFile[sampleName]["isSignal"] == 0 and structureFile[sampleName]["isData"] == 0 ): background_ids[sampleName] = ibkg ibkg += 1 print("Number of Signals: " + str(len(signal_ids))) print("Number of Alternative Signals: " + str(len(alternative_signals) - 1)) print("Number of Cumulative Signals: " + str(len(cumulative_signals))) print("Number of Backgrounds: " + str(len(background_ids))) if not os.path.isdir(outputDirDatacard + "/"): os.makedirs(outputDirDatacard + "/") # loop over cuts. One directory per cut will be created for cutName in cuts: if self.onlyCuts and cutName not in self.onlyCuts: continue print("cut = ", cutName) try: shutil.rmtree(outputDirDatacard + "/" + cutName) except OSError: pass os.makedirs(outputDirDatacard + "/" + cutName) # # prepare the signals and background list of samples # after removing the ones not to be used in this specific phase space # cut_signals = [] cut_backgrounds = [] for snameList, idmap in [ (cut_signals, signal_ids), (cut_backgrounds, background_ids), ]: for sampleName in idmap: if ( "removeFromCuts" in structureFile[sampleName] and cutName in structureFile[sampleName]["removeFromCuts"] ): # remove from the list print(" remove ", sampleName, " from ", cutName) else: snameList.append(sampleName) print(" sampleNames = ", cut_signals + cut_backgrounds) # loop over variables for variableName, variable in variables.items(): if "cuts" in variable and cutName not in variable["cuts"]: continue if self.onlyVariables and variableName not in self.onlyVariables: continue print(" variableName = ", variableName) tagNameToAppearInDatacard = cutName # e.g. hww2l2v_13TeV_of0j # to be defined in cuts.py os.makedirs( outputDirDatacard + "/" + cutName + "/" + variableName, exist_ok=True, ) os.makedirs( outputDirDatacard + "/" + cutName + "/" + variableName + "/shapes/", exist_ok=True, ) # and the folder for the root files self._outFile = ROOT.TFile.Open( outputDirDatacard + "/" + cutName + "/" + variableName + "/shapes/histos_" + tagNameToAppearInDatacard + ".root", "recreate", ) ROOT.gROOT.GetListOfFiles().Remove(self._outFile) # prepare yields yieldsSig = {} yieldsBkg = {} yieldsData = {} # Bin to be killed # killBinSig = {} # killBinBkg = {} for snameList, yields in [ (cut_signals, yieldsSig), (cut_backgrounds, yieldsBkg), (data, yieldsData), ]: for sampleName in snameList: histo = self._getHisto(cutName, variableName, sampleName) histo.SetDirectory(self._outFile) # get the integral == rate from histogram if structureFile[sampleName]["isData"] == 1: yields["data"] = histo.Integral() histo.SetName("histo_Data") else: # if 'removeNegNomVal' in structureFile[sampleName] and structureFile[sampleName]['removeNegNomVal'] : # for iBin in range(0,histo.GetNbinsX()+2) : # BinContent = histo.GetBinContent(iBin) # if BinContent < 0.1 and not BinContent == 0: # print('Killing Bin :' , sampleName , iBin , BinContent) # if not sampleName in killBinSig : killBinSig[sampleName] = [] # killBinSig[sampleName].append(iBin) # histo.SetBinContent(iBin,0.) if "scaleSampleForDatacard" in structure[sampleName]: scaleFactor = 1.0 if ( type( structure[sampleName]["scaleSampleForDatacard"] ) is dict ): try: scaleFactor = structure[sampleName][ "scaleSampleForDatacard" ][cutName] except: pass if ( type( structure[sampleName]["scaleSampleForDatacard"] ) is int or type( structure[sampleName]["scaleSampleForDatacard"] ) is float ): scaleFactor = structure[sampleName][ "scaleSampleForDatacard" ] histo.Scale(scaleFactor) yields[sampleName] = histo.Integral() # # Remove statistical uncertainty from the histogram # This may be used to suppress the effect on "autoMCstat" of a specific # sample, by means of putting its uncertainty to 0 # if ( "removeStatUnc" in structureFile[sampleName] and structureFile[sampleName]["removeStatUnc"] ): self._removeStatUncertainty(histo) self._outFile.cd() histo.Write() # Loop over alternative signal samples. One card per signal (there is always at least one entry ("")) for signalName in alternative_signals: if signalName == "": signals = [ name for name in cumulative_signals if name in cut_signals ] else: if signalName not in cut_signals: continue signals = [signalName] + [ name for name in cumulative_signals if name in cut_signals ] processes = signals + cut_backgrounds print(" Alternative signal: " + str(signalName)) alternativeSignalName = str(signalName) alternativeSignalTitle = "" if alternativeSignalName != "": alternativeSignalTitle = "_" + str(signalName) # start creating the datacard cardPath = ( outputDirDatacard + "/" + cutName + "/" + variableName + "/datacard" + alternativeSignalTitle + ".txt" ) print(" Writing to " + cardPath) card = open(cardPath, "w") print(" headers..") card.write("## Shape input card\n") card.write("imax 1 number of channels\n") card.write("jmax * number of background\n") card.write("kmax * number of nuisance parameters\n") card.write("-" * 100 + "\n") card.write("bin %s" % tagNameToAppearInDatacard + "\n") if len(data) == 0: self._logger.warning("no data, no fun! ") # raise RuntimeError('No Data found!') yieldsData["data"] = 0 card.write("observation %.0f\n" % yieldsData["data"]) card.write( "shapes * * " + "shapes/histos_" + tagNameToAppearInDatacard + ".root" + " histo_$PROCESS histo_$PROCESS_$SYSTEMATIC" + "\n" ) card.write( "shapes data_obs * " + "shapes/histos_" + tagNameToAppearInDatacard + ".root" + " histo_Data" + "\n" ) # shapes * * shapes/hww-19.36fb.mH125.of_vh2j_shape_mll.root histo_$PROCESS histo_$PROCESS_$SYSTEMATIC # shapes data_obs * shapes/hww-19.36fb.mH125.of_vh2j_shape_mll.root histo_Data columndef = 30 # adapt column length to long bin names if len(tagNameToAppearInDatacard) >= (columndef - 5): columndef = len(tagNameToAppearInDatacard) + 7 for name in signals: if len(name) >= (columndef - 5): columndef = len(name) + 7 print(" processes and rates..") card.write("bin".ljust(80)) card.write( "".join( [tagNameToAppearInDatacard.ljust(columndef)] * (len(signals) + len(cut_backgrounds)) ) + "\n" ) card.write("process".ljust(80)) card.write("".join(name.ljust(columndef) for name in signals)) card.write( "".join(name.ljust(columndef) for name in cut_backgrounds) ) card.write("\n") card.write("process".ljust(80)) card.write( "".join( ("%d" % signal_ids[name]).ljust(columndef) for name in signals ) ) card.write( "".join( ("%d" % background_ids[name]).ljust(columndef) for name in cut_backgrounds ) ) card.write("\n") card.write("rate".ljust(80)) card.write( "".join( ("%-.4f" % yieldsSig[name]).ljust(columndef) for name in signals ) ) card.write( "".join( ("%-.4f" % yieldsBkg[name]).ljust(columndef) for name in cut_backgrounds ) ) card.write("\n") card.write("-" * 100 + "\n") print(" nuisances..") nuisanceGroups = collections.defaultdict(list) # add normalization and shape nuisances for nuisanceName, nuisance in nuisances.items(): if "type" not in nuisance: raise RuntimeError( "Nuisance " + nuisanceName + " is missing the type specification" ) if nuisanceName == "stat" or nuisance["type"] == "rateParam": # will deal with these later continue # check if a nuisance can be skipped because not in this particular cut if "cuts" in nuisance and cutName not in nuisance["cuts"]: continue if nuisance["type"] in ["lnN", "lnU"]: # why is adding CMS_ not the default for lnN/lnU? (Y.I. 2019.11.06) entryName = nuisance["name"] if ( "perRecoBin" in nuisance.keys() and nuisance["perRecoBin"] ): entryName += "_" + cutName card.write(entryName.ljust(80 - 20)) if ( "AsShape" in nuisance and float(nuisance["AsShape"]) >= 1.0 ): print( ">>>>>", nuisance["name"], " was derived as a lnN uncertainty but is being treated as a shape", ) card.write(("shape").ljust(20)) for sampleName in processes: if ( "cuts_samples" in nuisance and sampleName in nuisance["cuts_samples"] and cutName not in nuisance["cuts_samples"][sampleName] ): # If the cuts_samples options is there and the sample is inserted in the dictionary # check if the current cutName is included. Excluded the cuts not in the list print( "Removing nuisance ", nuisanceName, " for sample ", sampleName, " from cut ", cutName, ) card.write(("-").ljust(columndef)) elif ( "all" in nuisance and nuisance["all"] == 1 ) or ( "samples" in nuisance and sampleName in nuisance["samples"] ): histo = self._getHisto( cutName, variableName, sampleName ) histoUp = histo.Clone( "%s_%sUp" % (histo.GetName(), nuisance["name"]) ) histoDown = histo.Clone( "%s_%sDown" % (histo.GetName(), nuisance["name"]) ) histoUp.SetDirectory(self._outFile) histoDown.SetDirectory(self._outFile) if ( "scaleSampleForDatacard" in structure[sampleName] ): scaleFactor = 1.0 if ( type( structure[sampleName][ "scaleSampleForDatacard" ] ) is dict ): try: scaleFactor = structure[sampleName][ "scaleSampleForDatacard" ][cutName] except: pass if ( type( structure[sampleName][ "scaleSampleForDatacard" ] ) is int or type( structure[sampleName][ "scaleSampleForDatacard" ] ) is float ): scaleFactor = structure[sampleName][ "scaleSampleForDatacard" ] histoUp.Scale(scaleFactor) histoDown.Scale(scaleFactor) if "/" in nuisance["samples"][sampleName]: up, down = nuisance["samples"][ sampleName ].split("/") histoUp.Scale(float(up)) histoDown.Scale(float(down)) else: histoUp.Scale( float(nuisance["samples"][sampleName]) ) histoDown.Scale( 1.0 / float(nuisance["samples"][sampleName]) ) self._outFile.cd() histoUp.Write() histoDown.Write() card.write("1.000".ljust(columndef)) else: card.write(("-").ljust(columndef)) else: card.write((nuisance["type"]).ljust(20)) if ( "all" in nuisance and nuisance["all"] == 1 ): # for all samples card.write( "".join( ("%-.4f" % nuisance["value"]).ljust( columndef ) for _ in processes ) ) else: # apply only to selected samples for sampleName in processes: if sampleName in nuisance["samples"]: # in this case nuisance['samples'] is a dict mapping sample name to nuisance values in string card.write( ( "%s" % nuisance["samples"][sampleName] ).ljust(columndef) ) else: card.write(("-").ljust(columndef)) elif nuisance["type"] == "shape": # # 'skipCMS' is a flag not to introduce "CMS" automatically in the name # of the nuisance. # It may be needed for combination purposes with ATLAS # and agreed upon for all CMS analyses. # For example: theoretical uncertainties on ggH with migration scheme 2017 # if "skipCMS" in nuisance and nuisance["skipCMS"] == 1: entryName = nuisance["name"] else: entryName = "CMS_" + nuisance["name"] if ( "perRecoBin" in nuisance.keys() and nuisance["perRecoBin"] ): entryName += "_" + cutName card.write(entryName.ljust(80 - 20)) if "AsLnN" in nuisance and float(nuisance["AsLnN"]) >= 1.0: print( ">>>>>", nuisance["name"], " was derived as a shape uncertainty but is being treated as a lnN", ) card.write(("lnN").ljust(20)) for sampleName in processes: if ( "cuts_samples" in nuisance and sampleName in nuisance["cuts_samples"] and cutName not in nuisance["cuts_samples"][sampleName] ): # If the cuts_samples options is there and the sample is inserted in the dictionary # check if the current cutName is included. Excluded the cuts not in the list print( "Removing nuisance ", nuisanceName, " for sample ", sampleName, " from cut ", cutName, ) card.write(("-").ljust(columndef)) elif ( "all" in nuisance and nuisance["all"] == 1 ) or ( "samples" in nuisance and sampleName in nuisance["samples"] ): histo = self._getHisto( cutName, variableName, sampleName ) histoUp = self._getHisto( cutName, variableName, sampleName, "_" + nuisance["name"] + "Up", ) histoDown = self._getHisto( cutName, variableName, sampleName, "_" + nuisance["name"] + "Down", ) if not histoUp or not histoDown: print( "Histogram for nuisance", nuisance["name"], "on sample", sampleName, "missing", ) if self._skipMissingNuisance: card.write(("-").ljust(columndef)) continue histoIntegral = histo.Integral() histoUpIntegral = histoUp.Integral() histoDownIntegral = histoDown.Integral() if ( histoIntegral > 0.0 and histoUpIntegral > 0.0 ): diffUp = ( (histoUpIntegral - histoIntegral) / histoIntegral / float(nuisance["AsLnN"]) ) else: diffUp = 0.0 if ( histoIntegral > 0.0 and histoDownIntegral > 0.0 ): diffDo = ( (histoDownIntegral - histoIntegral) / histoIntegral / float(nuisance["AsLnN"]) ) else: diffDo = 0.0 if ( "symmetrize" in nuisance and nuisance["symmetrize"] ): diff = (diffUp - diffDo) * 0.5 if diff >= 1.0: # can't symmetrize diffUp = diff * 2.0 - 0.999 diffDo = -0.999 elif diff <= -1.0: diffUp = -0.999 diffDo = -diff * 2.0 - 0.999 else: diffUp = diff diffDo = -diff lnNUp = 1.0 + diffUp lnNDo = 1.0 + diffDo # # avoid 0.00/XXX and XXX/0.00 in the lnN --> ill defined nuisance # if this happens put 0.00 ---> 1.00, meaning *no* effect of this nuisance # Done like this because it is very likely what you are experiencing # is a MC fluctuation in the varied sample, that is the up/down histogram has 0 entries! # # NB: with the requirement "histoUpIntegral > 0" and "histoDownIntegral > 0" this should never happen, # except for some strange coincidence of "AsLnN" ... # This fix is left, just for sake of safety # if lnNUp == 0: lnNUp = 1.0 if lnNDo == 0: lnNDo = 1.0 if ( abs(lnNUp - 1.0) < 5.0e-4 and abs(lnNDo - 1.0) < 5.0e-4 ): card.write(("-").ljust(columndef)) else: card.write( ( ("%-.4f" % lnNUp) + "/" + ("%-.4f" % lnNDo) ).ljust(columndef) ) else: card.write(("-").ljust(columndef)) else: card.write("shape".ljust(20)) for sampleName in processes: if ( "cuts_samples" in nuisance and sampleName in nuisance["cuts_samples"] and cutName not in nuisance["cuts_samples"][sampleName] ): # If the cuts_samples options is there and the sample is inserted in the dictionary # check if the current cutName is included. Excluded the cuts not in the list print( "Removing nuisance ", nuisanceName, " for sample ", sampleName, " from cut ", cutName, ) card.write(("-").ljust(columndef)) elif ( "all" in nuisance and nuisance["all"] == 1 ) or ( "samples" in nuisance and sampleName in nuisance["samples"] ): # save the nuisance histograms in the root file if ("skipCMS" in nuisance.keys()) and nuisance[ "skipCMS" ] == 1: suffixOut = None else: suffixOut = "_CMS_" + nuisance["name"] if ( "perRecoBin" in nuisance.keys() and nuisance["perRecoBin"] ): if ( "skipCMS" in nuisance.keys() ) and nuisance["skipCMS"] == 1: suffixOut = "_" + nuisance["name"] else: suffixOut = "_CMS_" + nuisance["name"] suffixOut += "_" + cutName symmetrize = ( "symmetrize" in nuisance and nuisance["symmetrize"] ) saved = self._saveNuisanceHistos( cutName, variableName, sampleName, "_" + nuisance["name"], suffixOut, symmetrize, ) if saved: card.write("1.000".ljust(columndef)) else: # saved can be false if skipMissingNuisance is true and histogram cannot be found card.write(("-").ljust(columndef)) else: card.write(("-").ljust(columndef)) # closing block if type == lnN or shape card.write("\n") if "group" in nuisance: nuisanceGroups[nuisance["group"]].append(entryName) for group in sorted(nuisanceGroups.keys()): card.write( "%s group = %s\n" % (group, " ".join(nuisanceGroups[group])) ) # done with normalization and shape nuisances. # now add the stat nuisances if "stat" in nuisances: nuisance = nuisances["stat"] if nuisance["type"] == "auto": # use autoMCStats feature of combine card.write( "* autoMCStats " + nuisance["maxPoiss"] + " " + nuisance["includeSignal"] ) # nuisance ['maxPoiss'] = Number of threshold events for Poisson modelling # nuisance ['includeSignal'] = Include MC stat nuisances on signal processes (1=True, 0=False) card.write("\n") else: # otherwise we process stat nuisances sample-by-sample for sampleName in processes: if sampleName not in nuisance["samples"]: continue sampleNuisance = nuisance["samples"][sampleName] sampleIndex = processes.index(sampleName) if ( sampleNuisance["typeStat"] == "uni" ): # unified approach # save the nuisance histograms in the root file saved = self._saveNuisanceHistos( cutName, variableName, sampleName, "stat", "_CMS_" + tagNameToAppearInDatacard + "_" + sampleName + "_stat", ) if saved: card.write( ( "CMS_" + tagNameToAppearInDatacard + "_" + sampleName + "_stat" ).ljust(80 - 20) ) card.write((nuisance["type"]).ljust(20)) # write the line in datacard. Only the column for this sample gets 1.000 for idx in range(len(processes)): if idx == sampleIndex: card.write(("1.000").ljust(columndef)) else: card.write(("-").ljust(columndef)) card.write("\n") elif sampleNuisance["typeStat"] == "bbb": # bin-by-bin histoTemplate = self._getHisto( cutName, variableName, sampleName ) for iBin in range(1, histoTemplate.GetNbinsX() + 1): if "correlate" in sampleNuisance: # specify the sample that is source of the variation tag = "_ibin" + sampleName + "_" correlate = list( sampleNuisance["correlate"] ) # list of sample names to correlate bin-by-bin with this sample else: tag = "_ibin_" correlate = [] # save the nuisance histograms in the root file saved = self._saveNuisanceHistos( cutName, variableName, sampleName, tag + str(iBin) + "_stat", "_CMS_" + tagNameToAppearInDatacard + "_" + sampleName + "_ibin_" + str(iBin) + "_stat", ) if not saved: continue for other in list(correlate): saved = self._saveNuisanceHistos( cutName, variableName, other, tag + str(iBin) + "_stat", "_CMS_" + tagNameToAppearInDatacard + "_" + sampleName + "_ibin_" + str(iBin) + "_stat", ) if not saved: correlate.remove(other) card.write( ( "CMS_" + tagNameToAppearInDatacard + "_" + sampleName + "_ibin_" + str(iBin) + "_stat" ).ljust(100 - 20) ) card.write((nuisance["type"]).ljust(20)) # write line in datacard. One line per sample per bin for idx in range(len(processes)): if ( idx == sampleIndex or processes[idx] in correlate ): card.write(("1.000").ljust(columndef)) else: card.write(("-").ljust(columndef)) card.write("\n") # now add the "rateParam" for the normalization # e.g.: z_norm rateParam htsearch zll 1 # see: https://twiki.cern.ch/twiki/bin/view/CMS/HiggsWG/SWGuideNonStandardCombineUses#Rate_Parameters # 'rateParam' has a separate treatment -> it's just a line at the end of the datacard. It defines "free floating" samples # I do it here and not before because I want the freee floating parameters at the end of the datacard for nuisance in nuisances.values(): if nuisance["type"] != "rateParam": continue # check if a nuisance can be skipped because not in this particular cut if "cuts" in nuisance and cutName not in nuisance["cuts"]: continue card.write(nuisance["name"].ljust(80 - 20)) card.write("rateParam".ljust(25)) card.write( tagNameToAppearInDatacard.ljust(columndef) ) # the bin # there can be only 1 sample per rateParam if len(nuisance["samples"]) != 1: raise RuntimeError( "Invalid rateParam: number of samples != 1" ) sampleName, initialValue = list(nuisance["samples"].items())[0] if sampleName not in samples: raise RuntimeError( "Invalid rateParam: unknown sample %s" % sampleName ) card.write(sampleName.ljust(25)) card.write(("%-.4f" % float(initialValue)).ljust(columndef)) card.write("\n") # now add other nuisances # Are there other kind of nuisances I forgot? card.write("-" * 100 + "\n") card.write("\n") card.close() print(" done.") self._outFile.Close() if type(self._fileIn) is dict: for source in self._fileIn.values(): source.Close() else: self._fileIn.Close()
# _____________________________________________________________________________ def _saveNuisanceHistos( self, cutName, variableName, sampleName, suffixIn, suffixOut=None, symmetrize=False, ): histoUp = self._getHisto(cutName, variableName, sampleName, suffixIn + "Up") histoDown = self._getHisto(cutName, variableName, sampleName, suffixIn + "Down") if not histoUp or not histoDown: print( "Up/down histogram for", cutName, variableName, sampleName, suffixIn, "missing", ) if self._skipMissingNuisance: return False # else let ROOT raise if "scaleSampleForDatacard" in structure[sampleName]: scaleFactor = 1.0 if type(structure[sampleName]["scaleSampleForDatacard"]) is dict: try: scaleFactor = structure[sampleName]["scaleSampleForDatacard"][ cutName ] except: pass if ( type(structure[sampleName]["scaleSampleForDatacard"]) is int or type(structure[sampleName]["scaleSampleForDatacard"]) is float ): scaleFactor = structure[sampleName]["scaleSampleForDatacard"] histoUp.Scale(scaleFactor) histoDown.Scale(scaleFactor) if symmetrize: histoNom = self._getHisto(cutName, variableName, sampleName) histoDiff = histoUp.Clone("histoDiff") histoDiff.Add(histoDown, -1.0) histoDiff.Scale(0.5) histoUp.Reset() histoUp.Add(histoNom) histoUp.Add(histoDiff) histoDown.Reset() histoDown.Add(histoNom) histoDown.Add(histoDiff, -1.0) histoUp.SetDirectory(self._outFile) histoDown.SetDirectory(self._outFile) if suffixOut: histoUp.SetName("histo_%s%sUp" % (sampleName, suffixOut)) histoDown.SetName("histo_%s%sDown" % (sampleName, suffixOut)) self._outFile.cd() histoUp.Write() histoDown.Write() return True # _____________________________________________________________________________ def _getHisto(self, cutName, variableName, sampleName, suffix=None): shapeName = "%s/%s/histo_%s" % (cutName, variableName, sampleName) if suffix: shapeName += suffix if type(self._fileIn) is dict: # by-sample ROOT file histo = self._fileIn[sampleName].Get(shapeName) else: # Merged single ROOT file histo = self._fileIn.Get(shapeName) if not histo: print(shapeName, "not found") # if 'met' in variableName.lower() and 'met' in suffix.lower(): # print(variableName, 'does not contain', suffix) # sys.exit() if suffix: print("Getting the nominal instead of varied") histo = self._getHisto(cutName, variableName, sampleName) histo.SetName(shapeName) return histo return histo # _____________________________________________________________________________ def _removeStatUncertainty(self, histo): for iBin in range(0, histo.GetNbinsX() + 2): histo.SetBinError(iBin, 0.0)
[docs] class list_maker: def __init__(self, var, sep=",", __type=None): self._type = __type self._var = var self._sep = sep def __call__(self, option, opt_str, value, parser): if not hasattr(parser.values, self._var): setattr(parser.values, self._var, []) try: array = value.split(self._sep) if self._type: array = [self._type(e) for e in array] setattr(parser.values, self._var, array) except: print("Malformed option (comma separated list expected):", value)
[docs] def defaultParser(): sys.argv = argv parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter ) parser.add_argument( "--outputDirDatacard", dest="outputDirDatacard", help="output directory", default="./datacards", ) parser.add_argument( "--skipMissingNuisance", dest="skipMissingNuisance", help="Don't write nuisance lines when histograms are missing", default=False, action="store_true", ) parser.add_argument( "--onlyVariables", dest="onlyVariables", help="Only make datacards for the provided variables, list of variables comma separated", default="", ) parser.add_argument( "--onlyCuts", dest="onlyCuts", help="Only make datacards for the provided cuts, list of cuts comma separated", default="", ) return parser
[docs] def main(): header = """ -------------------------------------------------------------------------------------------------- __ \ | | \ | | | | _` | __| _` | __| _` | __| _` | |\/ | _` | | / _ \ __| | | ( | | ( | ( ( | | ( | | | ( | < __/ | ____/ \__,_| \__| \__,_| \___| \__,_| _| \__,_| _| _| \__,_| _|\_\ \___| _| -------------------------------------------------------------------------------------------------- """ header = dedent(header) print(header) parser = defaultParser() opt = parser.parse_args() onlyVariables = opt.onlyVariables if onlyVariables == "": onlyVariables = None else: onlyVariables = [s.strip() for s in onlyVariables.split(",")] onlyCuts = opt.onlyCuts if onlyCuts == "": onlyCuts = None else: onlyCuts = [s.strip() for s in onlyCuts.split(",")] global cuts, plot from mkShapesRDF.shapeAnalysis.ConfigLib import ConfigLib configsFolder = "configs" ConfigLib.loadLatestPickle(os.path.abspath(configsFolder), globals()) print(dir()) print(globals().keys()) # unpacking of variables cuts = cuts["cuts"] inputFile = outputFolder + "/" + outputFile ROOT.TH1.SetDefaultSumw2(True) factory = DatacardFactory() factory._tag = tag factory._skipMissingNuisance = opt.skipMissingNuisance factory.onlyCuts = onlyCuts factory.onlyVariables = onlyVariables import mkShapesRDF.shapeAnalysis.latinos.LatinosUtils as utils subsamplesmap = utils.flatten_samples(samples) categoriesmap = utils.flatten_cuts(cuts) utils.update_variables_with_categories(variables, categoriesmap) utils.update_nuisances_with_subsamples(nuisances, subsamplesmap) utils.update_nuisances_with_categories(nuisances, categoriesmap) factory.makeDatacards( inputFile, opt.outputDirDatacard, variables, cuts, samples, structure, nuisances )
if __name__ == "__main__": main()