Skip to content

Commit b06b708

Browse files
committed
first implementation of qcd_from_data
1 parent 83dfbda commit b06b708

1 file changed

Lines changed: 141 additions & 9 deletions

File tree

bin/qcd_from_data

100644100755
Lines changed: 141 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -10,27 +10,159 @@ data_file:
1010
mc_file:
1111
path to MC file, used for the normalisation of data histograms
1212
13-
output_file:
14-
path to the output file
13+
subtract_other_samples:
14+
dictionary of (sample, file) to be removed from the template control region (data_file)
15+
16+
histogram_path:
17+
source of histograms. All sub-paths will be considered (recursive)
18+
19+
ignore_subpaths:
20+
sub-paths of histogram_path to be ignored
21+
22+
normalisation_keyword_in_path:
23+
part of the path that determines the normalisation (usually signal) region
24+
25+
shape_keyword_in_path:
26+
part of the path that determines the template (usually control/sideband) region
27+
28+
shape_btag:
29+
b-tag multiplicity to be used for the template region
30+
31+
shape_btag_for_exceptions:
32+
b-tag multiplicity to be used for the template region for exceptions (see shape_btag_exceptions)
1533
16-
data_histograms:
17-
paths to data histograms (from data file)
34+
shape_btag_exceptions:
35+
list of histogram names that have a different b-tag multiplicity for the template region
1836
19-
mc_histograms:
20-
paths to MC histograms (from MC file)
37+
remove_for_shape:
38+
part of the histogram name to be removed from the template histogram name.
39+
Useful for re-weighted histograms (mc-only).
40+
41+
output_file:
42+
path to the output file
2143
22-
output_histograms:
23-
paths of the output histograms
44+
Uses the data_file to extract the templates, removes other samples
45+
(subtract_other_samples) and normalises it according to mc_file.
2446
2547
'''
48+
from ROOT import gROOT
49+
gcd = gROOT.cd
50+
from optparse import OptionParser
51+
from tools.file_utilities import write_data_to_JSON, read_data_from_JSON
52+
from tools.ROOT_utililities import root_mkdir, find_btag, get_histogram_dictionary
53+
from tools.hist_utilities import clean_control_region
54+
from rootpy.io import root_open
2655

2756
def main():
    """Print the welcome banner and create one QCD file per input set.

    Input sets and their labels are provided by parse_options(). When the
    --test option is set they are replaced by a single set of test values
    labelled 'test.json'.
    """
    banner = (
        "Welcome to the QCD-from-data merging script",
        'Please take a seat while the code is being developed.',
        'Once finished you will be able to create a single file using shapes from data and normalisation from MC',
        'In the meantime have a look at the script usage',
        '',  # trailing empty line after the banner
    )
    for line in banner:
        print(line)

    options, input_values_sets, json_input_files = parse_options()
    if options.test:
        # NOTE(review): setup_test_values is not defined in the visible part
        # of this file - confirm it exists before relying on --test.
        input_values_sets, json_input_files = [setup_test_values()], ['test.json']

    for json_file, input_values in zip(json_input_files, input_values_sets):
        print(' '.join(['Processing', json_file]))
        create_qcd_file(input_values)
70+
71+
def parse_options():
    """Parse the command line and load the JSON input files named on it.

    Returns a tuple (options, input_values_sets, json_input_files):
    the parsed optparse options, the data read from each positional
    argument with read_data_from_JSON, and the positional arguments
    themselves (same order). With --test nothing is read and both lists
    are empty.
    """
    parser = OptionParser( __doc__ )
    parser.add_option( "-t", "--test", dest = "test", action = "store_true",
                       help = "Run with test values and write them to test.json" )
    options, args = parser.parse_args()

    if options.test:
        # test mode ignores any JSON files given on the command line
        return options, [], []

    json_input_files = list(args)
    input_values_sets = [read_data_from_JSON(json_file)
                         for json_file in json_input_files]
    return options, input_values_sets, json_input_files
88+
89+
def _template_histogram_name(histogram, current_btag, remove_for_shape,
                             shape_btag, shape_btag_for_exceptions,
                             shape_btag_exceptions):
    """Return the name of the template histogram to use for ``histogram``.

    Every fragment in ``remove_for_shape`` is stripped from the name and
    ``current_btag`` is replaced by ``shape_btag_for_exceptions`` if the
    original name contains any entry of ``shape_btag_exceptions``, otherwise
    by ``shape_btag``.
    """
    # the exception check is done on the original (unstripped) name
    is_exception = any(var in histogram for var in shape_btag_exceptions)
    shape_histogram = histogram
    for fragment in remove_for_shape:
        shape_histogram = shape_histogram.replace(fragment, '')
    new_btag = shape_btag_for_exceptions if is_exception else shape_btag
    return shape_histogram.replace(current_btag, new_btag)


def _template_scale_factor(normalisation, n_entries_shape):
    """Return the factor that scales a template integral to ``normalisation``.

    Returns 1 when the template integral is not positive. When the
    normalisation is exactly 0 the template is scaled to unit integral
    instead (kept from the original 'bug fix for empty templates').
    """
    if not n_entries_shape > 0:
        return 1
    if normalisation == 0:
        # bug fix for empty templates
        return 1.0 / n_entries_shape
    # float() guards against Python 2 integer division
    return float(normalisation) / n_entries_shape


def create_qcd_file(input_values):
    """Create the output file with data-driven templates normalised to MC.

    ``input_values`` is a dictionary that must provide the keys
    'data_file', 'mc_file', 'histogram_path', 'shape_keyword_in_path',
    'shape_btag', 'shape_btag_for_exceptions', 'shape_btag_exceptions',
    'remove_for_shape', 'normalisation_keyword_in_path', 'ignore_subpaths',
    'subtract_other_samples' and 'output_file' (KeyError otherwise).

    For every histogram found in mc_file below a path that contains
    histogram_path (and none of ignore_subpaths):
      * its integral (including overflow) is taken as the normalisation,
      * the matching template is read from data_file, using the path with
        normalisation_keyword_in_path replaced by shape_keyword_in_path and
        the histogram name with the b-tag swapped,
      * the samples from subtract_other_samples are removed from it via
        clean_control_region,
      * the result is scaled to the normalisation and stored under the
        original MC path/name in output_file.

    Finally the number of processed histograms is printed.
    """
    data_file = input_values['data_file']
    mc_file = input_values['mc_file']
    histogram_path = input_values['histogram_path']
    shape_keyword_in_path = input_values['shape_keyword_in_path']
    shape_btag = input_values['shape_btag']
    shape_btag_for_exceptions = input_values['shape_btag_for_exceptions']
    shape_btag_exceptions = input_values['shape_btag_exceptions']
    remove_for_shape = input_values['remove_for_shape']
    normalisation_keyword_in_path = input_values['normalisation_keyword_in_path']
    ignore_subpaths = input_values['ignore_subpaths']
    subtract_other_samples = input_values['subtract_other_samples']
    output_file = input_values['output_file']

    total_histograms = 0
    # keyed by (directory path, histogram name) so that the directory never
    # has to be re-derived from a joined string when writing
    output = {}

    data_file_handle = root_open(data_file)
    try:
        get_shape_hist = data_file_handle.Get
        with root_open(mc_file) as f:
            for path, _, histograms in f.walk():
                if histogram_path not in path or not histograms:
                    continue
                if any(subpath in path for subpath in ignore_subpaths):
                    continue
                shape_path = path.replace(normalisation_keyword_in_path,
                                          shape_keyword_in_path)
                for histogram in histograms:
                    hist = f.Get(path + '/' + histogram)
                    normalisation = hist.integral(overflow = True)
                    # now swap the b-tag
                    # presumably find_btag returns the b-tag string found in
                    # the name as its first element - confirm in
                    # tools.ROOT_utililities
                    current_btag = find_btag(histogram)[0]
                    shape_histogram = _template_histogram_name(
                        histogram, current_btag, remove_for_shape,
                        shape_btag, shape_btag_for_exceptions,
                        shape_btag_exceptions)
                    template_path = shape_path + '/' + shape_histogram

                    # switch to gROOT before cloning (kept from the original;
                    # presumably so the clone is not owned by an open file)
                    gcd()
                    output_hist = get_shape_hist(template_path).clone()
                    other_samples = get_histogram_dictionary(
                        template_path, subtract_other_samples)
                    # snapshot of the sample names *before* 'data' is added
                    subtract_samples = list(other_samples)
                    other_samples['data'] = output_hist
                    output_hist = clean_control_region(
                        other_samples, subtract = subtract_samples)

                    # scale the histogram
                    n_entries_shape = output_hist.integral(overflow = True)
                    output_hist.Scale(
                        _template_scale_factor(normalisation, n_entries_shape))
                    output[(path, histogram)] = output_hist
                total_histograms += len(histograms)
    finally:
        # close the data file even if processing a histogram failed
        data_file_handle.close()

    output_file_handle = root_open(output_file, 'recreate')
    try:
        # probably faster to use TFileCache within the loop above.
        for (path, histogram_name), histogram in output.items():
            root_mkdir(output_file_handle, path)
            output_file_handle.cd(path)
            histogram.write(histogram_name)
        output_file_handle.cd()
    finally:
        output_file_handle.close()
    print('Processed %d histograms' % total_histograms)
34166

35167
# Run the merging script only when executed directly, not when imported.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)