Skip to content

Commit 4852d6e

Browse files
committed
[RF][HF] Check for consistent datasets across channels before combining
This makes problems like the one reported in #21066 easier for the user to debug, because a meaningful error message like the following is printed instead of a crash:
```txt
[#2] FATAL:HistFactory -- ERROR: Inconsistent datasets across channel workspaces.
Workspace for channel "SR_3L_0RC_binary_binary_3L_0RC" does not match the datasets in channel "SR_2L_mva_2L_multiclass_ttll".
 Missing datasets:
 - obsData
All channel workspaces must contain exactly the same datasets.
terminate called after throwing an instance of 'RooStats::HistFactory::hf_exc'
  what(): HistFactory - Exception
```
(cherry picked from commit e37fefd)
1 parent 0233b3e commit 4852d6e

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed

roofit/histfactory/src/HistoToWorkspaceFactoryFast.cxx

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,80 @@ Arg_t &emplace(RooWorkspace &ws, std::string const &name, Args_t &&...args)
317317
return *dynamic_cast<Arg_t *>(ws.arg(name));
318318
}
319319

320+
321+
/// Check whether all channel workspaces contain consistent datasets.
322+
///
323+
/// This function compares the datasets stored in each channel workspace against
324+
/// those in the first workspace.
325+
///
326+
/// \param chs Vector of channel workspaces to compare (first is the reference).
327+
/// \param ch_names Names of the channels, used for error reporting.
328+
/// \param allowedInconsistent Dataset names that are allowed to differ between channels.
329+
///
330+
/// \return A pair consisting of:
331+
/// - bool: true if all channels are consistent (after ignoring allowed datasets),
332+
/// false otherwise.
333+
/// - std::string: empty if consistent; otherwise, a detailed error message
334+
/// describing the inconsistencies.
335+
336+
std::pair<bool, std::string> isChannelDataConsistent(std::vector<std::unique_ptr<RooWorkspace>> const &chs,
337+
std::vector<std::string> const &ch_names,
338+
std::set<std::string> const &allowedInconsistent)
339+
{
340+
// Collect the reference list of dataset names from the first workspace
341+
std::set<std::string> referenceDataNames;
342+
for (RooAbsData *data : chs[0]->allData()) {
343+
referenceDataNames.insert(data->GetName());
344+
}
345+
346+
// Check that all other workspaces have the same datasets
347+
for (std::size_t i = 1; i < chs.size(); ++i) {
348+
std::set<std::string> thisDataNames;
349+
for (RooAbsData *data : chs[i]->allData()) {
350+
thisDataNames.insert(data->GetName());
351+
}
352+
353+
// Find missing and extra datasets in this workspace
354+
std::vector<std::string> missing;
355+
std::vector<std::string> extra;
356+
std::set_difference(referenceDataNames.begin(), referenceDataNames.end(), thisDataNames.begin(),
357+
thisDataNames.end(), std::back_inserter(missing));
358+
std::set_difference(thisDataNames.begin(), thisDataNames.end(), referenceDataNames.begin(),
359+
referenceDataNames.end(), std::back_inserter(extra));
360+
361+
// Remove allowed inconsistencies
362+
auto isAllowed = [&](std::string const &name) { return allowedInconsistent.count(name) != 0; };
363+
364+
missing.erase(std::remove_if(missing.begin(), missing.end(), isAllowed), missing.end());
365+
extra.erase(std::remove_if(extra.begin(), extra.end(), isAllowed), extra.end());
366+
367+
if (!missing.empty() || !extra.empty()) {
368+
std::stringstream errMsg;
369+
errMsg << "ERROR: Inconsistent datasets across channel workspaces.\n"
370+
<< "Workspace for channel \"" << ch_names[i] << "\" does not match "
371+
<< "the datasets in channel \"" << ch_names[0] << "\".\n";
372+
373+
if (!missing.empty()) {
374+
errMsg << " Missing datasets:\n";
375+
for (const auto &name : missing) {
376+
errMsg << " - " << name << "\n";
377+
}
378+
}
379+
380+
if (!extra.empty()) {
381+
errMsg << " Extra datasets:\n";
382+
for (const auto &name : extra) {
383+
errMsg << " - " << name << "\n";
384+
}
385+
}
386+
387+
errMsg << "All channel workspaces must contain exactly the same datasets.\n";
388+
return {false, errMsg.str()};
389+
}
390+
}
391+
return {true, ""};
392+
}
393+
320394
} // namespace
321395

322396
/// Create observables of type RooRealVar. Creates 1 to 3 observables, depending on the type of the histogram.
@@ -1537,6 +1611,17 @@ RooArgList HistoToWorkspaceFactoryFast::createObservables(const TH1 *hist, RooWo
15371611
combined->defineSet("observables",{obsList, channelCat}, /*importMissing=*/true);
15381612
combined_config->SetObservables(*combined->set("observables"));
15391613

1614+
// Check if the channel datasets are consistent
1615+
{
1616+
bool isConsistent = false;
1617+
std::string errMsg;
1618+
std::set<std::string> allowedInconsistent{"asimovData"};
1619+
std::tie(isConsistent, errMsg) = isChannelDataConsistent(chs, ch_names, allowedInconsistent);
1620+
if (!isConsistent) {
1621+
cxcoutFHF << errMsg;
1622+
throw hf_exc();
1623+
}
1624+
}
15401625

15411626
// Now merge the observable datasets across the channels
15421627
for(RooAbsData * data : chs[0]->allData()) {

0 commit comments

Comments
 (0)