Skip to content

Commit e1f159e

Browse files
committed
[RF][HF] Check for consistent datasets across channels before combining
This makes problems like the one reported in #21066 easier for the user to debug, because there will be a meaningful error message like this instead of a crash:

```txt
[#2] FATAL:HistFactory -- ERROR: Inconsistent datasets across channel workspaces.
Workspace for channel "SR_3L_0RC_binary_binary_3L_0RC" does not match the datasets in channel "SR_2L_mva_2L_multiclass_ttll".
  Missing datasets:
    - obsData
All channel workspaces must contain exactly the same datasets.
terminate called after throwing an instance of 'RooStats::HistFactory::hf_exc'
  what():  HistFactory - Exception
```

(cherry picked from commit e37fefd)
1 parent cc087ed commit e1f159e

File tree

1 file changed

+85
-0
lines changed

1 file changed

+85
-0
lines changed

roofit/histfactory/src/HistoToWorkspaceFactoryFast.cxx

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,80 @@ Arg_t &emplace(RooWorkspace &ws, std::string const &name, Args_t &&...args)
313313
return *dynamic_cast<Arg_t *>(ws.arg(name));
314314
}
315315

316+
317+
/// Check whether all channel workspaces contain consistent datasets.
318+
///
319+
/// This function compares the datasets stored in each channel workspace against
320+
/// those in the first workspace.
321+
///
322+
/// \param chs Vector of channel workspaces to compare (first is the reference).
323+
/// \param ch_names Names of the channels, used for error reporting.
324+
/// \param allowedInconsistent Dataset names that are allowed to differ between channels.
325+
///
326+
/// \return A pair consisting of:
327+
/// - bool: true if all channels are consistent (after ignoring allowed datasets),
328+
/// false otherwise.
329+
/// - std::string: empty if consistent; otherwise, a detailed error message
330+
/// describing the inconsistencies.
331+
332+
std::pair<bool, std::string> isChannelDataConsistent(std::vector<std::unique_ptr<RooWorkspace>> const &chs,
333+
std::vector<std::string> const &ch_names,
334+
std::set<std::string> const &allowedInconsistent)
335+
{
336+
// Collect the reference list of dataset names from the first workspace
337+
std::set<std::string> referenceDataNames;
338+
for (RooAbsData *data : chs[0]->allData()) {
339+
referenceDataNames.insert(data->GetName());
340+
}
341+
342+
// Check that all other workspaces have the same datasets
343+
for (std::size_t i = 1; i < chs.size(); ++i) {
344+
std::set<std::string> thisDataNames;
345+
for (RooAbsData *data : chs[i]->allData()) {
346+
thisDataNames.insert(data->GetName());
347+
}
348+
349+
// Find missing and extra datasets in this workspace
350+
std::vector<std::string> missing;
351+
std::vector<std::string> extra;
352+
std::set_difference(referenceDataNames.begin(), referenceDataNames.end(), thisDataNames.begin(),
353+
thisDataNames.end(), std::back_inserter(missing));
354+
std::set_difference(thisDataNames.begin(), thisDataNames.end(), referenceDataNames.begin(),
355+
referenceDataNames.end(), std::back_inserter(extra));
356+
357+
// Remove allowed inconsistencies
358+
auto isAllowed = [&](std::string const &name) { return allowedInconsistent.count(name) != 0; };
359+
360+
missing.erase(std::remove_if(missing.begin(), missing.end(), isAllowed), missing.end());
361+
extra.erase(std::remove_if(extra.begin(), extra.end(), isAllowed), extra.end());
362+
363+
if (!missing.empty() || !extra.empty()) {
364+
std::stringstream errMsg;
365+
errMsg << "ERROR: Inconsistent datasets across channel workspaces.\n"
366+
<< "Workspace for channel \"" << ch_names[i] << "\" does not match "
367+
<< "the datasets in channel \"" << ch_names[0] << "\".\n";
368+
369+
if (!missing.empty()) {
370+
errMsg << " Missing datasets:\n";
371+
for (const auto &name : missing) {
372+
errMsg << " - " << name << "\n";
373+
}
374+
}
375+
376+
if (!extra.empty()) {
377+
errMsg << " Extra datasets:\n";
378+
for (const auto &name : extra) {
379+
errMsg << " - " << name << "\n";
380+
}
381+
}
382+
383+
errMsg << "All channel workspaces must contain exactly the same datasets.\n";
384+
return {false, errMsg.str()};
385+
}
386+
}
387+
return {true, ""};
388+
}
389+
316390
} // namespace
317391

318392
/// Create observables of type RooRealVar. Creates 1 to 3 observables, depending on the type of the histogram.
@@ -1538,6 +1612,17 @@ RooArgList HistoToWorkspaceFactoryFast::createObservables(const TH1 *hist, RooWo
15381612
combined->defineSet("observables",{obsList, channelCat}, /*importMissing=*/true);
15391613
combined_config->SetObservables(*combined->set("observables"));
15401614

1615+
// Check if the channel datasets are consistent
1616+
{
1617+
bool isConsistent = false;
1618+
std::string errMsg;
1619+
std::set<std::string> allowedInconsistent{"asimovData"};
1620+
std::tie(isConsistent, errMsg) = isChannelDataConsistent(chs, ch_names, allowedInconsistent);
1621+
if (!isConsistent) {
1622+
cxcoutFHF << errMsg;
1623+
throw hf_exc();
1624+
}
1625+
}
15411626

15421627
// Now merge the observable datasets across the channels
15431628
for(RooAbsData * data : chs[0]->allData()) {

0 commit comments

Comments
 (0)