LHAPDF  6.5.5
PDFSet.h
1 // -*- C++ -*-
2 //
3 // This file is part of LHAPDF
4 // Copyright (C) 2012-2024 The LHAPDF collaboration (see AUTHORS for details)
5 //
6 #pragma once
7 #ifndef LHAPDF_PDFSet_H
8 #define LHAPDF_PDFSet_H
9 
10 #include "LHAPDF/Info.h"
11 #include "LHAPDF/Factories.h"
12 #include "LHAPDF/Version.h"
13 #include "LHAPDF/Config.h"
14 #include "LHAPDF/Utils.h"
15 
16 namespace LHAPDF {
17 
18 
19  /// CL percentage for a Gaussian 1-sigma
20  const double CL1SIGMA = 100*erf(1/sqrt(2));
21 
22 
23  // Forward declaration
24  class PDF;
25 
26 
27  /// @defgroup uncertainties Calculating PDF uncertainties
28  ///
29  /// See the PDFSet class and its PDFSet::uncertainty functions for usage.
30  /// @{
31 
32 
33  /// @brief Structure for storage of uncertainty info calculated over a PDF error set
34  ///
35  /// Used by the PDFSet::uncertainty functions.
36  struct PDFUncertainty {
37  using ErrPairs = std::vector<std::pair<double,double>>;
38 
39  /// Constructor
40  PDFUncertainty(double cent=0, double eplus=0, double eminus=0, double esymm=0, double scalefactor=1,
41  double eplus_pdf=0, double eminus_pdf=0, double esymm_pdf=0,
42  double eplus_par=0, double eminus_par=0, double esymm_par=0)
43  : central(cent), errplus(eplus), errminus(eminus), errsymm(esymm), scale(scalefactor),
44  errplus_pdf(eplus_pdf), errminus_pdf(eminus_pdf), errsymm_pdf(esymm_pdf),
45  errplus_par(eplus_par), errminus_par(eminus_par), errsymm_par(esymm_par)
46  { }
47 
48  /// Variables for the central value, +ve, -ve & symmetrised errors, and a CL scalefactor
49  double central, errplus, errminus, errsymm, scale;
50 
51  /// Variables for separate PDF and parameter variation errors with combined sets
52  double errplus_pdf, errminus_pdf, errsymm_pdf;
53  double errplus_par, errminus_par, errsymm_par;
54  double err_par; ///< @deprecated Remove redundant err_par; use errsymm_par
55 
56  /// Full error-breakdown of all quadrature uncertainty components, as (+,-) pairs
58  };
59 
60 
61 
62  /// @brief Structure encoding the structure of the PDF error-set
63  struct PDFErrInfo {
64  using EnvPart = std::pair<std::string, size_t>;
65  using EnvParts = std::vector<EnvPart>;
66  using QuadParts = std::vector<EnvParts>;
67 
68  /// Constructor
69  PDFErrInfo(QuadParts parts, double cl, const std::string& errtypestr="")
71  { }
72 
73  /// Default constructor (for STL, Cython, etc.)
75 
76  /// Error-set quadrature parts
78 
79  /// Default confidence-level
80  double conflevel;
81 
82  /// Error-type annotation
84 
85  /// Calculated name of a quadrature part
86  std::string coreType() const { return qpartName(0); }
87 
88  /// Calculated name of a quadrature part
90  /// Calculated names of all quadrature parts
92 
93  /// Number of core-set members
94  size_t nmemCore() const;
95  /// Number of par-set members
96  size_t nmemPar() const;
97 
98  };
99 
100  /// @}
101 
102 
103 
104  /// Class for PDF-set metadata and manipulation
105  class PDFSet : public Info {
106  public:
107 
108  /// Default constructor (for container compatibility)
109  /// @todo Remove?
110  PDFSet() { }
111 
112  /// Constructor from a set name
113  /// @todo Remove?
114  PDFSet(const std::string& setname);
115 
116 
117  /// @name PDF set metadata specialisations
118  /// @{
119 
120  /// @brief PDF set name
121  ///
122  /// @note _Not_ taken from the .info metadata file.
123  std::string name() const {
124  return _setname;
125  }
126 
127  /// Description of the set
128  std::string description() const {
129  return get_entry("SetDesc");
130  }
131 
132  /// First LHAPDF global index in this PDF set
133  int lhapdfID() const {
134  return get_entry_as<int>("SetIndex", -1);
135  }
136 
137  /// Version of this PDF set's data files
138  int dataversion() const {
139  return get_entry_as<int>("DataVersion", -1);
140  }
141 
142  /// Get the type of PDF errors in this set (replicas, symmhessian, hessian, custom, etc.)
143  std::string errorType() const {
144  return to_lower(get_entry("ErrorType", "UNKNOWN"));
145  }
146 
147  /// Get the structured decomposition of the error-type string
149 
150  /// @brief Get the confidence level of the Hessian eigenvectors, in percent.
151  ///
152  /// If not defined, assume 1-sigma = erf(1/sqrt(2)) =~ 68.268949% by default,
153  /// unless this is a replica set for which return -1.
154  double errorConfLevel() const;
155 
156  /// Number of members in this set
157  // int numMembers() const {
158  // return get_entry_as<int>("NumMembers");
159  // }
160  size_t size() const {
161  return get_entry_as<unsigned int>("NumMembers");
162  }
163 
164  /// Number of error members in this set
165  ///
166  /// @note Equal to size()-1
167  size_t errorSize() const {
168  return size()-1;
169  }
170  /// Number of error members in this set
171  ///
172  /// @note Alias for preferred/consistent errorSize()
173  size_t errSize() const {
174  return errorSize();
175  }
176 
177  /// @}
178 
179 
180  /// Summary printout
181  void print(std::ostream& os=std::cout, int verbosity=1) const;
182 
183 
184  /// @name Creating PDF members
185  /// @{
186 
187  /// Make the nth PDF member in this set, returned by pointer
188  ///
189  /// @note As with the mkPDF factory method, the PDF pointer returned by this
190  /// method is heap allocated and its memory management is now the
191  /// responsibility of the caller.
192  PDF* mkPDF(size_t member) const {
193  return LHAPDF::mkPDF(name(), member);
194  }
195 
196 
197  /// Make all the PDFs in this set, filling a supplied vector with PDF pointers
198  ///
199  /// This version may be preferred in many circumstances, since it can avoid
200  /// the overhead of creating a new temporary vector.
201  ///
202  /// A vector of *smart* pointers can be used, for any smart pointer type which
203  /// supports construction from a raw pointer argument, e.g. unique_ptr<PDF>(PDF*).
204  ///
205  /// @note The supplied vector will be cleared before filling!
206  ///
207  /// @note As with the mkPDF method and factory function, the PDF pointers
208  /// returned by this method are heap allocated and their memory management
209  /// is now the responsibility of the caller, either manually for raw pointers
210  /// or automatically if smart pointers are used.
211  ///
212  /// @note Use an *appropriate* smart pointer, of course! This depends in
213  /// detail on how you will use the PDF objects (do you want shared or unique
214  /// pointers?), but they also need to be compatible with storage in STL
215  /// containers, e.g. std::unique_ptr or std::shared_ptr but *not* the
216  /// deprecated std::auto_ptr.
217  //
218  /// @todo Needs to be implemented in the header since the arg type is templated.
219  template <typename PTR>
220  void mkPDFs(std::vector<PTR>& pdfs) const {
221  const int v = verbosity();
222  if (v > 0) {
223  std::cout << "LHAPDF " << version() << " loading all " << size() << " PDFs in set " << name() << std::endl;
224  this->print(std::cout, v);
225  if (this->has_key("Note")) std::cout << get_entry("Note") << std::endl;
226  }
227  pdfs.clear();
228  pdfs.reserve(size());
229  if (v < 2) setVerbosity(0); //< Disable every-member printout unless verbosity level is high
230  for (size_t i = 0; i < size(); ++i) {
231  /// @todo Need to use an std::move here, or write differently, for unique_ptr to work?
232  pdfs.push_back( PTR(mkPDF(i)) );
233  }
234  setVerbosity(v);
235  }
236 
237  /// Make all the PDFs in this set, returning as a vector of PDF pointers
238  ///
239  /// @note As with the mkPDF method and factory function, the PDF pointers
240  /// returned by this method are heap allocated and their memory management
241  /// is now the responsibility of the caller.
242  std::vector<PDF*> mkPDFs() const {
243  std::vector<PDF*> rtn;
244  mkPDFs(rtn);
245  return rtn;
246  }
247 
248  /// @todo Use the following with default function template args if C++11 is being used
249  // template <typename PTR=PDF*>
250  template <typename PTR>
251  std::vector<PTR> mkPDFs() const {
252  std::vector<PTR> rtn;
253  mkPDFs(rtn);
254  return rtn;
255  }
256 
257  /// @}
258 
259 
260  /// @todo Add AlphaS getter for set-level alphaS?
261 
262 
263  /// @name Generic metadata cascading mechanism
264  /// @{
265 
266  /// Get the keys defined on this object or higher
267  std::vector<std::string> keys() const {
268  std::vector<std::string> rtn = getConfig().keys();
269  for (const std::string& k : keys_local()) {
270  if (!contains(rtn, k)) rtn.push_back(k);
271  }
272  return rtn;
273  }
274 
275  /// Can this Info object return a value for the given key? (it may be defined non-locally)
276  bool has_key(const std::string& key) const {
277  return has_key_local(key) || getConfig().has_key(key);
278  }
279 
280  /// Retrieve a metadata string by key name
281  const std::string& get_entry(const std::string& key) const {
282  if (has_key_local(key)) return get_entry_local(key); //< value is defined locally
283  return getConfig().get_entry(key); //< fall back to the global config
284  }
285 
286  /// Retrieve a metadata string by key name, with a fallback
287  const std::string& get_entry(const std::string& key, const std::string& fallback) const {
288  return Info::get_entry(key, fallback);
289  }
290 
291  /// @}
292 
293 
294  /// @name PDF set uncertainty functions
295  ///
296  /// See the @ref uncertainties group for more details
297  /// @{
298 
299  /// @brief Calculate the central value and PDF uncertainty on an observable.
300  ///
301  /// @warning The @c values vector corresponds to the members of this PDF set and must be ordered accordingly.
302  ///
303  /// In the Hessian approach, the central value is the best-fit
304  /// "values[0]" and the uncertainty is given by either the symmetric or
305  /// asymmetric formula using eigenvector PDF sets.
306  ///
307  /// If the PDF set is given in the form of replicas, by default, the central value is
308  /// given by the mean and is not necessarily "values[0]" for quantities with a non-linear
309  /// dependence on PDFs, while the uncertainty is given by the standard deviation.
310  ///
311  /// Optional argument @c cl is used to rescale uncertainties to a
312  /// particular confidence level (in percent); a negative number will rescale to the
313  /// default CL for this set.
314  ///
315  /// @note If @c cl is omitted, automatically rescale to normal 1-sigma ~ 68.268949% uncertainties.
316  ///
317  /// If the PDF set is given in the form of replicas, then optional argument
318  /// @c alternative equal to true (default: false) will construct a confidence
319  /// interval from the probability distribution of replicas, with the central
320  /// value given by the median.
321  ///
322  /// For a combined set, a breakdown of the separate PDF and parameter
323  /// variation uncertainties is available. The parameter variation
324  /// uncertainties are computed from the last 2*n members of the set, with n
325  /// the number of parameters.
326  ///
327  /// See the @ref uncertainties group for more details
328  ///
329  /// @todo Add option to restrict replica mean & stddev calculation to a central CI set?
330  PDFUncertainty uncertainty(const std::vector<double>& values,
331  double cl=CL1SIGMA, bool alternative=false) const {
332  PDFUncertainty rtn;
333  uncertainty(rtn, values, cl, alternative);
334  return rtn;
335  }
336 
337 
338  // // Trick to ensure no calls with implicit type conversion
339  // template <typename T1, typename T2>
340  // void uncertainty(const std::vector<double>& values, T1, T2) const = delete;
341 
342  // /// Alternative form allowing the alternative computation with default CL
343  // PDFUncertainty uncertainty(const std::vector<double>& values,
344  // bool alternative, double cl=CL1SIGMA) const {
345  // return uncertainty(values, cl, alternative);
346  // }
347 
348 
349  /// @brief Calculate the PDF uncertainty on an observable (as above), with efficient no-copy return to the @c rtn argument.
350  ///
351  /// @warning The @c values vector corresponds to the members of this PDF set and must be ordered accordingly.
352  ///
353  /// @todo For real efficiency, the chaining of these functions should be the other way around
354  ///
355  /// See the @ref uncertainties group for more details
357  const std::vector<double>& values,
358  double cl=CL1SIGMA, bool alternative=false) const;
359 
360  // // Trick to ensure no calls with implicit type conversion
361  // template <typename T1, typename T2>
362  // void uncertainty(PDFUncertainty& rtn, const std::vector<double>& values, T1, T2) const = delete;
363 
364  // /// Alternative form allowing the alternative computation with default CL
365  // void uncertainty(PDFUncertainty& rtn,
366  // const std::vector<double>& values,
367  // bool alternative, double cl=CL1SIGMA) const {
368  // uncertainty(rtn, values, cl, alternative);
369  // }
370 
371 
372  /// @brief Calculate PDF uncertainties on multiple observables at once.
373  ///
374  /// The @c observables_values nested vector is a list of variation-value
375  /// lists, with the first (outer) index corresponding to the M observables,
376  /// e.g. a set of differential-observable bins, and the inner index the N PDF
377  /// members.
378  ///
379  /// The return value is an M-element vector of PDFUncertainty summaray
380  /// structs, one per observable.
381  ///
382  /// @warning The inner @c _values vector corresponds to the members of this
383  /// PDF set and must be ordered accordingly.
385  double cl=CL1SIGMA, bool alternative=false) const {
386  std::vector<PDFUncertainty> rtn;
387  uncertainties(rtn, observables_values, cl, alternative);
388  return rtn;
389  }
390 
391 
392  /// @brief Calculate multiple PDF uncertainties (as above), with efficient no-copy return to the @c rtn argument.
393  ///
394  /// @warning The inner @c _values vector corresponds to the members of this
395  /// PDF set and must be ordered accordingly.
396  void uncertainties(std::vector<PDFUncertainty>& rtn,
397  const std::vector<std::vector<double>>& observables_values,
398  double cl=CL1SIGMA, bool alternative=false) const;
399 
400 
401  /// @brief Calculate the PDF correlation between @c valuesA and @c valuesB using appropriate formulae for this set.
402  ///
403  /// The correlation can vary between -1 and +1 where values close to {-1,0,+1} mean that the two
404  /// quantities A and B are {anticorrelated,uncorrelated,correlated}, respectively.
405  ///
406  /// For a combined set, the parameter variations are not included in the calculation of the correlation.
407  ///
408  /// See the @ref uncertainties group for more details
409  double correlation(const std::vector<double>& valuesA, const std::vector<double>& valuesB) const;
410 
411  /// @brief Generate a random value from Hessian @c values and Gaussian random numbers.
412  ///
413  /// @note This routine is intended for advanced users!
414  ///
415  /// See Section 6 of G. Watt and R.S. Thorne, JHEP 1208 (2012) 052 [arXiv:1205.4024 [hep-ph]].
416  ///
417  /// Pass a vector @c values containing a value for each member of the Hessian PDF set.
418  /// Pass a vector @c randoms containing neigen random numbers, where neigen is the number of distinct eigenvectors.
419  ///
420  /// Option @c symmetrise equal to true will symmetrise the random values (in the case of an asymmetric Hessian set)
421  /// using a corrected Eq. (6.5) of arXiv:1205.4024v2, so that the average tends to the best-fit for a large number of replicas.
422  ///
423  /// Option @c symmetrise equal to false will use Eq. (6.4) of arXiv:1205.4024v2 (for an asymmetric Hessian set),
424  /// then the average differs from the best-fit. Option @c symmetrise has no effect for a symmetric Hessian set.
425  ///
426  /// Random values generated in this way can subsequently be used for applications such as Bayesian reweighting
427  /// or combining predictions from different groups (as an alternative to taking the envelope).
428  /// See, for example, supplementary material at http://mstwpdf.hepforge.org/random/.
429  ///
430  /// Use of this routine with a non-Hessian PDF set will throw a UserError.
431  ///
432  /// For a combined set, the parameter variations are not included in the generation of the random value.
433  ///
434  /// See the @ref uncertainties group for more details
435  double randomValueFromHessian(const std::vector<double>& values, const std::vector<double>& randoms, bool symmetrise=true) const;
436 
437 
438  /// Check that the type of each member matches the ErrorType of the set.
439  ///
440  /// @todo We need to make the signature clearer -- what is the arg? Why not
441  /// automatically check the members? Why not a plural name? Why not on PDF?
442  /// "Hiding" the name for now with the leading underscore.
443  void _checkPdfType(const std::vector<string>& pdftypes) const;
444 
445  /// @}
446 
447 
448  private:
449 
450  /// Name of this set
452 
453  /// Cached PDF error-info breakdown struct
455 
456  };
457 
458 
459 }
460 #endif