IsoSpec  2.2.1
marginalTrek++.h
1 /*
2  * Copyright (C) 2015-2020 Mateusz Łącki and Michał Startek.
3  *
4  * This file is part of IsoSpec.
5  *
6  * IsoSpec is free software: you can redistribute it and/or modify
7  * it under the terms of the Simplified ("2-clause") BSD licence.
8  *
9  * IsoSpec is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
12  *
13  * You should have received a copy of the Simplified BSD Licence
14  * along with IsoSpec. If not, see <https://opensource.org/licenses/BSD-2-Clause>.
15  */
16 
17 #pragma once
18 
19 #include <queue>
20 #include <algorithm>
21 #include <vector>
22 #include <functional>
23 #include <utility>
24 #include "conf.h"
25 #include "allocator.h"
26 #include "operators.h"
27 #include "summator.h"
28 #include "pod_vector.h"
29 
30 
31 namespace IsoSpec
32 {
33 
35 
42 class Marginal
43 {
44  private:
45  bool disowned;
46  protected:
47  const unsigned int isotopeNo;
48  const unsigned int atomCnt;
49  const double* const atom_lProbs;
50  const double* const atom_masses;
51  const double loggamma_nominator;
52  Conf mode_conf;
53  double mode_lprob;
56  public:
58 
65  Marginal(
66  const double* _masses, // masses size = logProbs size = isotopeNo
67  const double* _probs,
68  int _isotopeNo,
69  int _atomCnt
70  );
71 
72  // Get rid of the C++ generated assignment constructor.
73  Marginal& operator= (const Marginal& other) = delete;
74 
76  Marginal(const Marginal& other);
77 
79  Marginal(Marginal&& other);
80 
82  virtual ~Marginal();
83 
85 
88  inline int get_isotopeNo() const { return isotopeNo; }
89 
90  inline const double* get_lProbs() const { return atom_lProbs; }
91 
93 
96  double getLightestConfMass() const;
97 
99 
102  double getHeaviestConfMass() const;
103 
105 
109  double getMonoisotopicConfMass() const;
110 
112 
115  inline double getModeMass() { ensureModeConf(); return calc_mass(mode_conf, atom_masses, isotopeNo); }
116 
118 
121  inline double getModeLProb() { ensureModeConf(); return mode_lprob; }
122 
124  inline double fastGetModeLProb() { return mode_lprob; }
125 
127 
130 // inline double getModeProb() const { return exp(getModeLProb()); }
131 
133  Conf computeModeConf() const;
134 
136 
139  inline double getSmallestLProb() const { return atomCnt * *std::min_element(atom_lProbs, atom_lProbs+isotopeNo); }
140 
142 
145  double getAtomAverageMass() const;
146 
148 
151  inline double getTheoreticalAverageMass() const { return getAtomAverageMass() * atomCnt; }
152 
154 
158  protected:
159  ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const { double ret = 0.0; for(size_t ii = 0; ii < isotopeNo; ii++) ret += minuslogFactorial(conf[ii]) + conf[ii] * atom_lProbs[ii]; return ret; }
160  ISOSPEC_FORCE_INLINE double logProb(Conf conf) const { return loggamma_nominator + unnormalized_logProb(conf); }
161  public:
163  double variance() const;
164 
166  double getLogSizeEstimate(double logEllipsoidRadius) const;
167 
168  inline void ensureModeConf() { if (mode_conf == nullptr) setupMode(); }
169  private:
170  void setupMode();
171 };
172 
173 
175 class MarginalTrek : public Marginal
176 {
177  private:
178  int current_count;
179  const ConfOrderMarginal orderMarginal;
180  std::priority_queue<ProbAndConfPtr, pod_vector<ProbAndConfPtr> > pq;
182  Allocator<int> allocator;
183  pod_vector<double> _conf_lprobs;
184  pod_vector<double> _conf_masses;
185  pod_vector<int*> _confs;
186 
187  const double min_lprob;
188  size_t current_bucket;
189  size_t initialized_until;
190 
192  bool add_next_conf();
193 
194  size_t bucket_no(double lprob) { return static_cast<size_t>( (mode_lprob - lprob) * 100.0 ); }
195 
196  public:
198 
202  MarginalTrek(
203  Marginal&& m,
204  int tabSize = 1000,
205  int hashSize = 1000
206  ); // NOLINT(runtime/explicit) - Constructor deliberately left usable as a conversion.
207 
208  MarginalTrek(const MarginalTrek& other) = delete;
209  MarginalTrek& operator=(const MarginalTrek& other) = delete;
210 
212 
218  inline bool probeConfigurationIdx(int idx)
219  {
220  while(current_count <= idx)
221  if(!add_next_conf())
222  return false;
223  return true;
224  }
225 
227 
230  inline double getModeLProb() const { return mode_lprob; }
231 
232 
233  inline const pod_vector<double>& conf_lprobs() const { return _conf_lprobs; }
234  inline const pod_vector<double>& conf_masses() const { return _conf_masses; }
235  inline const pod_vector<Conf>& confs() const { return _confs; }
236 
237 
238  virtual ~MarginalTrek();
239 };
240 
241 
243 
252 {
253  protected:
254  pod_vector<Conf> configurations;
255  Conf* confs;
256  unsigned int no_confs;
257  double* masses;
258  pod_vector<double> lProbs;
259  double* probs;
260  Allocator<int> allocator;
261  public:
263 
271  Marginal&& m,
272  double lCutOff,
273  bool sort = true,
274  int tabSize = 1000,
275  int hashSize = 1000
276  );
277 
278  PrecalculatedMarginal(const PrecalculatedMarginal& other) = delete;
279  PrecalculatedMarginal& operator=(const PrecalculatedMarginal& other) = delete;
280 
282  virtual ~PrecalculatedMarginal();
283 
285 
288  inline bool inRange(unsigned int idx) const { return idx < no_confs; }
289 
291 
295  inline const double& get_lProb(int idx) const { return lProbs[idx]; }
296 
298 
302  inline const double& get_prob(int idx) const { return probs[idx]; }
303 
305 
309  inline const double& get_mass(int idx) const { return masses[idx]; }
310 
312 
315  inline const double* get_lProbs_ptr() const { return lProbs.data(); }
316 
318 
321  inline const double* get_masses_ptr() const { return masses; }
322 
323 
325 
329  inline const Conf& get_conf(int idx) const { return confs[idx]; }
330 
332 
335  inline unsigned int get_no_confs() const { return no_confs; }
336 
338 
341  inline double getModeLProb() const { return mode_lprob; }
342 };
343 
344 
345 
347 
350 class LayeredMarginal : public Marginal
351 {
352  private:
353  double current_threshold;
354  pod_vector<Conf> configurations;
355  pod_vector<Conf> fringe;
356  pod_vector<double> fringe_unn_lprobs;
357  Allocator<int> allocator;
358  const ConfEqual equalizer;
359  const KeyHasher keyHasher;
360  pod_vector<double> lProbs;
361  pod_vector<double> probs;
362  pod_vector<double> masses;
363  double* guarded_lProbs;
364 
365  public:
367 
371  LayeredMarginal(Marginal&& m, int tabSize = 1000, int hashSize = 1000); // NOLINT(runtime/explicit) - constructor deliberately left usable as a conversion
372 
373  LayeredMarginal(const LayeredMarginal& other) = delete;
374  LayeredMarginal& operator=(const LayeredMarginal& other) = delete;
375 
377 
381  bool extend(double new_threshold, bool do_sort = true);
382 
384  inline double get_lProb(int idx) const { return guarded_lProbs[idx]; } // access to idx == -1 is valid and gives a guardian of +inf
385 
387  inline double get_prob(int idx) const { return probs[idx]; }
388 
390  inline double get_mass(int idx) const { return masses[idx]; }
391 
393  inline const double* get_lProbs_ptr() const { return lProbs.data()+1; }
394 
396  inline const Conf& get_conf(int idx) const { return configurations[idx]; }
397 
399  inline unsigned int get_no_confs() const { return configurations.size(); }
400 
402  double get_min_mass() const;
403 
405  double get_max_mass() const;
406 
408 
411  inline double getModeLProb() const { return mode_lprob; }
412 };
413 
414 
415 
416 } // namespace IsoSpec
LayeredMarginal class.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
const double * get_lProbs_ptr() const
get the pointer to lProbs array. Accessing index -1 is legal and returns a guardian of -inf....
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
bool extend(double new_threshold, bool do_sort=true)
Extend the set of computed subisotopologues to those above the new threshold.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues, see details in PrecalculatedMarginal::get_no_confs.
const Conf & get_conf(int idx) const
get the counts of isotopes that define the subisotopologue, see details in PrecalculatedMarginal::get...
double get_max_mass() const
Get the maximal mass in current layer.
double get_min_mass() const
Get the minimal mass in current layer.
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
LayeredMarginal(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
The marginal distribution class (a subisotopologue).
int get_isotopeNo() const
Get the number of isotopes of the investigated element.
Conf computeModeConf() const
The the probability of the mode subisotopologue.
double getSmallestLProb() const
The the log-probability of the lightest subisotopologue.
Marginal(const double *_masses, const double *_probs, int _isotopeNo, int _atomCnt)
Class constructor.
const unsigned int atomCnt
double fastGetModeLProb()
Get the log-probability of the mode subisotopologue. Results undefined if ensureModeConf() wasn't cal...
double getModeLProb()
Get the log-probability of the mode subisotopologue.
double getModeMass()
The the mass of the mode subisotopologue.
double getLightestConfMass() const
Get the mass of the lightest subisotopologue.
const unsigned int isotopeNo
const double *const atom_masses
double variance() const
Calculate the variance of the theoretical distribution describing the subisotopologue.
ISOSPEC_FORCE_INLINE double unnormalized_logProb(Conf conf) const
Calculate the log-probability of a given subisotopologue.
const double loggamma_nominator
double getHeaviestConfMass() const
Get the mass of the heaviest subisotopologue.
double getAtomAverageMass() const
The average mass of a single atom.
double getMonoisotopicConfMass() const
Get the mass of the monoisotopic subisotopologue.
double getTheoreticalAverageMass() const
The theoretical average mass of the molecule.
virtual ~Marginal()
Destructor.
double getLogSizeEstimate(double logEllipsoidRadius) const
Return estimated logarithm of size of the marginal at a given ellipsoid radius.
const double *const atom_lProbs
The marginal distribution class (a subisotopologue).
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
bool probeConfigurationIdx(int idx)
Check if the table of computed subisotopologues does not have to be extended.
MarginalTrek(Marginal &&m, int tabSize=1000, int hashSize=1000)
Move constructor: specializes the Marginal class.
Precalculated Marginal class.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
unsigned int get_no_confs() const
Get the number of precomputed subisotopologues.
const Conf & get_conf(int idx) const
Get the counts of isotopes that define the subisotopologue.
virtual ~PrecalculatedMarginal()
Destructor.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
bool inRange(unsigned int idx) const
Is there a subisotopologue with a given number?
const double * get_masses_ptr() const
Get the table of the masses of subisotopologues.
PrecalculatedMarginal(Marginal &&m, double lCutOff, bool sort=true, int tabSize=1000, int hashSize=1000)
The move constructor (disowns the Marginal).
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.
double getModeLProb() const
Get the log-probability of the mode subisotopologue.
const double * get_lProbs_ptr() const
Get the table of the log-probabilities of subisotopologues.