IMP logo
IMP Reference Guide  develop.7bfed8c07c,2024/04/27
The Integrative Modeling Platform
ProfileFitter.h
Go to the documentation of this file.
1 /**
2  * \file IMP/saxs/ProfileFitter.h \brief a class for fitting two profiles
3  *
4  * Copyright 2007-2022 IMP Inventors. All rights reserved.
5  *
6  */
7 
8 #ifndef IMPSAXS_PROFILE_FITTER_H
9 #define IMPSAXS_PROFILE_FITTER_H
10 
11 #include "ChiScore.h"
12 #include "FitParameters.h"
13 #include "Profile.h"
14 #include <IMP/Object.h>
15 
16 #include <fstream>
17 
18 IMPSAXS_BEGIN_NAMESPACE
19 
20 //! Fit two profiles with user-defined scoring function as a template parameter.
21 /** By default chi score is used.
22  The scoring function template parameter class has to implement three
23  basic functions: compute_score, compute_scale_factor and compute_offset.
24  see ChiScore for example.
25  Currently four scoring functions are implemented:
26  ChiScore, ChiScoreLog, ChiFreeScore, and RatioVolatilityScore
27  */
28 template <typename ScoringFunctionT = ChiScore>
29 class ProfileFitter : public Object {
30  public:
31  //! Constructor
32  /**
33  \param[in] exp_profile Experimental profile we want to fit
34  */
35  ProfileFitter(const Profile* exp_profile)
36  : Object("ProfileFitter%1%"), exp_profile_(exp_profile) {
37  set_was_used(true);
38  scoring_function_ = new ScoringFunctionT();
39  }
40 
41  ProfileFitter(const Profile* exp_profile, ScoringFunctionT* sf)
42  : Object("ProfileFitter%1%"), exp_profile_(exp_profile) {
43  set_was_used(true);
44  scoring_function_ = sf;
45  }
46 
47  //! compute fit score
48  double compute_score(const Profile* model_profile, bool use_offset = false,
49  const std::string fit_file_name = "") const;
50 
51  //! fit experimental profile through optimization of c1 and c2 parameters
52  /**
53  c1 - adjusts the excluded volume, valid range [0.95 - 1.05]
54  c2 - adjusts the density of hydration layer, valid range [-2.0 - 4.0]
55  \param[in] partial_profile partial profiles computed
56  \param[in] min_c1 minimal c1 value
57  \param[in] max_c1 maximal c1 value
58  \param[in] min_c2 minimal c2 value
59  \param[in] max_c2 maximal c2 value
60  \param[in] use_offset use offset in fitting
61  \param[in] fit_file_name write fit in the given filename,
62  nothing is written if empty
63  \return FitParameters (score, c1, c2)
64  */
65  FitParameters fit_profile(Profile* partial_profile, double min_c1 = 0.95,
66  double max_c1 = 1.05, double min_c2 = -2.0,
67  double max_c2 = 4.0, bool use_offset = false,
68  const std::string fit_file_name = "") const;
69 
70  //! computes the scaling factor needed for fitting the modeled profile
71  // onto the experimental one
72  /** resampling of the modeled profile is required prior to calling
73  to this function, in order to match the q values of the exp. profile.
74  this is not done by default to minimize the number of calls to resample.
75  */
76  double compute_scale_factor(const Profile* model_profile,
77  double offset = 0.0) const {
78  return scoring_function_->compute_scale_factor(exp_profile_, model_profile,
79  offset);
80  }
81 
82  //! computes offset for fitting for fitting the modeled profile
83  // onto the experimental one
84  /** resampling of the modeled profile is required prior to calling
85  to this function, in order to match the q values of the exp. profile.
86  this is not done by default to minimize the number of calls to resample.
87  */
88  double compute_offset(const Profile* model_profile) const {
89  return scoring_function_->compute_offset(exp_profile_, model_profile);
90  }
91 
92  //! resampling of the modeled profile is required to fit the q
93  // values of the computational profile to the experimental one
94  void resample(const Profile* model_profile, Profile* resampled_profile) const;
95 
96  // writes 3 column fit file, given scale factor c, offset and chi square
97  void write_SAXS_fit_file(const std::string& file_name,
98  const Profile* model_profile, const double chi_square,
99  const double c = 1, const double offset = 0) const;
100 
101  //! \return the profile used
102  const Profile *get_profile() const { return exp_profile_; }
103 
104  private:
105  FitParameters search_fit_parameters(Profile* partial_profile, double min_c1,
106  double max_c1, double min_c2, double max_c2,
107  bool use_offset, double old_chi) const;
108 
110  friend class DerivativeCalculator;
111 
112  protected:
113  // experimental saxs profile
114  PointerMember<const Profile> exp_profile_;
115  PointerMember<ScoringFunctionT> scoring_function_;
116 };
117 
118 template <typename ScoringFunctionT>
120  const Profile* model_profile, Profile* resampled_profile) const {
121  model_profile->resample(exp_profile_, resampled_profile);
122 }
123 
124 template <typename ScoringFunctionT>
126  Profile* partial_profile, double min_c1, double max_c1, double min_c2,
127  double max_c2, bool use_offset, double old_chi) const {
128  int c1_cells = 10;
129  int c2_cells = 10;
130  if (old_chi < (std::numeric_limits<double>::max() - 1)) { // second iteration
131  c1_cells = 5;
132  c2_cells = 5;
133  }
134 
135  double delta_c1 = (max_c1 - min_c1) / c1_cells;
136  double delta_c2 = (max_c2 - min_c2) / c2_cells;
137 
138  bool last_c1 = false;
139  bool last_c2 = false;
140  if (delta_c1 < 0.0001) {
141  c1_cells = 1;
142  delta_c1 = max_c1 - min_c1;
143  last_c1 = true;
144  }
145  if (delta_c2 < 0.001) {
146  c2_cells = 1;
147  delta_c2 = max_c2 - min_c2;
148  last_c2 = true;
149  }
150  double best_c1(1.0), best_c2(0.0), best_chi(old_chi);
151  bool best_set = false;
152 
153  double c1(min_c1);
154  for (int i = 0; i <= c1_cells; i++, c1 += delta_c1) {
155  double c2 = min_c2;
156  for (int j = 0; j <= c2_cells; j++, c2 += delta_c2) {
157  partial_profile->sum_partial_profiles(c1, c2);
158  double curr_chi = compute_score(partial_profile, use_offset);
159  if (!best_set || curr_chi < best_chi) {
160  best_set = true;
161  best_chi = curr_chi;
162  best_c1 = c1;
163  best_c2 = c2;
164  }
165  }
166  }
167 
168  if (std::fabs(best_chi - old_chi) > 0.0001 &&
169  (!(last_c1 && last_c2))) { // refine more
170  min_c1 = std::max(best_c1 - delta_c1, min_c1);
171  max_c1 = std::min(best_c1 + delta_c1, max_c1);
172  min_c2 = std::max(best_c2 - delta_c2, min_c2);
173  max_c2 = std::min(best_c2 + delta_c2, max_c2);
174  return search_fit_parameters(partial_profile, min_c1, max_c1, min_c2,
175  max_c2, use_offset, best_chi);
176  }
177  return FitParameters(best_chi, best_c1, best_c2);
178 }
179 
180 template <typename ScoringFunctionT>
182  Profile* partial_profile, double min_c1, double max_c1, double min_c2,
183  double max_c2, bool use_offset, const std::string fit_file_name) const {
184 
185  // compute chi value for default c1/c1 (remove?)
186  double default_c1 = 1.0, default_c2 = 0.0;
187  partial_profile->sum_partial_profiles(default_c1, default_c2);
188  double default_chi = compute_score(partial_profile, use_offset);
189 
190  FitParameters fp =
191  search_fit_parameters(partial_profile, min_c1, max_c1, min_c2, max_c2,
192  use_offset, std::numeric_limits<double>::max());
193  double best_c1 = fp.get_c1();
194  double best_c2 = fp.get_c2();
195  fp.set_default_chi_square(default_chi);
196 
197  // compute a profile for best c1/c2 combination
198  partial_profile->sum_partial_profiles(best_c1, best_c2);
199  compute_score(partial_profile, use_offset, fit_file_name);
200 
201  return fp;
202 }
203 
204 template <typename ScoringFunctionT>
206  const Profile* model_profile, bool use_offset,
207  const std::string fit_file_name) const {
208  IMP_NEW(Profile, resampled_profile,
209  (exp_profile_->get_min_q(), exp_profile_->get_max_q(),
210  exp_profile_->get_delta_q()));
211  model_profile->resample(exp_profile_, resampled_profile);
212 
213  double score = scoring_function_->compute_score(exp_profile_,
214  resampled_profile,
215  use_offset);
216  if (fit_file_name.length() > 0) {
217  double offset = 0.0;
218  if (use_offset)
219  offset =
220  scoring_function_->compute_offset(exp_profile_, resampled_profile);
221  double c = scoring_function_->compute_scale_factor(exp_profile_,
222  resampled_profile,
223  offset);
224  write_SAXS_fit_file(fit_file_name, resampled_profile, score, c, offset);
225  }
226  return score;
227 }
228 
229 template <typename ScoringFunctionT>
231  const std::string& file_name, const Profile* model_profile,
232  const double score, const double c, const double offset) const {
233  std::ofstream out_file(file_name.c_str());
234  if (!out_file) {
235  IMP_THROW("Can't open file " << file_name, IOException);
236  }
237 
238  unsigned int profile_size =
239  std::min(model_profile->size(), exp_profile_->size());
240  // header line
241  out_file.precision(15);
242  out_file << "# SAXS profile: number of points = " << profile_size
243  << ", q_min = " << exp_profile_->get_min_q()
244  << ", q_max = " << exp_profile_->get_max_q();
245  out_file << ", delta_q = " << exp_profile_->get_delta_q() << std::endl;
246 
247  out_file.setf(std::ios::showpoint);
248  out_file << "# offset = " << offset << ", scaling c = " << c
249  << ", Chi^2 = " << score << std::endl;
250  out_file << "# q exp_intensity model_intensity error" << std::endl;
251 
252  out_file.setf(std::ios::fixed, std::ios::floatfield);
253  // Main data
254  for (unsigned int i = 0; i < profile_size; i++) {
255  out_file.setf(std::ios::left);
256  out_file.width(10);
257  out_file.precision(8);
258  out_file << exp_profile_->get_q(i) << " ";
259 
260  out_file.setf(std::ios::left);
261  out_file.width(15);
262  out_file.precision(8);
263  out_file << exp_profile_->get_intensity(i) << " ";
264 
265  out_file.setf(std::ios::left);
266  out_file.width(15);
267  out_file.precision(8);
268  out_file << model_profile->get_intensity(i) * c - offset << " ";
269 
270  out_file.setf(std::ios::left);
271  out_file.width(10);
272  out_file.precision(8);
273  out_file << exp_profile_->get_error(i) << std::endl;
274  }
275  out_file.close();
276 
277  // we are writing to the second file with .fit extension in the different
278  // column order. the goal is to retire .dat extension after some time
279  std::string file_name2 = file_name.substr(0, file_name.length()-4);
280  file_name2 += ".fit";
281  std::ofstream out_file2(file_name2.c_str());
282  if (!out_file2) {
283  IMP_THROW("Can't open file " << file_name2, IOException);
284  }
285 
286  // header line
287  out_file2.precision(15);
288  out_file2 << "# SAXS profile: number of points = " << profile_size
289  << ", q_min = " << exp_profile_->get_min_q()
290  << ", q_max = " << exp_profile_->get_max_q();
291  out_file2 << ", delta_q = " << exp_profile_->get_delta_q() << std::endl;
292 
293  out_file2.setf(std::ios::showpoint);
294  out_file2 << "# offset = " << offset << ", scaling c = " << c
295  << ", Chi^2 = " << score << std::endl;
296  out_file2 << "# q exp_intensity error model_intensity" << std::endl;
297 
298  out_file2.setf(std::ios::fixed, std::ios::floatfield);
299  // Main data
300  for (unsigned int i = 0; i < profile_size; i++) {
301  out_file2.setf(std::ios::left);
302  out_file2.width(10);
303  out_file2.precision(8);
304  out_file2 << exp_profile_->get_q(i) << " ";
305 
306  out_file2.setf(std::ios::left);
307  out_file2.width(15);
308  out_file2.precision(8);
309  out_file2 << exp_profile_->get_intensity(i) << " ";
310 
311  out_file2.setf(std::ios::left);
312  out_file2.width(10);
313  out_file2.precision(8);
314  out_file2 << exp_profile_->get_error(i) << " ";
315 
316  out_file2.setf(std::ios::left);
317  out_file2.width(15);
318  out_file2.precision(8);
319  out_file2 << model_profile->get_intensity(i) * c - offset << std::endl;
320 
321  }
322  out_file2.close();
323 
324 }
325 
326 IMPSAXS_END_NAMESPACE
327 
328 #endif /* IMPSAXS_PROFILE_FITTER_H */
unsigned int size() const
return number of entries in SAXS profile
Definition: Profile.h:181
ProfileFitter(const Profile *exp_profile)
Constructor.
Definition: ProfileFitter.h:35
Basic chi score implementation.
Parameters of a fit, from ProfileFitter.
Definition: FitParameters.h:16
#define IMP_REF_COUNTED_DESTRUCTOR(Name)
Set up destructor for a ref counted object.
#define IMP_NEW(Typename, varname, args)
Declare a ref counted pointer to a new object.
Definition: object_macros.h:74
An input/output exception.
Definition: exception.h:173
Common base class for heavy weight IMP objects.
Definition: Object.h:111
A smart pointer to a ref-counted Object that is a class member.
Definition: Pointer.h:143
Copyright 2007-2022 IMP Inventors. All rights reserved.
double compute_offset(const Profile *model_profile) const
computes offset for fitting for fitting the modeled profile
Definition: ProfileFitter.h:88
#define IMP_THROW(message, exception_name)
Throw an exception with a message.
Definition: check_macros.h:50
A shared base class to help in debugging and things.
void resample(const Profile *exp_profile, Profile *resampled_profile) const
return a profile that is sampled on the q values of the exp_profile
Fit two profiles with user-defined scoring function as a template parameter.
Definition: ProfileFitter.h:29
void sum_partial_profiles(double c1, double c2, bool check_cashed=true)
computes full profile for given fitting parameters
double compute_scale_factor(const Profile *model_profile, double offset=0.0) const
computes the scaling factor needed for fitting the modeled profile
Definition: ProfileFitter.h:76
const Profile * get_profile() const
A class for profile storing and computation.