IMP  2.0.0
The Integrative Modeling Platform
ProfileFitter.h
Go to the documentation of this file.
1 /**
2  * \file IMP/saxs/ProfileFitter.h \brief a class for fitting two profiles
3  *
4  * Copyright 2007-2013 IMP Inventors. All rights reserved.
5  *
6  */
7 
8 #ifndef IMPSAXS_PROFILE_FITTER_H
9 #define IMPSAXS_PROFILE_FITTER_H
10 
11 #include "ChiScore.h"
12 #include "FitParameters.h"
13 #include "Profile.h"
14 
15 #include <fstream>
16 
17 IMPSAXS_BEGIN_NAMESPACE
18 
19 /** \name ProfileFitter
20 
21  ProfileFitter is a class for fitting the two profiles with user-defined
22  scoring function that is a template parameter. By default chi score is used.
23  The scoring function template parameter class has to implement three
24  basic functions: compute_score, compute_scale_factor and compute_offset.
25  see ChiScore for example.
26  Currently two scoring functions are implemented: ChiScore and LogChiScore.
27  */
28 template<class ScoringFunctionT = ChiScore>
29 class ProfileFitter: public base::RefCounted {
30 public:
31  //! Constructor
32  /**
33  \param[in] exp_profile Experimental profile we want to fit
34  */
35  ProfileFitter(const Profile& exp_profile): exp_profile_(exp_profile) {
36  scoring_function_ = new ScoringFunctionT();
37  }
38 
39  //! compute fit score
40  Float compute_score(const Profile& model_profile,
41  bool use_offset = false,
42  const std::string fit_file_name = "") const;
43 
44  //! compute fit score in the interval
45  Float compute_score(const Profile& model_profile,
46  Float min_q, Float max_q) const;
47 
48  //! fit experimental profile through optimization of c1 and c2 parameters
49  /**
50  \param[in] partial_profile partial profiles computed
51  \param[in] min/max c1, min/max c2 - search range for c1 and c2
52  c1 - adjusts the excluded volume, valid range [0.95 - 1.05]
53  c2 - adjusts the density of hydration layer, valid range [-2.0 - 4.0]
54  \return FitParameters (score, c1, c2)
55  */
56  FitParameters fit_profile(Profile& partial_profile,
57  float min_c1=0.95, float max_c1=1.05,
58  float min_c2=-2.0, float max_c2=4.0,
59  bool use_offset = false,
60  const std::string fit_file_name = "") const;
61 
62  //! computes the scaling factor needed for fitting the modeled profile
63  // onto the experimental one
64  /** resampling of the modeled profile is required prior to calling
65  to this function, in order to match the q values of the exp. profile.
66  this is not done by default to minimize the number of calls to resample.
67  */
68  Float compute_scale_factor(const Profile& model_profile,
69  Float offset = 0.0) const {
70  return scoring_function_->compute_scale_factor(exp_profile_, model_profile,
71  offset);
72  }
73 
74  //! computes offset for fitting for fitting the modeled profile
75  // onto the experimental one
76  /** resampling of the modeled profile is required prior to calling
77  to this function, in order to match the q values of the exp. profile.
78  this is not done by default to minimize the number of calls to resample.
79  */
80  Float compute_offset(const Profile& model_profile) const {
81  return scoring_function_->compute_offset(exp_profile_, model_profile);
82  }
83 
84  //! resampling of the modeled profile is required to fit the q
85  // values of the computational profile to the experimental one
86  void resample(const Profile& model_profile, Profile& resampled_profile) const;
87 
88  private:
89 
90  FitParameters search_fit_parameters(Profile& partial_profile,
91  float min_c1, float max_c1,
92  float min_c2, float max_c2,
93  bool use_offset, float old_chi) const;
94 
95  // writes 3 column fit file, given scale factor c, offset and chi square
96  void write_SAXS_fit_file(const std::string& file_name,
97  const Profile& model_profile,
98  const Float chi_square,
99  const Float c=1, const Float offset=0) const;
100 
101  IMP_REF_COUNTED_DESTRUCTOR(ProfileFitter);
102  friend class DerivativeCalculator;
103 
104  private:
105  const Profile exp_profile_; // experimental saxs profile
106  ScoringFunctionT* scoring_function_;
107 };
108 
109 template<class ScoringFunctionT>
110 void ProfileFitter<ScoringFunctionT>::resample(const Profile& model_profile,
111  Profile& resampled_profile) const
112 {
113  // map of q values for fast search
114  std::map<float, unsigned int> q_mapping;
115  for (unsigned int k=0; k<model_profile.size(); k++) {
116  q_mapping[model_profile.get_q(k)] = k;
117  }
118 
119  for (unsigned int k=0; k<exp_profile_.size(); k++) {
120  Float q = exp_profile_.get_q(k);
121  if(q>model_profile.get_max_q()) break;
122  std::map<float, unsigned int>::iterator it = q_mapping.lower_bound(q);
123  if(it == q_mapping.end()) break;
124  unsigned int i = it->second;
125  if(i == 0) {
126  resampled_profile.add_entry(q, model_profile.get_intensity(i));
127  } else {
128  Float delta_q = model_profile.get_q(i)-model_profile.get_q(i-1);
129  if(delta_q <= 1.0e-16) {
130  resampled_profile.add_entry(q, model_profile.get_intensity(i));
131  } else {
132  Float alpha = (q - model_profile.get_q(i-1)) / delta_q;
133  if(alpha > 1.0) alpha = 1.0; // handle rounding errors
134  Float intensity = model_profile.get_intensity(i-1)
135  + (alpha)*(model_profile.get_intensity(i)
136  - model_profile.get_intensity(i-1));
137  resampled_profile.add_entry(q, intensity);
138  }
139  }
140  }
141 }
142 
143 template<class ScoringFunctionT>
144 FitParameters ProfileFitter<ScoringFunctionT>::search_fit_parameters(
145  Profile& partial_profile,
146  float min_c1, float max_c1,
147  float min_c2, float max_c2,
148  bool use_offset, float old_chi) const
149 {
150  int c1_cells = 10;
151  int c2_cells = 10;
152  if(old_chi < (std::numeric_limits<float>::max()-1)) { // second iteration
153  c1_cells = 5;
154  c2_cells = 5;
155  }
156 
157  float delta_c1 = (max_c1-min_c1)/c1_cells;
158  float delta_c2 = (max_c2-min_c2)/c2_cells;
159 
160  bool last_c1 = false;
161  bool last_c2 = false;
162  if(delta_c1 < 0.0001) { c1_cells=1; delta_c1 = max_c1-min_c1; last_c1=true; }
163  if(delta_c2 < 0.001) { c2_cells=1; delta_c2 = max_c2-min_c2; last_c2=true; }
164  float best_c1(1.0), best_c2(0.0), best_chi(old_chi);
165  bool best_set = false;
166 
167  float c1(min_c1);
168  for(int i=0; i<=c1_cells; i++, c1+= delta_c1) {
169  float c2 = min_c2;
170  for(int j=0; j<=c2_cells; j++, c2+= delta_c2) {
171  partial_profile.sum_partial_profiles(c1, c2, partial_profile);
172  float curr_chi = compute_score(partial_profile, use_offset);
173  if(!best_set || curr_chi < best_chi) {
174  best_set = true;
175  best_chi = curr_chi;
176  best_c1 = c1;
177  best_c2 = c2;
178  }
179  }
180  }
181 
182  if(std::fabs(best_chi-old_chi) > 0.0001 &&
183  (!(last_c1 && last_c2))) { //refine more
184  min_c1 = std::max(best_c1-delta_c1, min_c1);
185  max_c1 = std::min(best_c1+delta_c1, max_c1);
186  min_c2 = std::max(best_c2-delta_c2, min_c2);
187  max_c2 = std::min(best_c2+delta_c2, max_c2);
188  return search_fit_parameters(partial_profile,
189  min_c1, max_c1, min_c2, max_c2,
190  use_offset, best_chi);
191  }
192  return FitParameters(best_chi, best_c1, best_c2);
193 }
194 
195 template<class ScoringFunctionT>
196 FitParameters ProfileFitter<ScoringFunctionT>::fit_profile(
197  Profile& partial_profile,
198  float min_c1, float max_c1,
199  float min_c2, float max_c2,
200  bool use_offset,
201  const std::string fit_file_name) const {
202 
203  // compute chi value for default c1/c1 (remove?)
204  float default_c1 = 1.0, default_c2 = 0.0;
205  partial_profile.sum_partial_profiles(default_c1, default_c2, partial_profile);
206  float default_chi = compute_score(partial_profile, use_offset);
207 
208  FitParameters fp = search_fit_parameters(partial_profile,
209  min_c1, max_c1, min_c2, max_c2,
210  use_offset,
211  std::numeric_limits<float>::max());
212  float best_c1 = fp.get_c1();
213  float best_c2 = fp.get_c2();
214  fp.set_default_chi(default_chi);
215 
216  // compute a profile for best c1/c2 combination
217  partial_profile.sum_partial_profiles(best_c1, best_c2, partial_profile);
218  compute_score(partial_profile, use_offset, fit_file_name);
219 
220  // std::cout << " Chi = " << best_chi << " c1 = " << best_c1 << " c2 = "
221  // << best_c2 << " default chi = " << default_chi << std::endl;
222  return fp;
223 }
224 
225 template<class ScoringFunctionT>
226 Float ProfileFitter<ScoringFunctionT>::compute_score(
227  const Profile& model_profile,
228  Float min_q, Float max_q) const
229 {
230  Profile resampled_profile(exp_profile_.get_min_q(),
231  exp_profile_.get_max_q(),
232  exp_profile_.get_delta_q());
233  resample(model_profile, resampled_profile);
234 
235  Float score = scoring_function_->compute_score(exp_profile_,
236  resampled_profile,
237  min_q, max_q);
238  return score;
239 }
240 
241 template<class ScoringFunctionT>
242 Float ProfileFitter<ScoringFunctionT>::compute_score(
243  const Profile& model_profile,
244  bool use_offset,
245  const std::string fit_file_name) const
246 {
247  Profile resampled_profile(exp_profile_.get_min_q(),
248  exp_profile_.get_max_q(),
249  exp_profile_.get_delta_q());
250  resample(model_profile, resampled_profile);
251 
252  Float score = scoring_function_->compute_score(exp_profile_,
253  resampled_profile, use_offset);
254 
255  if(fit_file_name.length() > 0) {
256  Float offset = 0.0;
257  if(use_offset)
258  offset=scoring_function_->compute_offset(exp_profile_, resampled_profile);
259  Float c= scoring_function_->compute_scale_factor(exp_profile_,
260  resampled_profile, offset);
261  write_SAXS_fit_file(fit_file_name, resampled_profile, score, c, offset);
262  }
263  return score;
264 }
265 
266 template<class ScoringFunctionT>
267 void ProfileFitter<ScoringFunctionT>::write_SAXS_fit_file(
268  const std::string& file_name,
269  const Profile& model_profile,
270  const Float score,
271  const Float c,
272  const Float offset) const {
273  std::ofstream out_file(file_name.c_str());
274  if (!out_file) {
275  IMP_THROW("Can't open file " << file_name,
276  IOException);
277  }
278 
279  unsigned int profile_size = std::min(model_profile.size(),
280  exp_profile_.size());
281  // header line
282  out_file.precision(15);
283  out_file << "# SAXS profile: number of points = " << profile_size
284  << ", q_min = " << exp_profile_.get_min_q()
285  << ", q_max = " << exp_profile_.get_max_q();
286  out_file << ", delta_q = " << exp_profile_.get_delta_q() << std::endl;
287 
288  out_file.setf(std::ios::showpoint);
289  out_file << "# offset = " << offset << ", scaling c = " << c
290  << ", Chi = " << score << std::endl;
291  out_file << "# q exp_intensity model_intensity"
292  << std::endl;
293 
294  out_file.setf(std::ios::fixed, std::ios::floatfield);
295  // Main data
296  for (unsigned int i = 0; i < profile_size; i++) {
297  out_file.setf(std::ios::left);
298  out_file.width(10);
299  out_file.precision(5);
300  out_file << exp_profile_.get_q(i) << " ";
301 
302  out_file.setf(std::ios::left);
303  out_file.width(15);
304  out_file.precision(8);
305  out_file << exp_profile_.get_intensity(i) << " ";
306 
307  out_file.setf(std::ios::left);
308  out_file.width(15);
309  out_file.precision(8);
310  out_file << model_profile.get_intensity(i)*c - offset << std::endl;
311  }
312  out_file.close();
313 }
314 
315 IMPSAXS_END_NAMESPACE
316 
317 #endif /* IMPSAXS_PROFILE_FITTER_H */