IMP  2.0.0
The Integrative Modeling Platform
foxs.cpp
1 /**
2  This is the program for SAXS profile computation and fitting.
3  see FOXS for webserver (salilab.org/foxs)
4  */
5 #include <IMP/Model.h>
6 #include <IMP/atom/pdb.h>
7 
8 #include <IMP/saxs/Profile.h>
10 #include <IMP/saxs/ChiScoreLog.h>
13 
14 #include "Gnuplot.h"
15 #include "JmolWriter.h"
16 
17 #include <fstream>
18 #include <vector>
19 #include <string>
20 
21 #include <boost/program_options.hpp>
22 namespace po = boost::program_options;
23 
24 int main(int argc, char **argv)
25 {
26  // output arguments
27  for (int i = 0; i < argc; i++) std::cerr << argv[i] << " ";
28  std::cerr << std::endl;
29 
30  int profile_size = 500;
31  float max_q = 0.5;
32  float background_adjustment_q = 0.0;
33  float excluded_volume_c1 = 0.0;
34  bool use_offset = false;
35  bool fit = true;
36  float MAX_C2 = 4.0; float MIN_C2 = -MAX_C2/2.0;
37  float water_layer_c2 = MAX_C2;
38  bool heavy_atoms_only = true;
39  bool residue_level = false;
40  bool score_log = false;
41  bool interval_chi = false;
42  int multi_model_pdb = 1;
43  po::options_description desc("Options");
44  desc.add_options()
45  ("help", "Any number of input PDBs and profiles is supported. \
46 Each PDB will be fitted against each profile.")
47  ("version", "FoXS (IMP applications)\nCopyright 2007-2013 IMP Inventors.\n\
48 All rights reserved. \nLicense: GNU LGPL version 2.1 or later\n\
49 <http://gnu.org/licenses/lgpl.html>.\n\
50 Written by Dina Schneidman.")
51  ("max_q,q", po::value<float>(&max_q)->default_value(0.5),
52  "maximal q value (default = 0.5)")
53  ("profile_size,s", po::value<int>(&profile_size)->default_value(500),
54  "number of points in the profile (default = 500)")
55  ("water_layer_c2,w",
56  po::value<float>(&water_layer_c2)->default_value(MAX_C2),
57  "set hydration layer density. \
58 Valid range: -2.0 < c2 < 4.0 (default = 0.0)")
59  ("hydrogens,h", "explicitly consider hydrogens in PDB files \
60 (default = false)")
61  ("residues,r", "perform fast coarse grained profile calculation using \
62 CA atoms only (default = false)")
63  ("excluded_volume,e",
64  po::value<float>(&excluded_volume_c1)->default_value(0.0),
65  "excluded volume parameter, enumerated by default. \
66 Valid range: 0.95 < c1 < 1.05")
67  ("background_q,b",
68  po::value<float>(&background_adjustment_q)->default_value(0.0),
69  "background adjustment, not used by default. if enabled, \
70 recommended q value is 0.2")
71  ("offset,o", "use offset in fitting (default = false)")
72  ("multi-model-pdb,m", po::value<int>(&multi_model_pdb)->default_value(1),
73  "1 - read the first MODEL only (default), \
74 2 - read each MODEL into a separate structure, \
75 3 - read all models into a single structure")
76  ("score_log,l", "use log(intensity) in fitting and scoring \
77 (default = false)")
78  ;
79 
80 
81  std::string form_factor_table_file;
82  bool ab_initio = false;
83  bool vacuum = false;
84  bool javascript = false;
85  po::options_description hidden("Hidden options");
86  hidden.add_options()
87  ("input-files", po::value< std::vector<std::string> >(),
88  "input PDB and profile files")
89  ("form_factor_table,f", po::value<std::string>(&form_factor_table_file),
90  "ff table name")
91  ("ab_initio,a", "compute profile for a bead model with \
92 constant form factor (default = false)")
93  ("vacuum,v", "compute profile in vacuum (default = false)")
94  ("javascript,j",
95  "output javascript for browser viewing of the results (default = false)")
96  ("interval_chi,i", "compute chi for intervals (default = false)")
97  ;
98 
99  po::options_description cmdline_options;
100  cmdline_options.add(desc).add(hidden);
101 
102  po::options_description visible("Usage: <pdb_file1> <pdb_file2> \
103 ... <profile_file1> <profile_file2> ... ");
104  visible.add(desc);
105 
106  po::positional_options_description p;
107  p.add("input-files", -1);
108  po::variables_map vm;
109  po::store(po::command_line_parser(argc,
110  argv).options(cmdline_options).positional(p).run(), vm);
111  po::notify(vm);
112 
113  std::vector<std::string> files, pdb_files, dat_files;
114  if(vm.count("input-files")) {
115  files = vm["input-files"].as< std::vector<std::string> >();
116  }
117  if(vm.count("help") || files.size() == 0) {
118  std::cout << visible << "\n";
119  return 0;
120  }
121  if(vm.count("hydrogens")) heavy_atoms_only=false;
122  if(vm.count("residues")) residue_level=true;
123  if(vm.count("offset")) use_offset=true;
124  if(vm.count("score_log")) score_log=true;
125  // no water layer or fitting in ab initio mode for now
126  if(vm.count("ab_initio")) { ab_initio=true; water_layer_c2 = 0.0;
127  fit = false; excluded_volume_c1 = 1.0; }
128  if(vm.count("vacuum")) { vacuum = true; }
129  if(vm.count("javascript")) { javascript = true; }
130  if(vm.count("interval_chi")) { interval_chi = true; }
131 
132  if(multi_model_pdb != 1 && multi_model_pdb != 2 && multi_model_pdb != 3) {
133  std::cerr << "Incorrect option for multi_model_pdb "
134  << multi_model_pdb << std::endl;
135  std::cerr << "Use 1 to read first MODEL only\n"
136  << " 2 to read each MODEL into a separate structure,\n"
137  << " 3 to read all models into a single structure\n";
138  std::cerr << "Default value of 1 is used\n";
139  multi_model_pdb = 1;
140  }
141  float delta_q = max_q / profile_size;
142 
143  bool reciprocal = false;
144  IMP::saxs::FormFactorTable* ft = NULL;
145  if(form_factor_table_file.length() > 0) {
146  //reciprocal space calculation, requires form factor file
147  ft = new IMP::saxs::FormFactorTable(form_factor_table_file,
148  0.0, max_q, delta_q);
149  reciprocal = true;
150  }
151 
152  // 1. read pdbs and profiles, prepare particles
153  IMP::Model *model = new IMP::Model();
154  std::vector<IMP::Particles> particles_vec;
155  std::vector<IMP::saxs::Profile *> exp_profiles;
156  for(unsigned int i=0; i<files.size(); i++) {
157  // check if file exists
158  std::ifstream in_file(files[i].c_str());
159  if(!in_file) {
160  std::cerr << "Can't open file " << files[i] << std::endl; return 1;
161  }
162  // A. try as pdb
163  try {
164  IMP::atom::Hierarchies mhds;
165  IMP::atom::PDBSelector* selector;
166  if(residue_level) // read CA only
167  selector = new IMP::atom::CAlphaPDBSelector();
168  else
169  if(heavy_atoms_only) // read without hydrogens
171  else // read with hydrogens
172  selector = new IMP::atom::NonWaterPDBSelector();
173 
174  if(multi_model_pdb == 2) {
175  mhds = read_multimodel_pdb(files[i], model, selector, true);
176  } else {
177  if(multi_model_pdb == 3) {
179  IMP::atom::read_pdb(files[i], model, selector, false, true);
180  mhds.push_back(mhd);
181  } else {
183  IMP::atom::read_pdb(files[i], model, selector, true, true);
184  mhds.push_back(mhd);
185  }
186  }
187 
188 
189  for(unsigned int h_index=0; h_index<mhds.size(); h_index++) {
190  IMP::ParticlesTemp particles =
191  get_by_type(mhds[h_index], IMP::atom::ATOM_TYPE);
192  if(particles.size() > 0) { // pdb file
193  std::string pdb_id = files[i];
194  if(mhds.size() > 1) {
195  pdb_id = trim_extension(files[i]) + "_m" +
196  std::string(boost::lexical_cast<std::string>(h_index+1)) + ".pdb";
197  }
198  pdb_files.push_back(pdb_id);
199  particles_vec.push_back(IMP::get_as<IMP::Particles>(particles));
200  std::cout << particles.size() << " atoms were read from PDB file "
201  << files[i];
202  if(mhds.size() > 1) std::cout << " MODEL " << h_index+1;
203  std::cout << std::endl;
204 
205  if(water_layer_c2 != 0.0) { // add radius
206  IMP::saxs::FormFactorTable* ft=IMP::saxs::default_form_factor_table();
207  IMP::saxs::FormFactorType ff_type = IMP::saxs::HEAVY_ATOMS;
208  if(residue_level) ff_type = IMP::saxs::CA_ATOMS;
209  if(!heavy_atoms_only) ff_type = IMP::saxs::ALL_ATOMS;
210  for(unsigned int p_index=0; p_index<particles.size(); p_index++) {
211  float radius = ft->get_radius(particles[p_index], ff_type);
212  IMP::core::XYZR::setup_particle(particles[p_index], radius);
213  }
214  }
215  }
216  }
217  } catch(IMP::ValueException e) { // not a pdb file
218  // B. try as dat file
219  IMP::saxs::Profile *profile = new IMP::saxs::Profile(files[i]);
220  if(profile->size() == 0) {
221  std::cerr << "can't parse input file " << files[i] << std::endl;
222  return 1;
223  } else {
224  dat_files.push_back(files[i]);
225  if(background_adjustment_q > 0.0) {
226  profile->background_adjust(background_adjustment_q);
227  }
228  exp_profiles.push_back(profile);
229  std::cout << "Profile read from file " << files[i] << " size = "
230  << profile->size() << std::endl;
231  }
232  }
233  }
234 
235 
236  // 2. compute profiles for input pdbs
237  std::vector<IMP::saxs::Profile *> profiles;
238  std::vector<IMP::saxs::FitParameters> fps;
239  for(unsigned int i=0; i<particles_vec.size(); i++) {
240  // compute surface accessibility
241  IMP::Floats surface_area;
243  if(water_layer_c2 != 0.0) {
244  surface_area =
245  s.get_solvent_accessibility(IMP::core::XYZRs(particles_vec[i]));
246  }
247 
248  // compute profile
249  std::cerr << "Computing profile for " << pdb_files[i]
250  << " " << particles_vec[i].size() << " atoms "<< std::endl;
251  IMP::saxs::Profile *partial_profile =
252  new IMP::saxs::Profile(0.0, max_q, delta_q);
253  if(reciprocal) partial_profile->set_ff_table(ft);
254  if(excluded_volume_c1 == 1.0 && water_layer_c2 == 0.0) fit = false;
255  IMP::saxs::FormFactorType ff_type = IMP::saxs::HEAVY_ATOMS;
256  if(!heavy_atoms_only) ff_type = IMP::saxs::ALL_ATOMS;
257  if(residue_level) ff_type = IMP::saxs::CA_ATOMS;
258  // calculate total volume and average radius
259  IMP::saxs::FormFactorTable* ft = IMP::saxs::default_form_factor_table();
260  float average_radius = 0.0;
261  float volume = 0;
262  for(unsigned int k=0; k<particles_vec[i].size(); k++) {
263  average_radius += ft->get_radius(particles_vec[i][k], ff_type);
264  volume += ft->get_volume(particles_vec[i][k], ff_type);
265  }
266  average_radius /= particles_vec[i].size();
267  partial_profile->set_average_radius(average_radius);
268 
269  if(dat_files.size() == 0 || !fit) { // regular profile, no c1/c2 fitting
270  if(ab_initio) { // bead model, constant form factor
271  partial_profile->
272  calculate_profile_constant_form_factor(particles_vec[i]);
273  } else {
274  if(vacuum) {
275  partial_profile->calculate_profile_partial(particles_vec[i],
276  surface_area, ff_type);
277  partial_profile->sum_partial_profiles(0.0, 0.0, *partial_profile);
278  } else {
279  partial_profile->calculate_profile(particles_vec[i],
280  ff_type, reciprocal);
281  }
282  }
283  } else { // c1/c2 fitting
284  if(reciprocal)
285  partial_profile->calculate_profile_reciprocal_partial(particles_vec[i],
286  surface_area, ff_type);
287  else
288  partial_profile->calculate_profile_partial(particles_vec[i],
289  surface_area, ff_type);
290  }
291  // save the profile
292  profiles.push_back(partial_profile);
293  // write profile file
294  std::string profile_file_name = std::string(pdb_files[i]) + ".dat";
295  partial_profile->add_errors();
296  partial_profile->write_SAXS_file(profile_file_name);
297  Gnuplot::print_profile_script(pdb_files[i]);
298 
299  // 3. fit experimental profiles
300  for(unsigned int j=0; j<dat_files.size(); j++) {
301  IMP::saxs::Profile* exp_saxs_profile = exp_profiles[j];
302 
303  std::string fit_file_name2 = trim_extension(pdb_files[i]) + "_" +
304  trim_extension(basename(const_cast<char *>(dat_files[j].c_str())))
305  + ".dat";
306 
307  float min_c1=0.95; float max_c1=1.05;
308  if(excluded_volume_c1 > 0.0) { min_c1 = max_c1 = excluded_volume_c1; }
309  if(std::fabs(water_layer_c2 - MAX_C2) < 0.00000000001) { // enumerate
310  } else { MIN_C2 = MAX_C2 = water_layer_c2; } // set specific value
311 
312  IMP::saxs::FitParameters fp;
313  if(score_log) {
314  IMP::Pointer<IMP::saxs::ProfileFitter<IMP::saxs::ChiScoreLog> > pf =
315  new IMP::saxs::ProfileFitter<IMP::saxs::ChiScoreLog>(*exp_saxs_profile);
316  fp = pf->fit_profile(*partial_profile,
317  min_c1, max_c1, MIN_C2, MAX_C2,
318  use_offset, fit_file_name2);
319  } else {
320  IMP::Pointer<IMP::saxs::ProfileFitter<IMP::saxs::ChiScore> > pf =
321  new IMP::saxs::ProfileFitter<IMP::saxs::ChiScore>(*exp_saxs_profile);
322  fp = pf->fit_profile(*partial_profile,
323  min_c1, max_c1, MIN_C2, MAX_C2,
324  use_offset, fit_file_name2);
325  if(interval_chi) {
326  std::cout << "interval_chi " <<pdb_files[i] << " "
327  << pf->compute_score(*partial_profile, 0.0, 0.05) << " "
328  << pf->compute_score(*partial_profile, 0.0, 0.1) << " "
329  << pf->compute_score(*partial_profile, 0.0, 0.15) << " "
330  << pf->compute_score(*partial_profile, 0.0, 0.2) << " "
331  << pf->compute_score(*partial_profile) << std::endl;
332  }
333  }
334  std::cout << pdb_files[i] << " " << dat_files[j]
335  << " Chi = " << fp.get_chi()
336  << " c1 = " << fp.get_c1()
337  << " c2 = " << fp.get_c2()
338  << " default chi = " << fp.get_default_chi() << std::endl;
339  fp.set_pdb_file_name(pdb_files[i]);
340  fp.set_profile_file_name(dat_files[j]);
341  fp.set_mol_index(i);
342  Gnuplot::print_fit_script(fp);
343  fps.push_back(fp);
344  }
345  }
346 
347  std::sort(fps.begin(), fps.end(),
348  IMP::saxs::FitParameters::compare_fit_parameters());
349 
350  if(pdb_files.size() > 1) {
351  Gnuplot::print_profile_script(pdb_files);
352  if(dat_files.size() > 0) Gnuplot::print_fit_script(fps);
353  }
354  if(javascript) {
355  if(dat_files.size() > 0) {
356  Gnuplot::print_canvas_script(fps, JmolWriter::MAX_DISPLAY_NUM_);
357  JmolWriter::prepare_jmol_script(fps, particles_vec, "jmoltable");
358  } else {
359  Gnuplot::print_canvas_script(pdb_files, JmolWriter::MAX_DISPLAY_NUM_);
360  JmolWriter::prepare_jmol_script(pdb_files, particles_vec, "jmoltable");
361  }
362  }
363  return 0;
364 }