IMP  2.0.0
The Integrative Modeling Platform
pdb.h
Go to the documentation of this file.
1 /**
2  * \file IMP/atom/pdb.h
3  * \brief Functions to read pdbs
4  *
5  * Copyright 2007-2013 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPATOM_PDB_H
10 #define IMPATOM_PDB_H
11 
12 #include <IMP/atom/atom_config.h>
13 #include "Hierarchy.h"
14 #include "Atom.h"
15 #include "element.h"
16 #include "internal/pdb.h"
17 #include "atom_macros.h"
18 #include <IMP/file.h>
19 #include "Selection.h"
20 #include <IMP/Model.h>
21 #include <IMP/Particle.h>
22 #include <IMP/OptimizerState.h>
23 #include <IMP/internal/utility.h>
24 #include <boost/format.hpp>
25 
26 IMPATOM_BEGIN_NAMESPACE
27 
28 
29 //! Select which atoms to read from a PDB file
30 /** Selector is a general purpose class used to select records from a PDB
31  file. Using descendants of this class one may implement arbitrary
32  selection functions with operator() and pass them to PDB reading functions
33  for object selection. Simple selectors can be used to build more complicated
34  ones. Inheritence means "AND" unless otherwise noted (that is, the
35  CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
36  NonAlternativePDBSelector).
37 
38  \see read_pdb
39 */
40 class IMPATOMEXPORT PDBSelector: public IMP::base::Object {
41  public:
42  PDBSelector(std::string name): Object(name){}
43  //! Return true if the line should be processed
44  virtual bool get_is_selected(const std::string& pdb_line) const=0;
45  virtual ~PDBSelector();
46 };
47 
49 
50 //! Select all ATOM and HETATM records which are not alternatives
52  public:
54  return (internal::atom_alt_loc_indicator(pdb_line) == ' '
55  || internal::atom_alt_loc_indicator(pdb_line)
56  == 'A'),out << "");
57 };
58 
59 //! Select all non-alternative ATOM records
61 public:
63  return NonAlternativePDBSelector::get_is_selected(pdb_line)
64  && internal::is_ATOM_rec(pdb_line),out << "");
65 };
66 
67 
68 //! Select all CA ATOM records
70  public:
72  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
73  return false;
74  }
75  const std::string type = internal::atom_type(pdb_line);
76  return (type[1] == 'C' && type[2] == 'A'
77  && type[3] == ' '),out << "");
78 };
79 
80 //! Select all CB ATOM records
82  public:
84  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)){
85  return false;
86  }
87  const std::string type = internal::atom_type(pdb_line);
88  return (type[1] == 'C' && type[2] == 'B'
89  && type[3] == ' '),out << "");
90 };
91 
92 //! Select all C (not CA or CB) ATOM records
94  public:
96  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
97  return false;
98  }
99  const std::string type = internal::atom_type(pdb_line);
100  return (type[1] == 'C' && type[2] == ' ' && type[3] == ' '),
101  out << ""
102  );
103 };
104 
105 //! Select all N ATOM records
107  public:
109  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
110  return false;
111  }
112  const std::string type = internal::atom_type(pdb_line);
113  return (type[1] == 'N' && type[2] == ' ' && type[3] == ' '),
114  out << ""
115  );
116 };
117 
118 //! Defines a selector that will pick every ATOM and HETATM record
119 class AllPDBSelector : public PDBSelector {
120 public:
122  return true || pdb_line.empty(),
123  out << "");
124 };
125 
126 //! Select all ATOM and HETATMrecords with the given chain ids
128  public:
129  bool get_is_selected(const std::string &pdb_line) const {
130  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
131  return false;
132  }
133  for(int i=0; i < (int)chains_.length(); i++) {
134  if(internal::atom_chain_id(pdb_line) == chains_[i])
135  return true;
136  }
137  return false;
138  }
139  IMP_OBJECT_INLINE(ChainPDBSelector,out << chains_,);
140  //! The chain id can be any character in chains
141  ChainPDBSelector(const std::string &chains,
142  std::string name="ChainPDBSelector%1%"):
143  NonAlternativePDBSelector(name), chains_(chains) {}
144  private:
145  std::string chains_;
146 };
147 
148 //! Select all non-water ATOM and HETATMrecords
150  public:
152  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
153  return false;
154  }
155  const std::string res_name
156  = internal::atom_residue_name(pdb_line);
157  return ((res_name[0]=='H' && res_name[1] =='O'
158  && res_name[2]=='H') ||
159  (res_name[0]=='D' && res_name[1] =='O'
160  && res_name[2]=='D')),
161  out << ""
162  );
163 };
164 
165 //! Select all hydrogen ATOM and HETATM records
166 class IMPATOMEXPORT HydrogenPDBSelector : public NonAlternativePDBSelector {
167  bool is_hydrogen(std::string pdb_line) const;
168  public:
170  return is_hydrogen(pdb_line);,
171  out << "");
172 };
173 
174 //! Select non water and non hydrogen atoms
176  IMP::OwnerPointer<PDBSelector> ws_, hs_;
177  public:
178  bool get_is_selected(const std::string &pdb_line) const {
179  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
180  return false;
181  }
182  return (! ws_->get_is_selected(pdb_line)
183  && ! hs_->get_is_selected(pdb_line));
184  }
186  NonWaterNonHydrogenPDBSelector(std::string name):
188  ws_(new WaterPDBSelector()),
189  hs_(new HydrogenPDBSelector()){}
190  NonWaterNonHydrogenPDBSelector():
191  NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
192  ws_(new WaterPDBSelector()),
193  hs_(new HydrogenPDBSelector()){}
194 };
195 
196 //! Select all non-water non-alternative ATOM and HETATM records
198  IMP::OwnerPointer<PDBSelector> ws_;
199  public:
200  bool get_is_selected(const std::string &pdb_line) const {
201  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
202  return false;
203  }
204  return( ! ws_->get_is_selected(pdb_line));
205  }
207  NonWaterPDBSelector(std::string name): NonAlternativePDBSelector(name),
208  ws_(new WaterPDBSelector()){}
209  NonWaterPDBSelector(): NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
210  ws_(new WaterPDBSelector()){}
211 };
212 
213 //! Select all P ATOM records
215  public:
218  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
219  return false;
220  }
221  const std::string type = internal::atom_type(pdb_line);
222  return (type[1] == 'P' && type[2] == ' '),
223  out << "");
224 };
225 
226 // these do not work in python as the wrapped selectors get cleaned up
227 //! Select atoms which are selected by both selectors
228 /** To use do something like
229  \code
230  read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
231  \endcode
232  */
234  const IMP::OwnerPointer<PDBSelector> a_, b_;
235 public:
236  bool get_is_selected(const std::string &pdb_line) const {
237  return a_->get_is_selected(pdb_line)
238  && b_->get_is_selected(pdb_line);
239  }
240  IMP_OBJECT_INLINE(AndPDBSelector,out << *a_ << " and " << *b_,);
242  PDBSelector("AndPDBSelector%1%"), a_(a), b_(b){}
243 };
244 
245 //! Select atoms which are selected by either selector
246 /** To use do something like
247  \code
248  read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
249  \endcode
250  */
251 class OrPDBSelector: public PDBSelector {
252  const IMP::OwnerPointer<PDBSelector> a_, b_;
253 public:
254  bool get_is_selected(const std::string &pdb_line) const {
255  return a_->get_is_selected(pdb_line)
256  || b_->get_is_selected(pdb_line);
257  }
258  IMP_OBJECT_INLINE(OrPDBSelector,out << *a_ << " or " << *b_,);
260  PDBSelector("OrPDBSelector%1%"), a_(a), b_(b){}
261 };
262 
263 //! Select atoms which not selected by a given selector
264 /** To use do something like
265  \code
266  read_pdb(name, m, NotPDBSelector(PPDBSelector()));
267  \endcode
268  */
270  const IMP::OwnerPointer<PDBSelector> a_;
271 public:
272  bool get_is_selected(const std::string &pdb_line) const {
273  return !a_->get_is_selected(pdb_line);
274  }
275  IMP_OBJECT_INLINE(NotPDBSelector,out << "not" << *a_,);
276  NotPDBSelector( PDBSelector *a): PDBSelector("NotPDBSelector%1%"),
277  a_(a){}
278 };
279 
280 
281 /** @name PDB Reading
282  \anchor pdb_in
283  The read PDB methods produce a hierarchy that looks as follows:
284  - One Atom per ATOM or HETATM record in the PDB.
285  - All Atom particles have a parent which is a Residue.
286  - All Residue particles have a parent which is a Chain.
287 
288  Waters are currently dropped if they are ATOM records. This can be fixed.
289 
290  The read_pdb() functions should successfully parse all valid pdb files. It
291  can produce warnings on files which are not valid. It will attempt to read
292  such files, but all bets are off.
293 
294  When reading PDBs, PDBSelector objects can be used to choose to only process
295  certain record types. See the class documentation for more information.
296  When no PDB selector is supplied for reading, the
297  NonWaterPDBSelector is used.
298 
299  Set the IMP::LogLevel to VERBOSE to see details of parse errors.
300 */
301 //!@{
302 
303 /** Read a all the molecules in the first model of the
304  pdb file.
305  */
306 IMPATOMEXPORT Hierarchy read_pdb(base::TextInput input,
307  Model* model);
308 
309 /** Rewrite the coordinates of the passed hierarchy based
310  on the contents of the first model in the pdb file.
311 
312  The hierarchy must have been created by reading from a pdb
313  file and the atom numbers must correspond between the files.
314  These are not really checked.
315 
316  A ValueException is thrown if there are insufficient models
317  in the file.
318 
319  core::RigidMember particles are handled by updating the
320  core::RigidBody algebra::ReferenceFrame3D to align with the
321  loaded particles. Bad things will happen if the loaded coordinates
322  are not a rigid transform of the prior coordinates.
323  */
324 IMPATOMEXPORT void read_pdb(base::TextInput input,
325  int model,
326  Hierarchy h);
327 
328 
329 /** \relatesalso Hierarchy
330  */
331 IMPATOMEXPORT Hierarchy
332 read_pdb(base::TextInput input,
333  Model* model,
334  PDBSelector* selector,
335  bool select_first_model = true
336 #ifndef IMP_DOXYGEN
337  , bool no_radii=false
338 #endif
339  );
340 
341 
342 
343 /** Read all models from the pdb file.
344  */
345 IMPATOMEXPORT Hierarchies read_multimodel_pdb(base::TextInput input,
346  Model *model,
347  PDBSelector* selector
348 #ifndef IMP_DOXYGEN
349  , bool noradii=false
350 #endif
351  );
352 /** Read all models from the pdb file.
353  */
354 IMPATOMEXPORT Hierarchies read_multimodel_pdb(base::TextInput input,
355  Model *model);
356 /** @} */
357 
358 /** @name PDB Writing
359  \anchor pdb_out
360  The methods to write a PDBs expects a Hierarchy that looks as follows:
361  - all leaves are Atom particles
362  - all Atom particles have Residue particles as parents
363 
364  All Residue particles that have a Chain particle as an ancestor
365  are considered part of a protein, DNA or RNA, ones without are
366  considered heterogens.
367 
368  The functions produce files that are not valid PDB files,
369  eg only ATOM/HETATM lines are printed for all Atom particles
370  in the hierarchy. Complain if your favorite program can't read them and
371  we might fix it.
372 */
373 //!@{
374 
375 /** Write some atoms to a PDB.
376 */
377 IMPATOMEXPORT void write_pdb(const Selection& mhd,
378  base::TextOutput out,
379  unsigned int model=0);
380 
381 /** \brief Write a hierarchy to a pdb as C_alpha atoms.
382 
383  This method is used to write a non-atomic hierarchy into a pdb in a way
384  that can be read by most programs. If the leaves are Residue particles
385  then the index and residue type will be read from them. Otherwise default
386  values will be used so that each leaf ends up in a separate residue.
387 */
388 IMPATOMEXPORT void write_pdb_of_c_alphas(const Selection& mhd,
389  base::TextOutput out,
390  unsigned int model=0);
391 
392 /** Write the hierarchies one per frame.
393 */
394 IMPATOMEXPORT void write_multimodel_pdb(const Hierarchies& mhd,
395  base::TextOutput out);
396 /** @} */
397 
398 
399 
400 
401 #ifndef IMP_DOXYGEN
402 
403 /**
404  This function returns a string in PDB ATOM format
405 */
406 IMPATOMEXPORT std::string get_pdb_string(const algebra::Vector3D& v,
407  int index = -1,
408  AtomType at = AT_C,
409  ResidueType rt = atom::ALA,
410  char chain = ' ',
411  int res_index = 1,
412  char res_icode = ' ',
413  double occpancy = 1.00,
414  double tempFactor = 0.00,
415  Element e = C);
416 
417 /**
418  This function returns a connectivity string in PDB format
419  /note The CONECT records specify connectivity between atoms for which
420  coordinates are supplied. The connectivity is described using
421  the atom serial number as found in the entry.
422  /note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html
423 */
424 IMPATOMEXPORT std::string get_pdb_conect_record_string(int,int);
425 #endif
426 
427 
428 
429 /** \class WritePDBOptimizerState
430  This writes a PDB file at the specified interval during optimization.
431  If the file name contains %1% then a new file is written each time
432  with the %1% replaced by the index. Otherwise a new model is written
433  each time to the same file.
434 */
435 IMP_MODEL_SAVE(WritePDB, (const atom::Hierarchies& mh, std::string file_name),
436  atom::Hierarchies mh_;,
437  mh_=mh;,
438  ,
439  {
440  base::TextOutput to(file_name, append);
441  IMP_LOG_TERSE( "Writing pdb file " << file_name << std::endl);
442  atom::write_pdb(mh_,to, append?call:0);
443  });
444 
445 
446 IMPATOM_END_NAMESPACE
447 
448 #endif /* IMPATOM_PDB_H */