IMP  2.2.1
The Integrative Modeling Platform
pdb.h
Go to the documentation of this file.
1 /**
2  * \file IMP/atom/pdb.h
3  * \brief Functions to read pdbs
4  *
5  * Copyright 2007-2014 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPATOM_PDB_H
10 #define IMPATOM_PDB_H
11 
12 #include <IMP/atom/atom_config.h>
13 #include "Hierarchy.h"
14 #include "Atom.h"
15 #include "element.h"
16 #include "internal/pdb.h"
17 #include "atom_macros.h"
18 #include <IMP/base/file.h>
19 #include "Selection.h"
20 #include <IMP/kernel/Model.h>
21 #include <IMP/kernel/Particle.h>
23 #include <IMP/kernel/internal/utility.h>
24 #include <boost/format.hpp>
25 
26 IMPATOM_BEGIN_NAMESPACE
27 
28 //! Select which atoms to read from a PDB file
29 /** Selector is a general purpose class used to select records from a PDB
30  file. Using descendants of this class one may implement arbitrary
31  selection functions with operator() and pass them to PDB reading functions
32  for object selection. Simple selectors can be used to build more complicated
33  ones. Inheritance means "AND" unless otherwise noted (that is, the
34  CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
35  NonAlternativePDBSelector).
36 
37  \see read_pdb
38 */
39 class IMPATOMEXPORT PDBSelector : public IMP::base::Object {
40  public:
41  PDBSelector(std::string name) : Object(name) {}  // forwards name to the Object base class
42  //! Return true if the line should be processed
43  virtual bool get_is_selected(const std::string &pdb_line) const = 0;  // pure virtual: every concrete selector supplies its own test
44  virtual ~PDBSelector();  // declared only; the definition lives out of line
45 };
46 
48 
49 //! Select all ATOM and HETATM records which are not alternatives
51  public:
52  NonAlternativePDBSelector(std::string name = "NonAlternativePDBSelector%1%")
53  : PDBSelector(name) {}
54 
55  bool get_is_selected(const std::string &pdb_line) const {
56  return (internal::atom_alt_loc_indicator(pdb_line) == ' ' ||
57  internal::atom_alt_loc_indicator(pdb_line) == 'A');
58  }
60 };
61 
62 //! Select all non-alternative ATOM records
64  public:
65  ATOMPDBSelector(std::string name = "ATOMPDBSelector%1%")
66  : NonAlternativePDBSelector(name) {}
67 
68  bool get_is_selected(const std::string &pdb_line) const {
69  return (NonAlternativePDBSelector::get_is_selected(pdb_line) &&
70  internal::is_ATOM_rec(pdb_line));
71  }
73 };
74 
75 //! Select all CA ATOM records
77  public:
78  CAlphaPDBSelector(std::string name = "CAlphaPDBSelector%1%")
79  : NonAlternativePDBSelector(name) {}
80 
81  bool get_is_selected(const std::string &pdb_line) const {
82  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
83  const std::string type = internal::atom_type(pdb_line);
84  return (type[1] == 'C' && type[2] == 'A' && type[3] == ' ');
85  }
87 };
88 
89 //! Select all CB ATOM records
91  public:
92  CBetaPDBSelector(std::string name = "CBetaPDBSelector%1%")
93  : NonAlternativePDBSelector(name) {}
94 
95  bool get_is_selected(const std::string &pdb_line) const {
96  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
97  const std::string type = internal::atom_type(pdb_line);
98  return (type[1] == 'C' && type[2] == 'B' && type[3] == ' ');
99  }
101 };
102 
103 //! Select all C (not CA or CB) ATOM records
105  public:
106  CPDBSelector(std::string name = "CPDBSelector%1%")
107  : NonAlternativePDBSelector(name) {}
108 
109  bool get_is_selected(const std::string &pdb_line) const {
110  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
111  const std::string type = internal::atom_type(pdb_line);
112  return (type[1] == 'C' && type[2] == ' ' && type[3] == ' ');
113  }
115 };
116 
117 //! Select all N ATOM records
119  public:
120  NPDBSelector(std::string name = "NPDBSelector%1%")
121  : NonAlternativePDBSelector(name) {}
122 
123  bool get_is_selected(const std::string &pdb_line) const {
124  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
125  const std::string type = internal::atom_type(pdb_line);
126  return (type[1] == 'N' && type[2] == ' ' && type[3] == ' ');
127  }
129 };
130 
131 //! Defines a selector that will pick every ATOM and HETATM record
132 class AllPDBSelector : public PDBSelector {
133  public:
134  AllPDBSelector(std::string name = "AllPDBSelector%1%") : PDBSelector(name) {}  // passes the (default) name straight to PDBSelector
135 
136  bool get_is_selected(const std::string &pdb_line) const {
137  return (true || pdb_line.empty());  // always true: '||' short-circuits, so pdb_line.empty() is never evaluated; presumably it is mentioned only to keep the parameter "used"
138  }
140 };
141 
142 //! Select all ATOM and HETATM records with the given chain ids
144  public:
145  bool get_is_selected(const std::string &pdb_line) const {
146  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
147  return false;
148  }
149  for (int i = 0; i < (int)chains_.length(); i++) {
150  if (internal::atom_chain_id(pdb_line) == chains_[i]) return true;
151  }
152  return false;
153  }
155  //! The chain id can be any character in chains
156  ChainPDBSelector(const std::string &chains,
157  std::string name = "ChainPDBSelector%1%")
158  : NonAlternativePDBSelector(name), chains_(chains) {}
159 
160  private:
161  std::string chains_;
162 };
163 
164 //! Select all water (residue name HOH or DOD) ATOM and HETATM records
166  public:
167  WaterPDBSelector(std::string name = "WaterPDBSelector%1%")
168  : NonAlternativePDBSelector(name) {}
169 
170  bool get_is_selected(const std::string &pdb_line) const {
171  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
172  return false;
173  }
174  const std::string res_name = internal::atom_residue_name(pdb_line);
175  return ((res_name[0] == 'H' && res_name[1] == 'O' && res_name[2] == 'H') ||
176  (res_name[0] == 'D' && res_name[1] == 'O' && res_name[2] == 'D'));
177  }
179 };
180 
181 //! Select all hydrogen ATOM and HETATM records
182 class IMPATOMEXPORT HydrogenPDBSelector : public NonAlternativePDBSelector {
183  bool is_hydrogen(std::string pdb_line) const;  // private helper; declared only, defined out of line
184 
185  public:
186  HydrogenPDBSelector(std::string name = "HydrogenPDBSelector%1%")
187  : NonAlternativePDBSelector(name) {}
188 
189  bool get_is_selected(const std::string &pdb_line) const {
190  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;  // the base-class test must accept the line first
191  return is_hydrogen(pdb_line);  // then the decision is delegated to the private helper
192  }
194 };
195 
196 //! Select non water and non hydrogen atoms
199 
200  public:
201  bool get_is_selected(const std::string &pdb_line) const {
202  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
203  return false;
204  }
205  return (!ws_->get_is_selected(pdb_line) && !hs_->get_is_selected(pdb_line));
206  }
208  NonWaterNonHydrogenPDBSelector(std::string name)
210  ws_(new WaterPDBSelector()),
211  hs_(new HydrogenPDBSelector()) {}
213  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
214  ws_(new WaterPDBSelector()),
215  hs_(new HydrogenPDBSelector()) {}
216 };
217 
218 //! Select all non-water non-alternative ATOM and HETATM records
221 
222  public:
223  bool get_is_selected(const std::string &pdb_line) const {
224  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
225  return false;
226  }
227  return (!ws_->get_is_selected(pdb_line));
228  }
230  NonWaterPDBSelector(std::string name)
231  : NonAlternativePDBSelector(name), ws_(new WaterPDBSelector()) {}
233  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
234  ws_(new WaterPDBSelector()) {}
235 };
236 
237 //! Select all backbone (N,CA,C,O) ATOM records
239  public:
240  BackbonePDBSelector(std::string name = "BackbonePDBSelector%1%")
242 
243  bool get_is_selected(const std::string &pdb_line) const {
244  if (!NonWaterNonHydrogenPDBSelector::get_is_selected(pdb_line))
245  return false;
246  const std::string type = internal::atom_type(pdb_line);
247  return ((type[1] == 'N' && type[2] == ' ' && type[3] == ' ') ||
248  (type[1] == 'C' && type[2] == 'A' && type[3] == ' ') ||
249  (type[1] == 'C' && type[2] == ' ' && type[3] == ' ') ||
250  (type[1] == 'O' && type[2] == ' ' && type[3] == ' '));
251  }
253 };
254 
255 //! Select all P (= phosphate) ATOM records
257  public:
258  PPDBSelector(std::string name = "PPDBSelector%1%")
259  : NonAlternativePDBSelector(name) {}
260 
261  bool get_is_selected(const std::string &pdb_line) const {
262  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
263  const std::string type = internal::atom_type(pdb_line);
264  return (type[1] == 'P' && type[2] == ' ' && type[3] == ' ');
265  }
267 };
268 
269 // these do not work in python as the wrapped selectors get cleaned up
270 //! Select atoms which are selected by both selectors
271 /** To use do something like
272  \code
273  read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
274  \endcode
275  */
276 class AndPDBSelector : public PDBSelector {
278 
279  public:
280  bool get_is_selected(const std::string &pdb_line) const {
281  return a_->get_is_selected(pdb_line) && b_->get_is_selected(pdb_line);  // short-circuit: b_ is consulted only when a_ accepts the line
282  }
285  : PDBSelector("AndPDBSelector%1%"), a_(a), b_(b) {}  // stores both wrapped selectors; the constructor's signature line and the a_/b_ declarations are absent from this listing
286 };
287 
288 //! Select atoms which are selected by either selector
289 /** To use do something like
290  \code
291  read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
292  \endcode
293  */
294 class OrPDBSelector : public PDBSelector {
296 
297  public:
298  bool get_is_selected(const std::string &pdb_line) const {
299  return a_->get_is_selected(pdb_line) || b_->get_is_selected(pdb_line);  // short-circuit: b_ is consulted only when a_ rejects the line
300  }
303  : PDBSelector("OrPDBSelector%1%"), a_(a), b_(b) {}  // stores both wrapped selectors; the constructor's signature line and the a_/b_ declarations are absent from this listing
304 };
305 
306 //! Select atoms which are not selected by a given selector
307 /** To use do something like
308  \code
309  read_pdb(name, m, NotPDBSelector(PPDBSelector()));
310  \endcode
311  */
312 class NotPDBSelector : public PDBSelector {
314 
315  public:
316  bool get_is_selected(const std::string &pdb_line) const {
317  return !a_->get_is_selected(pdb_line);  // logical negation of the wrapped selector's answer
318  }
320  NotPDBSelector(PDBSelector *a) : PDBSelector("NotPDBSelector%1%"), a_(a) {}  // wraps selector a; the declaration of a_ is absent from this listing
321 };
322 
323 /** @name PDB Reading
324  \anchor pdb_in
325  The read PDB methods produce a hierarchy that looks as follows:
326  - One Atom per ATOM or HETATM record in the PDB.
327  - All Atom particles have a parent which is a Residue.
328  - All Residue particles have a parent which is a Chain.
329 
330  Waters are currently dropped if they are ATOM records. This can be fixed.
331 
332  The read_pdb() functions should successfully parse all valid pdb files. It
333  can produce warnings on files which are not valid. It will attempt to read
334  such files, but all bets are off.
335 
336  When reading PDBs, PDBSelector objects can be used to choose to only process
337  certain record types. See the class documentation for more information.
338  When no PDB selector is supplied for reading, the
339  NonWaterPDBSelector is used.
340 
341  Set the IMP::LogLevel to VERBOSE to see details of parse errors.
342 */
343 //!@{
344 
345 inline PDBSelector *get_default_pdb_selector() {  // supplies the default 'selector' argument of read_pdb() and read_multimodel_pdb()
346  return new NonWaterPDBSelector();  // a fresh object on every call; NOTE(review): who releases the returned raw pointer is not visible here - confirm callers hold it in a ref-counting pointer
347 }
348 
349 /** Read all the molecules in the first model of the
350  pdb file.
351  */
352 IMPATOMEXPORT Hierarchy
353  read_pdb(base::TextInput input, kernel::Model *model,
354  PDBSelector *selector = get_default_pdb_selector(),
355  bool select_first_model = true
356 #ifndef IMP_DOXYGEN
357  ,
358  bool no_radii = false
359 #endif
360  );
361 
362 /** Rewrite the coordinates of the passed hierarchy based
363  on the contents of the first model in the pdb file.
364 
365  The hierarchy must have been created by reading from a pdb
366  file and the atom numbers must correspond between the files.
367  These are not really checked.
368 
369  A ValueException is thrown if there are insufficient models
370  in the file.
371 
372  core::RigidMember particles are handled by updating the
373  core::RigidBody algebra::ReferenceFrame3D to align with the
374  loaded particles. Bad things will happen if the loaded coordinates
375  are not a rigid transform of the prior coordinates.
376  */
377 IMPATOMEXPORT void read_pdb(base::TextInput input, int model, Hierarchy h);
378 
379 /** Read all models from the pdb file.
380  */
381 IMPATOMEXPORT Hierarchies
382  read_multimodel_pdb(base::TextInput input, kernel::Model *model,
383  PDBSelector *selector = get_default_pdb_selector()
384 #ifndef IMP_DOXYGEN
385  ,
386  bool noradii = false
387 #endif
388  );
389 
390 /** @name PDB Writing
391  \anchor pdb_out
392  The methods to write PDBs expect a Hierarchy that looks as follows:
393  - all leaves are Atom particles
394  - all Atom particles have Residue particles as parents
395 
396  All Residue particles that have a Chain particle as an ancestor
397  are considered part of a protein, DNA or RNA, ones without are
398  considered heterogens.
399 
400  The functions produce files that are not valid PDB files,
401  eg only ATOM/HETATM lines are printed for all Atom particles
402  in the hierarchy. Complain if your favorite program can't read them and
403  we might fix it.
404 */
405 //!@{
406 
407 /** Write some atoms to a PDB.
408 */
409 IMPATOMEXPORT void write_pdb(const Selection &mhd, base::TextOutput out,
410  unsigned int model = 1);
411 
412 /** \brief Write a hierarchy to a pdb as C_alpha atoms.
413 
414  This method is used to write a non-atomic hierarchy into a pdb in a way
415  that can be read by most programs. If the leaves are Residue particles
416  then the index and residue type will be read from them. Otherwise default
417  values will be used so that each leaf ends up in a separate residue.
418 */
419 IMPATOMEXPORT void write_pdb_of_c_alphas(const Selection &mhd,
420  base::TextOutput out,
421  unsigned int model = 1);
422 
423 /** Write the hierarchies one per frame.
424 */
425 IMPATOMEXPORT void write_multimodel_pdb(const Hierarchies &mhd,
426  base::TextOutput out);
427 /** @} */
428 
429 #ifndef IMP_DOXYGEN
430 
431 /**
432  This function returns a string in PDB ATOM format
433 */
434 IMPATOMEXPORT std::string get_pdb_string(
435  const algebra::Vector3D &v, int index = -1, AtomType at = AT_CA,
436  ResidueType rt = atom::ALA, char chain = ' ', int res_index = 1,
437  char res_icode = ' ', double occpancy = 1.00, double tempFactor = 0.00,
438  Element e = C);
439 
440 /**
441  This function returns a connectivity string in PDB format
442  \note The CONECT records specify connectivity between atoms for which
443  coordinates are supplied. The connectivity is described using
444  the atom serial number as found in the entry.
445  \note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html
446 */
447 IMPATOMEXPORT std::string get_pdb_conect_record_string(int, int);
448 #endif
449 
450 /** \class WritePDBOptimizerState
451  This writes a PDB file at the specified interval during optimization.
452  If the file name contains %1% then a new file is written each time
453  with the %1% replaced by the index. Otherwise a new model is written
454  each time to the same file.
455 */
456 class IMPATOMEXPORT WritePDBOptimizerState : public kernel::OptimizerState {
457  std::string filename_;  // NOTE(review): presumably the output file name (which may contain %1%) passed to the constructors - confirm against their definitions
459 
460  public:
463  std::string filename);  // tail of a constructor overload whose first line is absent from this listing
464  WritePDBOptimizerState(const atom::Hierarchies mh, std::string filename);  // overload taking the hierarchies and the file name; declared only
465 
466  protected:
467  virtual void do_update(unsigned int call) IMP_OVERRIDE;  // override, declared only; defined out of line
468  virtual kernel::ModelObjectsTemp do_get_inputs() const IMP_OVERRIDE;  // override, declared only; defined out of line
470 };
471 
472 IMPATOM_END_NAMESPACE
473 
474 #endif /* IMPATOM_PDB_H */
Select non water and non hydrogen atoms.
Definition: pdb.h:197
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:316
ChainPDBSelector(const std::string &chains, std::string name="ChainPDBSelector%1%")
The chain id can be any character in chains.
Definition: pdb.h:156
Define the elements used in IMP.
Hierarchies read_multimodel_pdb(base::TextInput input, kernel::Model *model, PDBSelector *selector=get_default_pdb_selector())
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:55
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:170
Select all backbone (N,CA,C,O) ATOM records.
Definition: pdb.h:238
Select all water (residue name HOH or DOD) ATOM and HETATM records.
Definition: pdb.h:165
virtual void do_update(unsigned int)
Select all P (= phosphate) ATOM records.
Definition: pdb.h:256
const AtomType AT_CA
Select atoms which are selected by both selectors.
Definition: pdb.h:276
A smart pointer to a ref-counted Object that is a class member.
Definition: base/Pointer.h:147
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:261
Shared optimizer state that is invoked upon commitment of new coordinates.
Simple atom decorator.
Select all N ATOM records.
Definition: pdb.h:118
void write_multimodel_pdb(const Hierarchies &mhd, base::TextOutput out)
virtual ModelObjectsTemp do_get_inputs() const
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:123
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:68
Select all C (not CA or CB) ATOM records.
Definition: pdb.h:104
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:145
Decorator for helping deal with a hierarchy of molecules.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:81
Select all CB ATOM records.
Definition: pdb.h:90
Select all ATOM and HETATM records which are not alternatives.
Definition: pdb.h:50
Select all non-alternative ATOM records.
Definition: pdb.h:63
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:109
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:280
Object(std::string name)
Construct an object with the given name.
void write_pdb_of_c_alphas(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Write a hierarchy to a pdb as C_alpha atoms.
IMP::kernel::Model Model
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:95
Shared optimizer state.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:223
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:201
Storage of a model, its restraints, constraints and particles.
Common base class for heavy weight IMP objects.
Definition: base/Object.h:106
Classes to handle individual model particles.
#define IMP_OBJECTS(Name, PluralName)
Define the types for storing sets of objects.
Defines a selector that will pick every ATOM and HETATM record.
Definition: pdb.h:132
Various important macros for implementing decorators.
Select atoms which are not selected by a given selector.
Definition: pdb.h:312
VectorD< 3 > Vector3D
Definition: VectorD.h:395
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:136
Select all non-water non-alternative ATOM and HETATM records.
Definition: pdb.h:219
Select atoms which are selected by either selector.
Definition: pdb.h:294
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:243
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:298
Handling of file input/output.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:189
Select all hydrogen ATOM and HETATM records.
Definition: pdb.h:182
Select all CA ATOM records.
Definition: pdb.h:76
void read_pdb(base::TextInput input, int model, Hierarchy h)
Element
The various elements currently supported/known.
Definition: element.h:23
Select which atoms to read from a PDB file.
Definition: pdb.h:39
A set of useful functionality on IMP::atom::Hierarchy decorators.
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:143
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:72