IMP  2.3.0
The Integrative Modeling Platform
pdb.h
Go to the documentation of this file.
1 /**
2  * \file IMP/atom/pdb.h
3  * \brief Functions to read PDBs
4  *
5  * Copyright 2007-2014 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPATOM_PDB_H
10 #define IMPATOM_PDB_H
11 
12 #include <IMP/atom/atom_config.h>
13 #include "Hierarchy.h"
14 #include "Atom.h"
15 #include "element.h"
16 #include "internal/pdb.h"
17 #include "atom_macros.h"
18 #include <IMP/base/file.h>
19 #include "Selection.h"
20 #include <IMP/kernel/Model.h>
21 #include <IMP/kernel/Particle.h>
23 #include <IMP/kernel/internal/utility.h>
24 #include <boost/format.hpp>
25 
26 IMPATOM_BEGIN_NAMESPACE
27 
28 //! Select which atoms to read from a PDB file
29 /** Selector is a general purpose class used to select records from a PDB
30  file. Using descendants of this class one may implement arbitrary
31  selection functions with operator() and pass them to PDB reading functions
32  for object selection. Simple selectors can be used to build more complicated
33  ones. Inheritance means "AND" unless otherwise noted (that is, the
34  CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
35  NonAlternativePDBSelector).
36 
37  \see read_pdb
38 */
39 class IMPATOMEXPORT PDBSelector : public IMP::base::Object {
40  public:
 //! Construct the selector, forwarding \c name to the Object base class.
41  PDBSelector(std::string name) : Object(name) {}
42  //! Return true if the line should be processed
 //! Pure virtual: each concrete selector must provide its own test on
 //! the text of one PDB line passed in as \c pdb_line.
43  virtual bool get_is_selected(const std::string &pdb_line) const = 0;
 //! Virtual destructor; only declared here, the definition is not in this header.
44  virtual ~PDBSelector();
45 };
46 
48 
49 //! Select all ATOM and HETATM records which are not alternatives
51  public:
52  NonAlternativePDBSelector(std::string name = "NonAlternativePDBSelector%1%")
53  : PDBSelector(name) {}
54 
55  bool get_is_selected(const std::string &pdb_line) const {
56  return (internal::atom_alt_loc_indicator(pdb_line) == ' ' ||
57  internal::atom_alt_loc_indicator(pdb_line) == 'A');
58  }
60 };
61 
62 //! Select all non-alternative ATOM records
64  public:
65  ATOMPDBSelector(std::string name = "ATOMPDBSelector%1%")
66  : NonAlternativePDBSelector(name) {}
67 
68  bool get_is_selected(const std::string &pdb_line) const {
69  return (NonAlternativePDBSelector::get_is_selected(pdb_line) &&
70  internal::is_ATOM_rec(pdb_line));
71  }
73 };
74 
75 //! Select all CA ATOM records
77  public:
78  CAlphaPDBSelector(std::string name = "CAlphaPDBSelector%1%")
79  : NonAlternativePDBSelector(name) {}
80 
81  bool get_is_selected(const std::string &pdb_line) const {
82  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
83  const std::string type = internal::atom_type(pdb_line);
84  return (type[1] == 'C' && type[2] == 'A' && type[3] == ' ');
85  }
87 };
88 
89 //! Select all CB ATOM records
91  public:
92  CBetaPDBSelector(std::string name = "CBetaPDBSelector%1%")
93  : NonAlternativePDBSelector(name) {}
94 
95  bool get_is_selected(const std::string &pdb_line) const {
96  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
97  const std::string type = internal::atom_type(pdb_line);
98  return (type[1] == 'C' && type[2] == 'B' && type[3] == ' ');
99  }
101 };
102 
103 //! Select all C (not CA or CB) ATOM records
105  public:
106  CPDBSelector(std::string name = "CPDBSelector%1%")
107  : NonAlternativePDBSelector(name) {}
108 
109  bool get_is_selected(const std::string &pdb_line) const {
110  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
111  const std::string type = internal::atom_type(pdb_line);
112  return (type[1] == 'C' && type[2] == ' ' && type[3] == ' ');
113  }
115 };
116 
117 //! Select all N ATOM records
119  public:
120  NPDBSelector(std::string name = "NPDBSelector%1%")
121  : NonAlternativePDBSelector(name) {}
122 
123  bool get_is_selected(const std::string &pdb_line) const {
124  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
125  const std::string type = internal::atom_type(pdb_line);
126  return (type[1] == 'N' && type[2] == ' ' && type[3] == ' ');
127  }
129 };
130 
131 //! Defines a selector that will pick every ATOM and HETATM record
132 class AllPDBSelector : public PDBSelector {
133  public:
 //! Construct the selector; \c name is forwarded to PDBSelector.
134  AllPDBSelector(std::string name = "AllPDBSelector%1%") : PDBSelector(name) {}
135 
 //! Accept every line: the result is always true, whatever \c pdb_line holds.
136  bool get_is_selected(const std::string &pdb_line) const {
 // `true ||` short-circuits, so pdb_line.empty() is never evaluated and
 // cannot change the result. Mentioning pdb_line here presumably only
 // silences an unused-parameter warning.
137  return (true || pdb_line.empty());
138  }
140 };
141 
142 //! Select all ATOM and HETATM records with the given chain ids
144  public:
145  bool get_is_selected(const std::string &pdb_line) const {
146  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
147  return false;
148  }
149  for (int i = 0; i < (int)chains_.length(); i++) {
150  if (internal::atom_chain_id(pdb_line) == chains_[i]) return true;
151  }
152  return false;
153  }
155  //! The chain id can be any character in chains
156  ChainPDBSelector(const std::string &chains,
157  std::string name = "ChainPDBSelector%1%")
158  : NonAlternativePDBSelector(name), chains_(chains) {}
159 
160  private:
161  std::string chains_;
162 };
163 
164 //! Select all water ATOM and HETATM records
166  public:
167  WaterPDBSelector(std::string name = "WaterPDBSelector%1%")
168  : NonAlternativePDBSelector(name) {}
169 
170  bool get_is_selected(const std::string &pdb_line) const {
171  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
172  return false;
173  }
174  const std::string res_name = internal::atom_residue_name(pdb_line);
175  return ((res_name[0] == 'H' && res_name[1] == 'O' && res_name[2] == 'H') ||
176  (res_name[0] == 'D' && res_name[1] == 'O' && res_name[2] == 'D'));
177  }
179 };
180 
181 //! Select all hydrogen ATOM and HETATM records
182 class IMPATOMEXPORT HydrogenPDBSelector : public NonAlternativePDBSelector {
 // Private helper, only declared in this header (definition is elsewhere).
 // NOTE(review): takes the line by value, so each call copies the string;
 // a const reference would avoid that but needs a matching change to the
 // out-of-line definition.
183  bool is_hydrogen(std::string pdb_line) const;
184 
185  public:
 //! Construct the selector; \c name is forwarded to NonAlternativePDBSelector.
186  HydrogenPDBSelector(std::string name = "HydrogenPDBSelector%1%")
187  : NonAlternativePDBSelector(name) {}
188 
 //! Return true only if the base NonAlternativePDBSelector accepts the
 //! line and is_hydrogen() also returns true for it.
189  bool get_is_selected(const std::string &pdb_line) const {
 // Lines rejected by the base selector are dropped before the hydrogen test.
190  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
191  return is_hydrogen(pdb_line);
192  }
194 };
195 
196 //! Select non water and non hydrogen atoms
199 
200  public:
 //! Return true only if the base NonAlternativePDBSelector accepts the line
 //! and neither the water sub-selector (ws_) nor the hydrogen sub-selector
 //! (hs_) accepts it.
201  bool get_is_selected(const std::string &pdb_line) const {
202  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
203  return false;
204  }
205  return (!ws_->get_is_selected(pdb_line) && !hs_->get_is_selected(pdb_line));
206  }
 //! Construct with a caller-supplied name; fresh Water and Hydrogen
 //! selectors are allocated for the sub-tests.
208  NonWaterNonHydrogenPDBSelector(std::string name)
210  ws_(new WaterPDBSelector()),
211  hs_(new HydrogenPDBSelector()) {}
 // NOTE(review): the name literal below is NonWaterPDBSelector%1%, which is
 // the name of a different selector class rather than this one. It looks
 // like a copy/paste leftover; confirm whether
 // NonWaterNonHydrogenPDBSelector%1% was intended.
213  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
214  ws_(new WaterPDBSelector()),
215  hs_(new HydrogenPDBSelector()) {}
216 };
217 
218 //! Select all non-water non-alternative ATOM and HETATM records
221 
222  public:
223  bool get_is_selected(const std::string &pdb_line) const {
224  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
225  return false;
226  }
227  return (!ws_->get_is_selected(pdb_line));
228  }
230  NonWaterPDBSelector(std::string name)
231  : NonAlternativePDBSelector(name), ws_(new WaterPDBSelector()) {}
233  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
234  ws_(new WaterPDBSelector()) {}
235 };
236 
237 //! Select all backbone (N,CA,C,O) ATOM records
239  public:
240  BackbonePDBSelector(std::string name = "BackbonePDBSelector%1%")
242 
243  bool get_is_selected(const std::string &pdb_line) const {
244  if (!NonWaterNonHydrogenPDBSelector::get_is_selected(pdb_line))
245  return false;
246  const std::string type = internal::atom_type(pdb_line);
247  return ((type[1] == 'N' && type[2] == ' ' && type[3] == ' ') ||
248  (type[1] == 'C' && type[2] == 'A' && type[3] == ' ') ||
249  (type[1] == 'C' && type[2] == ' ' && type[3] == ' ') ||
250  (type[1] == 'O' && type[2] == ' ' && type[3] == ' '));
251  }
253 };
254 
255 //! Select all P (= phosphate) ATOM records
257  public:
258  PPDBSelector(std::string name = "PPDBSelector%1%")
259  : NonAlternativePDBSelector(name) {}
260 
261  bool get_is_selected(const std::string &pdb_line) const {
262  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
263  const std::string type = internal::atom_type(pdb_line);
264  return (type[1] == 'P' && type[2] == ' ' && type[3] == ' ');
265  }
267 };
268 
269 //! Select atoms which are selected by both selectors
270 /** To use do something like
271  \code
272  read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
273  \endcode
274  */
275 class AndPDBSelector : public PDBSelector {
277 
278  public:
279  bool get_is_selected(const std::string &pdb_line) const {
280  return a_->get_is_selected(pdb_line) && b_->get_is_selected(pdb_line);
281  }
284  : PDBSelector("AndPDBSelector%1%"), a_(a), b_(b) {}
285 };
286 
287 //! Select atoms which are selected by either selector
288 /** To use do something like
289  \code
290  read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
291  \endcode
292  */
293 class OrPDBSelector : public PDBSelector {
295 
296  public:
297  bool get_is_selected(const std::string &pdb_line) const {
298  return a_->get_is_selected(pdb_line) || b_->get_is_selected(pdb_line);
299  }
302  : PDBSelector("OrPDBSelector%1%"), a_(a), b_(b) {}
303 };
304 
305 //! Select atoms which are not selected by a given selector
306 /** To use do something like
307  \code
308  read_pdb(name, m, NotPDBSelector(PPDBSelector()));
309  \endcode
310  */
311 class NotPDBSelector : public PDBSelector {
313 
314  public:
 //! Return the logical negation of what the wrapped selector a_ returns
 //! for the same line.
315  bool get_is_selected(const std::string &pdb_line) const {
 // a_ is dereferenced with no null check visible here; presumably the
 // selector passed to the constructor must be non-null -- TODO confirm.
316  return !a_->get_is_selected(pdb_line);
317  }
 //! Wrap selector \c a. There is no name parameter: the object name is
 //! always the literal NotPDBSelector%1%.
319  NotPDBSelector(PDBSelector *a) : PDBSelector("NotPDBSelector%1%"), a_(a) {}
320 };
321 
322 /** @name PDB Reading
323  \anchor pdb_in
324  The read PDB methods produce a hierarchy that looks as follows:
325  - One Atom per ATOM or HETATM record in the PDB.
326  - All Atom particles have a parent which is a Residue.
327  - All Residue particles have a parent which is a Chain.
328 
329  Waters are currently dropped if they are ATOM records. This can be fixed.
330 
331  The read_pdb() functions should successfully parse all valid PDB files. It
332  can produce warnings on files which are not valid. It will attempt to read
333  such files, but all bets are off.
334 
335  When reading PDBs, PDBSelector objects can be used to choose to only process
336  certain record types. See the class documentation for more information.
337  When no PDB selector is supplied for reading, the
338  NonWaterPDBSelector is used.
339 
340  Set the IMP::LogLevel to VERBOSE to see details of parse errors.
341 */
342 //!@{
343 
/** Create the selector used as the default \c selector argument of
    read_pdb() and read_multimodel_pdb(): a newly allocated
    NonWaterPDBSelector. Every call allocates a new object.
    NOTE(review): a raw pointer is returned; presumably the callee takes
    ownership through IMP's object reference counting -- confirm.
 */
344 inline PDBSelector *get_default_pdb_selector() {
345  return new NonWaterPDBSelector();
346 }
347 
348 /** Read all the molecules in the first model of the
349  PDB file.
350  */
351 IMPATOMEXPORT Hierarchy
352  read_pdb(base::TextInput input, kernel::Model *model,
353  PDBSelector *selector = get_default_pdb_selector(),
354  bool select_first_model = true
355 #ifndef IMP_DOXYGEN
356  ,
357  bool no_radii = false
358 #endif
359  );
360 
361 /** Rewrite the coordinates of the passed hierarchy based
362  on the contents of the first model in the PDB file.
363 
364  The hierarchy must have been created by reading from a PDB
365  file and the atom numbers must correspond between the files.
366  These are not really checked.
367 
368  A ValueException is thrown if there are insufficient models
369  in the file.
370 
371  core::RigidMember particles are handled by updating the
372  core::RigidBody algebra::ReferenceFrame3D to align with the
373  loaded particles. Bad things will happen if the loaded coordinates
374  are not a rigid transform of the prior coordinates.
375  */
376 IMPATOMEXPORT void read_pdb(base::TextInput input, int model, Hierarchy h);
377 
378 /** Read all models from the PDB file.
379  */
380 IMPATOMEXPORT Hierarchies
381  read_multimodel_pdb(base::TextInput input, kernel::Model *model,
382  PDBSelector *selector = get_default_pdb_selector()
383 #ifndef IMP_DOXYGEN
384  ,
385  bool noradii = false
386 #endif
387  );
388 
389 /** @name PDB Writing
390  \anchor pdb_out
391  The methods to write a PDB expect a Hierarchy that looks as follows:
392  - all leaves are Atom particles
393  - all Atom particles have Residue particles as parents
394 
395  All Residue particles that have a Chain particle as an ancestor
396  are considered part of a protein, DNA or RNA, ones without are
397  considered heterogens.
398 
399  The functions produce files that are not valid PDB files,
400  e.g. only ATOM/HETATM lines are printed for all Atom particles
401  in the hierarchy. Complain if your favorite program can't read them and
402  we might fix it.
403 */
404 //!@{
405 
406 /** Write some atoms to a PDB.
407 */
408 IMPATOMEXPORT void write_pdb(const Selection &mhd, base::TextOutput out,
409  unsigned int model = 1);
410 
411 /** \brief Write a hierarchy to a PDB as C_alpha atoms.
412 
413  This method is used to write a non-atomic hierarchy into a PDB in a way
414  that can be read by most programs. If the leaves are Residue particles
415  then the index and residue type will be read from them. Otherwise default
416  values will be used so that each leaf ends up in a separate residue.
417 */
418 IMPATOMEXPORT void write_pdb_of_c_alphas(const Selection &mhd,
419  base::TextOutput out,
420  unsigned int model = 1);
421 
422 /** Write the hierarchies one per frame.
423 */
424 IMPATOMEXPORT void write_multimodel_pdb(const Hierarchies &mhd,
425  base::TextOutput out);
426 /** @} */
427 
428 #ifndef IMP_DOXYGEN
429 
430 /**
431  This function returns a string in PDB ATOM format
432 */
433 IMPATOMEXPORT std::string get_pdb_string(
434  const algebra::Vector3D &v, int index = -1, AtomType at = AT_CA,
435  ResidueType rt = atom::ALA, char chain = ' ', int res_index = 1,
436  char res_icode = ' ', double occpancy = 1.00, double tempFactor = 0.00,
437  Element e = C);
438 
439 /**
440  This function returns a connectivity string in PDB format
441  \note The CONECT records specify connectivity between atoms for which
442  coordinates are supplied. The connectivity is described using
443  the atom serial number as found in the entry.
444  \note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html
445 */
446 IMPATOMEXPORT std::string get_pdb_conect_record_string(int, int);
447 #endif
448 
449 /** \class WritePDBOptimizerState
450  This writes a PDB file at the specified interval during optimization.
451  If the file name contains %1% then a new file is written each time
452  with the %1% replaced by the index. Otherwise a new model is written
453  each time to the same file.
454 */
455 class IMPATOMEXPORT WritePDBOptimizerState : public kernel::OptimizerState {
456  std::string filename_;
458 
459  public:
462  std::string filename);
463  WritePDBOptimizerState(const atom::Hierarchies mh, std::string filename);
464 
465  protected:
466  virtual void do_update(unsigned int call) IMP_OVERRIDE;
469 };
470 
471 IMPATOM_END_NAMESPACE
472 
473 #endif /* IMPATOM_PDB_H */
Select non water and non hydrogen atoms.
Definition: pdb.h:197
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:315
ChainPDBSelector(const std::string &chains, std::string name="ChainPDBSelector%1%")
The chain id can be any character in chains.
Definition: pdb.h:156
Define the elements used in IMP.
Hierarchies read_multimodel_pdb(base::TextInput input, kernel::Model *model, PDBSelector *selector=get_default_pdb_selector())
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:55
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:170
Select all backbone (N,CA,C,O) ATOM records.
Definition: pdb.h:238
Select all water ATOM and HETATM records.
Definition: pdb.h:165
virtual void do_update(unsigned int)
Select all P (= phosphate) ATOM records.
Definition: pdb.h:256
const AtomType AT_CA
Select atoms which are selected by both selectors.
Definition: pdb.h:275
A smart pointer to a ref-counted Object that is a class member.
Definition: Pointer.h:147
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:261
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
Definition: object_macros.h:25
Shared optimizer state that is invoked upon commitment of new coordinates.
Simple atom decorator.
Select all N ATOM records.
Definition: pdb.h:118
Handling of file input/output.
void write_multimodel_pdb(const Hierarchies &mhd, base::TextOutput out)
virtual ModelObjectsTemp do_get_inputs() const
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:123
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:68
Select all C (not CA or CB) ATOM records.
Definition: pdb.h:104
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:145
Decorator for helping deal with a hierarchy of molecules.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:81
Select all CB ATOM records.
Definition: pdb.h:90
Select all ATOM and HETATM records which are not alternatives.
Definition: pdb.h:50
Select all non-alternative ATOM records.
Definition: pdb.h:63
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:109
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:279
Object(std::string name)
Construct an object with the given name.
void write_pdb_of_c_alphas(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Write a hierarchy to a PDB as C_alpha atoms.
IMP::kernel::Model Model
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:95
Shared optimizer state.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:223
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:201
Storage of a model, its restraints, constraints and particles.
Common base class for heavy weight IMP objects.
Definition: Object.h:106
Classes to handle individual model particles. (Note that implementation of inline functions in in int...
#define IMP_OBJECTS(Name, PluralName)
Define the types for storing sets of objects.
Definition: object_macros.h:52
Defines a selector that will pick every ATOM and HETATM record.
Definition: pdb.h:132
Various important macros for implementing decorators.
Select atoms which are not selected by a given selector.
Definition: pdb.h:311
VectorD< 3 > Vector3D
Definition: VectorD.h:395
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:136
Select all non-water non-alternative ATOM and HETATM records.
Definition: pdb.h:219
Select atoms which are selected by either selector.
Definition: pdb.h:293
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:243
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:297
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:189
Select all hydrogen ATOM and HETATM records.
Definition: pdb.h:182
Select all CA ATOM records.
Definition: pdb.h:76
void read_pdb(base::TextInput input, int model, Hierarchy h)
Element
The various elements currently supported/known.
Definition: element.h:23
Select which atoms to read from a PDB file.
Definition: pdb.h:39
Select a subset of a hierarchy.
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:143
#define IMP_OVERRIDE
Cause a compile error if this method does not override a parent method.
Class for storing model, its restraints, constraints, and particles.
Definition: kernel/Model.h:73