IMP  2.1.1
The Integrative Modeling Platform
pdb.h
Go to the documentation of this file.
1 /**
2  * \file IMP/atom/pdb.h
3  * \brief Functions to read pdbs
4  *
5  * Copyright 2007-2013 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPATOM_PDB_H
10 #define IMPATOM_PDB_H
11 
12 #include <IMP/atom/atom_config.h>
13 #include "Hierarchy.h"
14 #include "Atom.h"
15 #include "element.h"
16 #include "internal/pdb.h"
17 #include "atom_macros.h"
18 #include <IMP/base/file.h>
19 #include "Selection.h"
20 #include <IMP/kernel/Model.h>
21 #include <IMP/kernel/Particle.h>
23 #include <IMP/kernel/internal/utility.h>
24 #include <boost/format.hpp>
25 
26 IMPATOM_BEGIN_NAMESPACE
27 
28 //! Select which atoms to read from a PDB file
29 /** Selector is a general purpose class used to select records from a PDB
30  file. Using descendants of this class one may implement arbitrary
31  selection functions with operator() and pass them to PDB reading functions
32  for object selection. Simple selectors can be used to build more complicated
33  ones. Inheritance means "AND" unless otherwise noted (that is, the
34  CAlphaPDBSelector takes all non-alternate C-alphas since it inherits from
35  NonAlternativePDBSelector).
36 
37  \see read_pdb
38 */
39 class IMPATOMEXPORT PDBSelector : public IMP::base::Object {
40  public:
  //! Construct the selector, forwarding \c name to the Object base class
41  PDBSelector(std::string name) : Object(name) {}
42  //! Return true if the line should be processed
  /** Pure virtual: every concrete selector supplies its own test.
      \param pdb_line one line of the PDB file being read
   */
43  virtual bool get_is_selected(const std::string &pdb_line) const = 0;
  //! Virtual destructor (only declared here; it is not defined inline)
44  virtual ~PDBSelector();
45 };
46 
48 
49 //! Select all ATOM and HETATM records which are not alternatives
51  public:
52  NonAlternativePDBSelector(std::string name = "NonAlternativePDBSelector%1%") :
53  PDBSelector(name) {}
54 
55  bool get_is_selected(const std::string& pdb_line) const {
  // Accept the line only when internal::atom_alt_loc_indicator() yields
  // ' ' or 'A'. The helper is called once per comparison.
  // NOTE(review): presumably ' ' means "no alternate location" and 'A' the
  // first alternate - confirm against internal/pdb.h.
56  return (internal::atom_alt_loc_indicator(pdb_line) == ' ' ||
57  internal::atom_alt_loc_indicator(pdb_line) == 'A');
58  }
60 };
61 
62 //! Select all non-alternative ATOM records
64  public:
65  ATOMPDBSelector(std::string name = "ATOMPDBSelector%1%") :
67 
68  bool get_is_selected(const std::string& pdb_line) const {
  // Both conditions must hold: the NonAlternativePDBSelector test passes and
  // internal::is_ATOM_rec() accepts the line.
69  return (NonAlternativePDBSelector::get_is_selected(pdb_line) &&
70  internal::is_ATOM_rec(pdb_line));
71  }
73 };
74 
75 //! Select all CA ATOM records
77  public:
78  CAlphaPDBSelector(std::string name = "CAlphaPDBSelector%1%") :
80 
81  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
82  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "CA "; character 0 is
  // not examined and no ATOM/HETATM record-type test is made here.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
83  const std::string type = internal::atom_type(pdb_line);
84  return (type[1] == 'C' && type[2] == 'A' && type[3] == ' ');
85  }
87 };
88 
89 //! Select all CB ATOM records
91  public:
92  CBetaPDBSelector(std::string name = "CBetaPDBSelector%1%") :
94 
95  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
96  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "CB "; character 0 is
  // not examined.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
97  const std::string type = internal::atom_type(pdb_line);
98  return (type[1] == 'C' && type[2] == 'B' && type[3] == ' ');
99  }
101 };
102 
103 //! Select all C (not CA or CB) ATOM records
105  public:
106  CPDBSelector(std::string name = "CPDBSelector%1%") :
108 
109  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
110  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "C  " (a 'C' followed
  // by two blanks), which is what excludes CA and CB; character 0 is not
  // examined.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
111  const std::string type = internal::atom_type(pdb_line);
112  return (type[1] == 'C' && type[2] == ' ' && type[3] == ' ');
113  }
115 };
116 
117 //! Select all N ATOM records
119  public:
120  NPDBSelector(std::string name = "NPDBSelector%1%") :
122 
123  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
124  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "N  " (an 'N' followed
  // by two blanks); character 0 is not examined.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
125  const std::string type = internal::atom_type(pdb_line);
126  return (type[1] == 'N' && type[2] == ' ' && type[3] == ' ');
127  }
129 };
130 
131 
132 //! Defines a selector that will pick every ATOM and HETATM record
133 class AllPDBSelector : public PDBSelector {
134  public:
  //! Construct the selector; \c name is forwarded to PDBSelector
135  AllPDBSelector(std::string name = "AllPDBSelector%1%") : PDBSelector(name) {}
136 
  //! Always returns true, whatever the line contains
137  bool get_is_selected(const std::string& pdb_line) const {
  // "true || ..." short-circuits, so pdb_line.empty() is never evaluated.
  // NOTE(review): the second operand presumably exists only to reference
  // pdb_line and silence an unused-parameter warning - confirm.
138  return (true || pdb_line.empty());
139  }
141 };
142 
143 //! Select all ATOM and HETATM records with the given chain ids
145  public:
146  bool get_is_selected(const std::string &pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
147  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
148  return false;
149  }
  // Linear scan over chains_: accept as soon as the line's chain id equals
  // one of the stored characters.
  // NOTE(review): internal::atom_chain_id(pdb_line) is re-evaluated on every
  // iteration; it could be read once before the loop.
150  for (int i = 0; i < (int)chains_.length(); i++) {
151  if (internal::atom_chain_id(pdb_line) == chains_[i]) return true;
152  }
  // No stored chain id matched (also the result when chains_ is empty).
153  return false;
154  }
156  //! The chain id can be any character in chains
  /** \param chains each character is one accepted chain id; the string is
             copied into chains_
      \param name forwarded to NonAlternativePDBSelector
   */
157  ChainPDBSelector(const std::string &chains,
158  std::string name = "ChainPDBSelector%1%")
159  : NonAlternativePDBSelector(name), chains_(chains) {}
160 
161  private:
162  std::string chains_;
163 };
164 
165 //! Select all water ATOM and HETATM records
167  public:
168  WaterPDBSelector(std::string name = "WaterPDBSelector%1%") :
170 
171  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
172  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
173  return false;
174  }
  // Accept only when the first three characters of the residue name are
  // "HOH" or "DOD", i.e. this selector KEEPS water records.
  // NOTE(review): assumes internal::atom_residue_name() returns at least
  // three characters - confirm.
175  const std::string res_name = internal::atom_residue_name(pdb_line);
176  return ((res_name[0] == 'H' && res_name[1] == 'O' && res_name[2] == 'H') ||
177  (res_name[0] == 'D' && res_name[1] == 'O' && res_name[2] == 'D'));
178  }
180 };
181 
182 //! Select all hydrogen ATOM and HETATM records
183 class IMPATOMEXPORT HydrogenPDBSelector : public NonAlternativePDBSelector {
  // Hydrogen test used by get_is_selected(); only declared here, so its
  // criteria are not visible in this header.
  // NOTE(review): takes pdb_line by value; a const reference would avoid a
  // copy per line but needs a matching change in the definition.
184  bool is_hydrogen(std::string pdb_line) const;
185 
186  public:
  //! Construct the selector with an optional object name
187  HydrogenPDBSelector(std::string name = "HydrogenPDBSelector%1%") :
189 
  //! True when the line passes the non-alternative test and is_hydrogen()
190  bool get_is_selected(const std::string& pdb_line) const {
191  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
192  return is_hydrogen(pdb_line);
193  }
195 };
196 
197 //! Select non water and non hydrogen atoms
200 
201  public:
202  bool get_is_selected(const std::string &pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
203  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
204  return false;
205  }
  // Keep the line only if neither the wrapped water selector (ws_) nor the
  // wrapped hydrogen selector (hs_) selects it.
206  return (!ws_->get_is_selected(pdb_line) && !hs_->get_is_selected(pdb_line));
207  }
209  NonWaterNonHydrogenPDBSelector(std::string name)
  // Both constructors allocate a fresh WaterPDBSelector and a fresh
  // HydrogenPDBSelector for the ws_ and hs_ members.
211  ws_(new WaterPDBSelector()),
212  hs_(new HydrogenPDBSelector()) {}
  // NOTE(review): the default name below is "NonWaterPDBSelector%1%", the
  // same string NonWaterPDBSelector uses; this looks like a copy-paste and
  // was presumably meant to be "NonWaterNonHydrogenPDBSelector%1%" - confirm
  // before changing, since the string is visible at runtime.
214  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
215  ws_(new WaterPDBSelector()),
216  hs_(new HydrogenPDBSelector()) {}
217 };
218 
219 //! Select all non-water non-alternative ATOM and HETATM records
222 
223  public:
224  bool get_is_selected(const std::string &pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
225  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) {
226  return false;
227  }
  // Keep the line only if the wrapped water selector (ws_) does not select it.
228  return (!ws_->get_is_selected(pdb_line));
229  }
  // Named constructor: forwards \c name to the base class and allocates the
  // WaterPDBSelector whose result get_is_selected() negates.
231  NonWaterPDBSelector(std::string name)
232  : NonAlternativePDBSelector(name), ws_(new WaterPDBSelector()) {}
  // Default constructor: same, with the fixed name "NonWaterPDBSelector%1%".
234  : NonAlternativePDBSelector("NonWaterPDBSelector%1%"),
235  ws_(new WaterPDBSelector()) {}
236 };
237 
238 //! Select all backbone (N,CA,C,O) ATOM records
240  public:
241  BackbonePDBSelector(std::string name = "BackbonePDBSelector%1%") :
243 
244  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything NonWaterNonHydrogenPDBSelector rejects.
245  if(!NonWaterNonHydrogenPDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "N  ", "CA ", "C  " or
  // "O  "; character 0 is not examined.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
246  const std::string type = internal::atom_type(pdb_line);
247  return ((type[1] == 'N' && type[2] == ' ' && type[3] == ' ') ||
248  (type[1] == 'C' && type[2] == 'A' && type[3] == ' ') ||
249  (type[1] == 'C' && type[2] == ' ' && type[3] == ' ') ||
250  (type[1] == 'O' && type[2] == ' ' && type[3] == ' '));
251  }
253 };
254 
255 //! Select all P (= phosphate) ATOM records
257  public:
258  PPDBSelector(std::string name = "PPDBSelector%1%") :
260 
261  bool get_is_selected(const std::string& pdb_line) const {
  // Reject anything the NonAlternativePDBSelector test rejects.
262  if (!NonAlternativePDBSelector::get_is_selected(pdb_line)) return false;
  // Characters 1..3 of the atom-type string must read "P  " (a 'P' followed
  // by two blanks); character 0 is not examined.
  // NOTE(review): assumes internal::atom_type() returns at least four
  // characters - confirm.
263  const std::string type = internal::atom_type(pdb_line);
264  return (type[1] == 'P' && type[2] == ' ' && type[3] == ' ');
265  }
267 };
268 
269 // these do not work in python as the wrapped selectors get cleaned up
270 //! Select atoms which are selected by both selectors
271 /** To use do something like
272  \code
273  read_pdb(name, m, AndPDBSelector(PPDBSelector(), WaterPDBSelector()));
274  \endcode
275  */
276 class AndPDBSelector : public PDBSelector {
278 
279  public:
280  bool get_is_selected(const std::string &pdb_line) const {
  // Logical AND of the two wrapped selectors; b_ is not consulted when a_
  // already rejects the line (short-circuit).
281  return a_->get_is_selected(pdb_line) && b_->get_is_selected(pdb_line);
282  }
285  : PDBSelector("AndPDBSelector%1%"), a_(a), b_(b) {}
286 };
287 
288 //! Select atoms which are selected by either selector
289 /** To use do something like
290  \code
291  read_pdb(name, m, OrPDBSelector(PPDBSelector(), WaterPDBSelector()));
292  \endcode
293  */
294 class OrPDBSelector : public PDBSelector {
296 
297  public:
298  bool get_is_selected(const std::string &pdb_line) const {
  // Logical OR of the two wrapped selectors; b_ is not consulted when a_
  // already accepts the line (short-circuit).
299  return a_->get_is_selected(pdb_line) || b_->get_is_selected(pdb_line);
300  }
303  : PDBSelector("OrPDBSelector%1%"), a_(a), b_(b) {}
304 };
305 
306 //! Select atoms which are not selected by a given selector
307 /** To use do something like
308  \code
309  read_pdb(name, m, NotPDBSelector(PPDBSelector()));
310  \endcode
311  */
312 class NotPDBSelector : public PDBSelector {
314 
315  public:
316  bool get_is_selected(const std::string &pdb_line) const {
  // Logical negation of the wrapped selector's answer.
317  return !a_->get_is_selected(pdb_line);
318  }
  // Stores the selector to negate in a_; the object name is fixed to
  // "NotPDBSelector%1%". NOTE(review): the argument is a pointer, whereas the
  // usage example in the class comment passes a temporary - confirm which
  // form callers should use.
320  NotPDBSelector(PDBSelector *a) : PDBSelector("NotPDBSelector%1%"), a_(a) {}
321 };
322 
323 /** @name PDB Reading
324  \anchor pdb_in
325  The read PDB methods produce a hierarchy that looks as follows:
326  - One Atom per ATOM or HETATM record in the PDB.
327  - All Atom particles have a parent which is a Residue.
328  - All Residue particles have a parent which is a Chain.
329 
330  Waters are currently dropped if they are ATOM records. This can be fixed.
331 
332  The read_pdb() functions should successfully parse all valid pdb files. It
333  can produce warnings on files which are not valid. It will attempt to read
334  such files, but all bets are off.
335 
336  When reading PDBs, PDBSelector objects can be used to choose to only process
337  certain record types. See the class documentation for more information.
338  When no PDB selector is supplied for reading, the
339  NonWaterPDBSelector is used.
340 
341  Set the IMP::LogLevel to VERBOSE to see details of parse errors.
342 */
343 //!@{
344 
/** Return a newly allocated NonWaterPDBSelector.

    It is the default value of the \c selector argument of read_pdb() and
    read_multimodel_pdb(). NOTE(review): the caller receives a raw pointer;
    presumably the object's lifetime is handled by IMP's reference counting
    once it is handed to a reader - confirm.
 */
345 inline PDBSelector* get_default_pdb_selector() {
346  return new NonWaterPDBSelector();
347 }
348 
349 /** Read all the molecules in the first model of the
350  pdb file.

 \param input the PDB text to read
 \param model the kernel::Model supplied by the caller (presumably the new
        particles are created in it - confirm against the definition)
 \param selector chooses which records are processed; defaults to
        get_default_pdb_selector()
 \param select_first_model defaults to true; the effect of passing false is
        not visible in this header - TODO confirm
 \note A further \c no_radii argument (default false) is declared only when
       IMP_DOXYGEN is not defined.
351  */
352 IMPATOMEXPORT Hierarchy read_pdb(base::TextInput input, kernel::Model *model,
353  PDBSelector *selector =
354  get_default_pdb_selector(),
355  bool select_first_model = true
356 #ifndef IMP_DOXYGEN
357  ,
358  bool no_radii = false
359 #endif
360  );
361 
362 /** Rewrite the coordinates of the passed hierarchy based
363  on the contents of the first model in the pdb file.
364 
365  The hierarchy must have been created by reading from a pdb
366  file and the atom numbers must correspond between the files.
367  These are not really checked.
368 
369  A ValueException is thrown if there are insufficient models
370  in the file.
371 
372  core::RigidMember particles are handled by updating the
373  core::RigidBody algebra::ReferenceFrame3D to align with the
374  loaded particles. Bad things will happen if the loaded coordinates
375  are not a rigid transform of the prior coordinates.

 NOTE(review): the text above says "first model", yet the signature takes
 an \c int \c model argument and the exception is about "insufficient
 models"; presumably \c model selects which model of the file is used -
 confirm and reword.
376  */
377 IMPATOMEXPORT void read_pdb(base::TextInput input, int model, Hierarchy h);
378 
379 /** Read all models from the pdb file.

 \param input the PDB text to read
 \param model the kernel::Model supplied by the caller
 \param selector chooses which records are processed; defaults to
        get_default_pdb_selector()
 \return a Hierarchies collection (presumably one Hierarchy per model in
         the file - confirm against the definition)
 \note A further \c noradii argument (default false) is declared only when
       IMP_DOXYGEN is not defined. NOTE(review): read_pdb() spells the
       equivalent argument \c no_radii.
380  */
381 IMPATOMEXPORT Hierarchies read_multimodel_pdb(base::TextInput input,
382  kernel::Model *model,
383  PDBSelector *selector =
384  get_default_pdb_selector()
385 #ifndef IMP_DOXYGEN
386  ,
387  bool noradii = false
388 #endif
389  );
390 
391 /** @name PDB Writing
392  \anchor pdb_out
393  The methods to write PDBs expect a Hierarchy that looks as follows:
394  - all leaves are Atom particles
395  - all Atom particles have Residue particles as parents
396 
397  All Residue particles that have a Chain particle as an ancestor
398  are considered part of a protein, DNA or RNA, ones without are
399  considered heterogens.
400 
401  The functions produce files that are not valid PDB files,
402  eg only ATOM/HETATM lines are printed for all Atom particles
403  in the hierarchy. Complain if your favorite program can't read them and
404  we might fix it.
405 */
406 //!@{
407 
408 /** Write some atoms to a PDB.

 \param mhd the Selection naming the atoms to write
 \param out where the PDB text is written
 \param model defaults to 1 (presumably the number written in the MODEL
        record - confirm against the definition)
409 */
410 IMPATOMEXPORT void write_pdb(const Selection &mhd, base::TextOutput out,
411  unsigned int model = 1);
412 
413 /** \brief Write a hierarchy to a pdb as C_alpha atoms.
414 
415  This method is used to write a non-atomic hierarchy into a pdb in a way
416  that can be read by most programs. If the leaves are Residue particles
417  then the index and residue type will be read from them. Otherwise default
418  values will be used so that each leaf ends up in a separate residue.

 \param mhd the Selection naming the particles to write
 \param out where the PDB text is written
 \param model defaults to 1 (presumably the number written in the MODEL
        record - confirm against the definition)
419 */
420 IMPATOMEXPORT void write_pdb_of_c_alphas(const Selection &mhd,
421  base::TextOutput out,
422  unsigned int model = 1);
423 
424 /** Write the hierarchies one per frame.

 \param mhd the hierarchies to write
 \param out where the PDB text is written
425 */
426 IMPATOMEXPORT void write_multimodel_pdb(const Hierarchies &mhd,
427  base::TextOutput out);
428 /** @} */
429 
430 #ifndef IMP_DOXYGEN
431 
432 /**
433  This function returns a string in PDB ATOM format

 Only \c v has no default. The declared defaults are: index -1, atom type
 AT_C, residue type atom::ALA, chain ' ', residue index 1, insertion code
 ' ', occupancy 1.00, temperature factor 0.00 and element C.

 NOTE(review): the occupancy parameter is spelled \c occpancy; renaming it
 may affect keyword-argument use in generated wrappers - confirm before
 fixing.
434 */
435 IMPATOMEXPORT std::string get_pdb_string(
436  const algebra::Vector3D &v, int index = -1, AtomType at = AT_C,
437  ResidueType rt = atom::ALA, char chain = ' ', int res_index = 1,
438  char res_icode = ' ', double occpancy = 1.00, double tempFactor = 0.00,
439  Element e = C);
440 
441 /**
442  This function returns a connectivity string in PDB format
443  \note The CONECT records specify connectivity between atoms for which
444  coordinates are supplied. The connectivity is described using
445  the atom serial number as found in the entry.
446  \note http://www.bmsc.washington.edu/CrystaLinks/man/pdb/guide2.2_frame.html

 The two \c int parameters are unnamed in this declaration; presumably they
 are the serial numbers of the two connected atoms - confirm against the
 definition.
447 */
448 IMPATOMEXPORT std::string get_pdb_conect_record_string(int, int);
449 #endif
450 
451 /** \class WritePDBOptimizerState
452  This writes a PDB file at the specified interval during optimization.
453  If the file name contains %1% then a new file is written each time
454  with the %1% replaced by the index. Otherwise a new model is written
455  each time to the same file.
456 */
457 class IMPATOMEXPORT WritePDBOptimizerState : public kernel::OptimizerState {
  // Output file name; per the class comment it may contain %1%.
458  std::string filename_;
460 
461  public:
464  std::string filename);
  // Construct from the hierarchies to write and the output file name.
  // NOTE(review): mh is taken by const value, which copies the container; a
  // const reference would avoid that but needs a matching definition change.
465  WritePDBOptimizerState(const atom::Hierarchies mh, std::string filename);
466 
467  protected:
  // Overrides of the kernel::OptimizerState hooks; both are only declared
  // here. NOTE(review): presumably do_update() performs the write and
  // do_get_inputs() reports what is read - confirm against the definitions.
468  virtual void do_update(unsigned int call) IMP_OVERRIDE;
469  virtual kernel::ModelObjectsTemp do_get_inputs() const IMP_OVERRIDE;
471 };
472 
473 IMPATOM_END_NAMESPACE
474 
475 #endif /* IMPATOM_PDB_H */
Select non water and non hydrogen atoms.
Definition: pdb.h:198
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:316
ChainPDBSelector(const std::string &chains, std::string name="ChainPDBSelector%1%")
The chain id can be any character in chains.
Definition: pdb.h:157
Define the elements used in IMP.
void write_pdb(const Selection &mhd, base::TextOutput out, unsigned int model=1)
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:55
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:171
Select all backbone (N,CA,C,O) ATOM records.
Definition: pdb.h:239
Select all water ATOM and HETATM records.
Definition: pdb.h:166
virtual void do_update(unsigned int)
Hierarchies read_multimodel_pdb(base::TextInput input, kernel::Model *model, PDBSelector *selector=)
Select all P (= phosphate) ATOM records.
Definition: pdb.h:256
Select atoms which are selected by both selectors.
Definition: pdb.h:276
A smart pointer to a ref-counted Object that is a class member.
Definition: base/Pointer.h:147
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:261
Shared optimizer state that is invoked upon commitment of new coordinates.
Select all N ATOM records.
Definition: pdb.h:118
void write_multimodel_pdb(const Hierarchies &mhd, base::TextOutput out)
virtual ModelObjectsTemp do_get_inputs() const
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:123
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:68
Select all C (not CA or CB) ATOM records.
Definition: pdb.h:104
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:146
Decorator for helping deal with a hierarchy of molecules.
Hierarchy read_pdb(base::TextInput input, kernel::Model *model, PDBSelector *selector=get_default_pdb_selector(), bool select_first_model=true)
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:81
Select all CB ATOM records.
Definition: pdb.h:90
Select all ATOM and HETATM records which are not alternatives.
Definition: pdb.h:50
Select all non-alternative ATOM records.
Definition: pdb.h:63
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:109
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:280
void write_pdb_of_c_alphas(const Selection &mhd, base::TextOutput out, unsigned int model=1)
Write a hierarchy to a pdb as C_alpha atoms.
IMP::kernel::Model Model
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:95
Shared optimizer state.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:224
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:202
Storage of a model, its restraints, constraints and particles.
Common base class for heavy weight IMP objects.
Classes to handle individual model particles.
#define IMP_OBJECTS(Name, PluralName)
Define the types for storing sets of objects.
Defines a selector that will pick every ATOM and HETATM record.
Definition: pdb.h:133
Various important macros for implementing decorators.
Select atoms which are not selected by a given selector.
Definition: pdb.h:312
VectorD< 3 > Vector3D
Definition: VectorD.h:587
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:137
Select all non-water non-alternative ATOM and HETATM records.
Definition: pdb.h:220
Select atoms which are selected by either selector.
Definition: pdb.h:294
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:244
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:298
Handling of file input/output.
bool get_is_selected(const std::string &pdb_line) const
Return true if the line should be processed.
Definition: pdb.h:190
Select all hydrogen ATOM and HETATM records.
Definition: pdb.h:183
Select all CA ATOM records.
Definition: pdb.h:76
Element
The various elements currently supported/known.
Definition: element.h:23
Select which atoms to read from a PDB file.
Definition: pdb.h:39
A set of useful functionality on IMP::atom::Hierarchy decorators.
Select all ATOM and HETATM records with the given chain ids.
Definition: pdb.h:144
Class for storing model, its restraints, constraints, and particles.