IMP logo
IMP Reference Guide  develop.50fdd7fa33,2025/09/05
The Integrative Modeling Platform
get_database_for_one_emdb_using_parts.py
1 import glob
2 import os
3 import argparse
4 import IMP
5 import IMP.em
6 import IMP.core
7 from . import tools
8 from . import config
9 from . import imp_tools
10 
11 '''
12 This script takes in an MRC file and a set of PDB structure_element files and
13 extracts the density values around the residue as defined in config.py
14 '''
15 
16 IMP.set_log_level(IMP.SILENT)
17 
18 parser = argparse.ArgumentParser(
19  description='Given an MRC file and set of PDB structure_element files, '
20  'extracts the density values around the residue')
21 parser.add_argument(
22  "--database_home", dest="database_home", type=str,
23  help="Directory containing all data files used in the protocol",
24  default=".")
25 parser.add_argument(
26  "--reference_pdb", dest="refpdb", type=str,
27  help="Reference PDB", default="reference/ref.pdb")
28 parser.add_argument('emdb', type=str, help='EMDB id')
29 parser.add_argument('database_file', type=str,
30  help='Name of the database file to write')
31 parser.add_argument('resolution', type=float,
32  help='Resolution of the map')
33 
34 args = parser.parse_args()
35 
36 # EMDBID
37 emdb = args.emdb
38 
39 # name of the database file to write
40 database_file = args.database_file
41 
42 # Name of the EM map and pdb to use (in 0system)
43 map_file = os.path.join(args.database_home, emdb, "0system",
44  "emdb_normalized_new.map")
45 emdb_path = os.path.join(args.database_home, emdb)
46 # Get all SE pdb files
47 pdbs = glob.glob(emdb_path+"/1structure_elements/*.pdb")
48 print(pdbs)
49 if len(pdbs) == 0:
50  print("No structure elements for this system")
51  exit()
52 try:
53  xml = config.get_xml(emdb) # Get header file
54  resolution = tools.get_resolution_from_xml(xml)
55 except: # noqa: E722
56  # resolution of the map
57  resolution = args.resolution
58 
59 # Read the reference pdb
60 m = IMP.Model()
61 ref_mh = IMP.atom.read_pdb(args.refpdb, m)
62 
63 resis = IMP.atom.get_by_type(ref_mh, IMP.atom.RESIDUE_TYPE)
64 refr = IMP.atom.Residue(resis[0])
65 psref = [IMP.atom.get_atom(refr, IMP.atom.AT_C),
66  IMP.atom.get_atom(refr, IMP.atom.AT_CA),
67  IMP.atom.get_atom(refr, IMP.atom.AT_N),
68  IMP.atom.get_atom(refr, IMP.atom.AT_CB)]
69 xyzref = [IMP.core.XYZ(p).get_coordinates() for p in psref]
70 xyzrefcb = xyzref[-1]
71 
72 # Make reference density file; the dimension of the bonding box is defined
73 # in the config.py file
74 res_bounding_box = IMP.algebra.BoundingBox3D(
75  IMP.algebra.Vector3D(config.res_bounding_points[0]),
76  IMP.algebra.Vector3D(config.res_bounding_points[1]))
77 
78 # Open the experimental map
80 try:
81  mrc = IMP.em.read_map(map_file, mrw)
82 except: # noqa: E722
83  raise Exception("MRC, "+map_file+", cannot be read")
84 
85 mrc.get_header_writable().set_resolution(resolution)
86 
87 # resample density if not the same
88 if mrc.get_header().get_spacing() != config.ref_voxel_size:
89  mrc = IMP.em.get_resampled(mrc, config.ref_voxel_size)
90 
91 ##############################################
92 # Loop over structure elements
93 ##############################################
94 of = open(database_file, "w")
95 for pdb in pdbs:
96  # Get STRIDE secondary structure designation
97  pdbname = pdb.split('/')[-1].split('.')[0]
98  # SS = pdb[-5]
99  if pdbname.startswith('h'):
100  SS = 'H'
101  else:
102  SS = 'S'
103 
104  # ipa = is_poly_ala(pdb)
105  ilc = tools.is_length_correct_for_parts(pdb)
106  # Ignore poly-alanines or if length is not correct
107  if not ilc:
108  print("SE", os.path.basename(pdb),
109  "skipped: | is_length_correct:", ilc)
110  continue
111  else:
112  print("SE", os.path.basename(pdb), "getting residue densities")
113 
114  # Open model coordinates
115  mh = IMP.atom.read_pdb(pdb, m)
116  residues = IMP.atom.get_by_type(mh, IMP.atom.RESIDUE_TYPE)
117 
118  skip = False
119 
120  # Ensure there are CBs in here, otherwise you get a segfault
121  for res in residues:
122  ats = [IMP.atom.Atom(a).get_atom_type() for a in res.get_children()]
123  print(res, ats)
124  if (IMP.atom.AT_CB not in ats and
125  IMP.atom.Residue(res).get_residue_type().get_string()
126  != "GLY"):
127  skip = True
128 
129  if skip:
130  continue
131 
132  # Cycle over all of the residues in the pdb
133  for r in residues:
134  # print(r)
135  res = IMP.atom.Residue(r)
136  rid = res.get_index()
137  chain = IMP.atom.Chain(r.get_parent()).get_id()
138  resname = res.get_residue_type().get_string()
139 
140  # Mutate GLY to ALA so we have a CB atom to superimpose
141  if resname == "GLY":
142  imp_tools.mutate_residue(res, "ALA")
143 
144  C_atom = IMP.atom.get_atom(res, IMP.atom.AT_C)
145  CA_atom = IMP.atom.get_atom(res, IMP.atom.AT_CA)
146  N_atom = IMP.atom.get_atom(res, IMP.atom.AT_N)
147 
148  psx = [C_atom, CA_atom, N_atom, IMP.atom.get_atom(res, IMP.atom.AT_CB)]
149 
150  psbb = [C_atom, CA_atom, N_atom, IMP.atom.get_atom(res, IMP.atom.AT_O)]
151 
152  xyzx = [IMP.core.XYZ(p).get_coordinates() for p in psx]
153  # Align backbone atoms
155  xyzx, xyzref)
156  IMP.atom.transform(mh, xform1)
157  m.update()
158  xyzx = [IMP.core.XYZ(p).get_coordinates() for p in psx]
159 
160  # Superimpose CA residues
161  xform2 = IMP.algebra.Transformation3D(xyzref[1]-xyzx[1])
162  IMP.atom.transform(mh, xform2)
163  m.update()
164 
165  xform_tot = xform1*xform2
166 
167  res_map = IMP.em.create_density_map(res_bounding_box,
168  config.ref_voxel_size)
169  for v in range(res_map.get_number_of_voxels()):
170  point = res_map.get_location_by_voxel(v)
171 
172  # Find where this point in the EM map via xform
173  xf_point = xform_tot.get_inverse().get_transformed(point)
174  # Get the density at this point
175  val = IMP.em.get_density(mrc, xf_point)
176 
177  # Set the new map value to this
178  res_map.set_value(point[0], point[1], point[2], val)
179 
180  # Move the structure back to where it came from
181  IMP.atom.transform(mh, xform_tot.get_inverse())
182  # Get sum and max of densities
183  sc_string = tools.get_voxel_value_string(res_map)
184  print("current PDB name", pdbname)
185  outstring = tools.catstring([emdb, resolution, pdbname, chain, rid,
186  resname, SS]) + " " + sc_string
187  # print(outstring)
188  of.write(outstring+"\n")
189 
190 of.close()
Simple 3D transformation class.
Atom get_atom(Residue rd, AtomType at)
Return a particle atom from the residue.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
void transform(Hierarchy h, const algebra::Transformation3D &tr)
Transform a hierarchy. This is aware of rigid bodies.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:678
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Definition: XYZ.h:30
void set_log_level(LogLevel l)
Set the current global log level.
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
VectorD< 3 > Vector3D
Definition: VectorD.h:408
Store info for a chain of a protein.
Definition: Chain.h:61
Transformation3D get_transformation_aligning_first_to_second(Vector3Ds a, Vector3Ds b)