IMP logo
IMP Reference Guide  develop.50fdd7fa33,2025/09/05
The Integrative Modeling Platform
imp_tools.py
1 import IMP
2 import IMP.atom
3 import IMP.core
4 import IMP.pmi
5 import IMP.pmi.topology
6 import numpy
7 from . import config
8 
10  IMP.atom.get_data_path("par.lib"))
11 
12 
13 def get_backbone_particles(pdb):
14  m = IMP.Model()
16  return IMP.atom.get_leaves(rh)
17 
18 
19 def compute_ccc_of_particle_set(dmap, ps, resolution, threshold=2.0):
20 
21  # Get a cropped map from the particle set
22  dc = dmap.get_cropped(ps, threshold**2)
23  dc_t = IMP.em.get_threshold_map(dc, 0.0)
24  dc_t.std_normalize()
25 
27  dmap.get_header().get_spacing())
28  smap_c = IMP.em.CoarseL2Norm.get_density_from_particle(
29  smap, ps, resolution)
30  smap_tc = IMP.em.get_threshold_map(smap_c, 0.0)
31 
32  # Normalize the intensities via histogram matching
33  IMP.em.CoarseL2Norm.get_normalized_intensities(smap_tc, dc_t, resolution)
34  # mm = IMP.em.approximate_molecular_mass(dc_t, 0.0)
35  ccc = IMP.em.CoarseCC()
36  try:
37  return ccc.cross_correlation_coefficient(dc_t, smap_tc, 0.0, True)
38  except: # noqa: E722
39  return -1
40 
41 
42 def make_reference_density_map():
44  config.res_bounding_points[0], config.res_bounding_points[1])
45 
46  dmap = IMP.em.create_density_map(bbox, config.ref_voxel_size)
47  return dmap
48 
49 
50 def get_voxel_coordinates(dmap):
51  voxels = {}
52  for v in range(dmap.get_number_of_voxels()):
53  point = dmap.get_location_by_voxel(v)
54  voxels[v] = (point[0], point[1], point[2])
55  return voxels
56 
57 
58 def mutate_residue(residue_particle, new_aa):
59  # Given a residue and a new amino acid, change the hierarchy to
60  rph = IMP.atom.Hierarchy(residue_particle)
61  res = IMP.atom.Residue(residue_particle)
62  atoms = rph.get_children()
63 
64  # Delete all non backbone atoms
65  for a in atoms:
66  if IMP.atom.Atom(a).get_atom_type() not in [IMP.atom.AT_CA,
67  IMP.atom.AT_N,
68  IMP.atom.AT_O,
69  IMP.atom.AT_C]:
70  p = a.get_particle()
71  rph.remove_child(a)
72  residue_particle.get_model().remove_particle(p)
73 
74  # Get new residue atoms
75  res.set_residue_type(IMP.atom.ResidueType(new_aa))
76  res.set_name(new_aa)
77 
78  # Use ff to add new residue atoms
79  topology = ff.create_topology(rph)
80  topology.add_atom_types(rph)
81  topology.add_missing_atoms(rph)
82  topology.add_coordinates(rph)
83 
84  bonds = topology.add_bonds(rph)
85  ff.create_angles(bonds)
86  ff.create_dihedrals(bonds)
87  topology.add_impropers(rph)
88 
89 
90 def parse_configuration(config_file):
91 
92  f = open(config_file, "r")
93  config = {}
94  for line in f.readlines():
95  if len(line.strip()) == 0 or line[0] == "#":
96  continue
97 
98  if "=" in line:
99  key = line.split("=")[0].strip()
100  value = line.split("=")[1].strip().split(" ")[0].strip()
101  value = value.split("#")[0].strip()
102 
103  # If there is a period, it's a float
104  if "." in value and "/" not in value:
105  config[key] = float(value)
106 
107  # Else see if it should be an int
108  else:
109  try:
110  config[key] = int(value)
111  except: # noqa: E722
112  config[key] = value
113  return config
114 
115 
116 def get_psipred_dictionary_from_files(files):
117  # Given a set of files .ss2 with name something.A.something
118  # where A is the chain name
119  # Open each up and return a dictionary with key chain_id and
120  # a subdictionary of residue numbers and HEC tuples
121 
122  psipred = {}
123  for pp in files:
124 
125  chain = pp.split(".")[-2]
126  print("DHSIUDHSA", pp, chain)
127  psipred[chain] = open_ss2_file(pp)
128  return psipred
129 
130 
131 def open_ss2_file(filename):
132  f = open(filename, "r")
133 
134  outdict = {}
135  for line in f.readlines():
136  if line.strip() == "" or line[0] == "#":
137  continue
138  fields = line.split()
139  resid = int(fields[0])
140  outdict[resid] = (float(fields[4]), float(fields[3]),
141  float(fields[5].strip()))
142 
143  return outdict
144 
145 
146 def compare_sequences_and_return_offset(fasta, query):
147  # Sequences should be in dictionary form {resnum:res}
148  # Returns offset with an integer offset if they are the same
149  # Returns False if they are not the same
150  # Gaps are ok
151 
152  if compare_sequences(fasta, query):
153  return 0
154 
155  # Available range of offsets i
156  minoffset = 1-min(query.keys())
157  maxoffset = max(fasta.keys())-max(query.keys())
158 
159  for i in range(minoffset, maxoffset+1):
160  if compare_sequences(fasta, query, i):
161  return i
162  return False
163 
164 
165 def compare_sequences(seq1, seq2, offset=0):
166  minres = numpy.min(list(seq1.keys()))
167  maxres = numpy.max(list(seq1.keys()))
168  for r in range(minres, maxres+1):
169  if r+offset not in seq2.keys():
170  pass
171  elif seq1[r] != seq2[r+offset]:
172  return False
173  return True
174 
175 
176 def read_sequences_from_rcsb(sequence_file):
177  sequences = IMP.pmi.topology.Sequences(sequence_file)
178  seqs = {}
179  for s in sequences.sequences.keys():
180  s_cs = s.split("_")[-1].split(" ")[0].strip()
181  '''
182  if "Chains" in s_cs:
183  chains = s_cs.split()[1].split(",")
184  else:
185  chains = s_cs.split()
186  for c in chains:
187  seqs[c]=sequences[s]
188  '''
189  seqs[s_cs] = sequences[s]
190  return seqs
191 
192 
193 def read_sequences(sequence_file):
194  sequences = IMP.pmi.topology.Sequences(sequence_file)
195  seqs = {}
196  for s in sequences.sequences.keys():
197  s_cs = s.split("|")[-1].strip()
198  if "Chains" in s_cs:
199  chains = s_cs.split()[1].split(",")
200  else:
201  chains = s_cs.split()
202  for c in chains:
203  seqs[c] = sequences[s]
204  return seqs
205 
206 
207 def get_pdb_offset(pdb, sequences):
208  # Find the sequence offset between the pdb file and fasta file
209  # The fasta file is always the reference
210 
211  alpha = IMP.pmi.alphabets.amino_acid
212  # Make an IMP hierarchy and compare that sequence to the fasta
213  m = IMP.Model()
214  root_hier = IMP.atom.read_pdb(pdb, m, IMP.atom.ATOMPDBSelector())
215  pdbseq = {}
216  pdb_offsets = {}
217 
218  # First, get all the residues in each chain.
219  for c in IMP.atom.get_by_type(root_hier, IMP.atom.CHAIN_TYPE):
220  chain = IMP.atom.Chain(c).get_id()
221  pdbseq[chain] = {}
222 
223  # Put PDB residues into a dictionary
224  for r in IMP.atom.get_by_type(c, IMP.atom.RESIDUE_TYPE):
225  res = IMP.atom.Residue(r)
226  rt = res.get_residue_type().get_string()
227  pdbseq[chain][res.get_index()] = \
228  alpha.get_one_letter_code_from_residue_type(rt)
229 
230  # Now, compare the fasta to PDB
231  for chain in pdbseq.keys():
232  offset = compare_sequences_and_return_offset(
233  string_to_dict(sequences[chain]), pdbseq[chain])
234  pdb_offsets[chain] = offset
235 
236  return pdb_offsets
237 
238 
239 def string_to_dict(string, start=1):
240  seqdict = {}
241  for i in range(len(string)):
242  seqdict[i+start] = string[i]
243  return seqdict
244 
245 
246 def dict_to_string(seqdict):
247  alpha = IMP.pmi.alphabets.amino_acid
248  lastres = max(seqdict.keys())
249  seq = ""
250  for i in range(lastres):
251  if i+1 in seqdict.keys():
252  if len(seqdict[i+1]) == 3:
253  seq += alpha.get_one_letter_code_from_residue_type(
254  seqdict[i+1])
255  else:
256  seq += seqdict[i+1]
257  else:
258  seq += "-"
259  return seq
Select all backbone (N,CA,C,O) ATOM records.
Definition: pdb.h:375
Set of Python classes to create a multi-state, multi-resolution IMP hierarchy.
DensityMap * get_threshold_map(const DensityMap *orig_map, float threshold)
Return a map with 0 for all voxels below the threshold.
void read_pdb(TextInput input, int model, Hierarchy h)
CHARMM force field parameters.
Class for storing model, its restraints, constraints, and particles.
Definition: Model.h:86
Select all non-alternative ATOM records.
Definition: pdb.h:128
The standard decorator for manipulating molecular structures.
DensityMap * create_density_map(const IMP::algebra::GridD< 3, S, V, E > &arg)
Create a density map from an arbitrary IMP::algebra::GridD.
Definition: DensityMap.h:678
A decorator for a particle representing an atom.
Definition: atom/Atom.h:238
The type for a residue.
algebra::BoundingBoxD< 3 > get_bounding_box(const DensityMap *m)
Definition: DensityMap.h:509
A decorator for a residue.
Definition: Residue.h:137
Basic functionality that is expected to be used by a wide variety of IMP users.
Store info for a chain of a protein.
Definition: Chain.h:61
Python classes to represent, score, sample and analyze models.
A dictionary-like wrapper for reading and storing sequence data.
std::string get_data_path(std::string file_name)
Return the full path to one of this module's data files.
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)