1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
5 from __future__
import print_function
17 from collections
import defaultdict
24 """Container for restraints shown in the RMF file and in Chimera"""
26 def get_static_info(self):
29 ri.add_string(
"type",
"IMP.pmi.CrossLinkingMassSpectrometryRestraint")
30 ri.add_float(
"linker_length", self.length)
31 ri.add_float(
"slope", self.slope)
32 ri.add_filename(
"filename", self.filename
or "")
34 ri.add_string(
"linker_auth_name", self.linker.auth_name)
35 if self.linker.smiles:
36 ri.add_string(
"linker_smiles", self.linker.smiles)
41 """Setup cross-link distance restraints from mass spectrometry data.
42 The noise in the data and the structural uncertainty of cross-linked
43 amino-acids is inferred using Bayes theory of probability
44 @note Wraps an IMP::isd::CrossLinkMSRestraint
46 _include_in_rmf =
True
48 def __init__(self, root_hier, database=None, length=10.0, resolution=None,
49 slope=0.02, label=
None, filelabel=
"None",
50 attributes_for_label=
None, linker=
None, weight=1.):
52 @param root_hier The canonical hierarchy containing all the states
53 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
54 object that contains the cross-link dataset
55 @param length maximal cross-linker length (including the residue
57 @param resolution what representation resolution should the cross-link
58 restraint be applied to.
59 @param slope The slope of a distance-linear scoring function that
60 funnels the score when the particles are
61 too far away. Suggested value 0.02.
62 @param label the extra text to label the restraint so that it is
63 searchable in the output
64 @param filelabel automatically generated file containing
65 missing/included/excluded
66 cross-links will be labeled using this text
67 @param attributes_for_label
68 @param weight Weight of restraint
69 @param linker description of the chemistry of the linker itself, as
70 an ihm.ChemDescriptor object
71 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
72 Common cross-linkers can be found in the `ihm.cross_linkers`
76 model = root_hier.get_model()
78 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
79 model, weight=weight, label=label,
80 restraint_set_class=_DataRestraintSet)
83 raise Exception(
"You must pass a database")
86 "CrossLinkingMassSpectrometryRestraint: database should "
87 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
88 self.database = database
90 if resolution == 0
or resolution
is None:
91 raise Exception(
"You must pass a resolution and it can't be zero")
93 indb = open(
"included." + filelabel +
".xl.db",
"w")
94 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
95 midb = open(
"missing." + filelabel +
".xl.db",
"w")
100 "No linker chemistry specified; this will be guessed from the "
101 "label (%s). It is recommended to specify a linker as an "
102 "ihm.ChemDescriptor object (see the "
103 "CrossLinkingMassSpectrometryRestraint documentation)."
105 self.rs.set_name(self.rs.get_name() +
"_Data")
106 self.rspsi = self._create_restraint_set(
"PriorPsi")
107 self.rssig = self._create_restraint_set(
"PriorSig")
108 self.rslin = self._create_restraint_set(
"Linear")
110 self.rs.filename = self.database.name
111 self.rs.length = length
112 self.rs.slope = slope
113 self.rs.linker = linker
117 self.linear.set_slope(0.0)
120 self.psi_is_sampled =
True
121 self.sigma_is_sampled =
True
122 self.psi_dictionary = {}
123 self.sigma_dictionary = {}
125 self.outputlevel =
"low"
129 xl_groups = [p.get_cross_link_group(self)
130 for p, state
in IMP.pmi.tools._all_protocol_outputs(
134 copies_to_add = defaultdict(int)
135 print(
'gathering copies')
136 for xlid
in self.database.xlid_iterator():
137 for xl
in self.database[xlid]:
138 r1 = xl[self.database.residue1_key]
139 c1 = xl[self.database.protein1_key]
140 r2 = xl[self.database.residue2_key]
141 c2 = xl[self.database.protein2_key]
142 for c, r
in ((c1, r1), (c2, r2)):
143 if c
in copies_to_add:
146 root_hier, state_index=0, molecule=c, residue_index=r,
147 resolution=resolution).get_selected_particles()
149 copies_to_add[c] = len(sel)-1
151 for molname
in copies_to_add:
152 if copies_to_add[molname] == 0:
155 self.database.protein1_key, operator.eq, molname)
156 self.database.set_value(self.database.protein1_key,
159 self.database.protein2_key, operator.eq, molname)
160 self.database.set_value(self.database.protein2_key,
162 for ncopy
in range(copies_to_add[molname]):
163 self.database.clone_protein(
'%s.0' % molname,
164 '%s.%i' % (molname, ncopy + 1))
165 print(
'done pmi2 prelims')
167 for xlid
in self.database.xlid_iterator():
168 new_contribution =
True
169 for xl
in self.database[xlid]:
171 r1 = xl[self.database.residue1_key]
172 c1 = xl[self.database.protein1_key]
173 r2 = xl[self.database.residue2_key]
174 c2 = xl[self.database.protein2_key]
181 name1, copy1 = c1.split(
'.')
183 name2, copy2 = c2.split(
'.')
187 (p[0].add_experimental_cross_link(r1, name1, r2, name2,
190 IMP.pmi.tools._all_protocol_outputs(root_hier),
193 iterlist = range(len(IMP.atom.get_by_type(
194 root_hier, IMP.atom.STATE_TYPE)))
195 for nstate, r
in enumerate(iterlist):
197 xl[self.database.state_key] = nstate
198 xl[self.database.data_set_name_key] = self.label
201 root_hier, state_index=nstate, molecule=name1,
202 copy_index=int(copy1), residue_index=r1,
203 resolution=resolution).get_selected_particles()
205 root_hier, state_index=nstate, molecule=name2,
206 copy_index=int(copy2), residue_index=r2,
207 resolution=resolution).get_selected_particles()
214 "residue %d of chain %s selects multiple "
215 "particles %s" % (r1, c1, str(ps1)))
218 "CrossLinkingMassSpectrometryRestraint: "
219 "residue %d of chain %s is not there" % (r1, c1),
221 midb.write(str(xl) +
"\n")
226 "residue %d of chain %s selects multiple "
227 "particles %s" % (r2, c2, str(ps2)))
230 "CrossLinkingMassSpectrometryRestraint: "
231 "residue %d of chain %s is not there" % (r2, c2),
233 midb.write(str(xl) +
"\n")
239 if p1 == p2
and r1 == r2:
241 "CrossLinkingMassSpectrometryRestraint: "
242 "same particle and same residue, skipping "
247 print(
"generating a new cross-link restraint")
248 new_contribution =
False
253 restraints.append(dr)
255 if self.database.sigma1_key
not in xl.keys():
257 xl[self.database.sigma1_key] = sigma1name
259 sigma1name = xl[self.database.sigma1_key]
262 if self.database.sigma2_key
not in xl.keys():
264 xl[self.database.sigma2_key] = sigma2name
266 sigma2name = xl[self.database.sigma2_key]
269 if self.database.psi_key
not in xl.keys():
271 xl[self.database.psi_key] = psiname
273 psiname = xl[self.database.psi_key]
277 p1i = p1.get_particle_index()
279 p2i = p2.get_particle_index()
282 xl[
"Particle_sigma1"] = sigma1
284 xl[
"Particle_sigma2"] = sigma2
286 xl[
"Particle_psi"] = psi
288 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
291 print(
"--------------")
292 print(
"CrossLinkingMassSpectrometryRestraint: generating "
293 "cross-link restraint between")
294 print(
"CrossLinkingMassSpectrometryRestraint: residue %d "
295 "of chain %s and residue %d of chain %s"
297 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 "
298 "%s sigma2 %s psi %s"
299 % (sigma1name, sigma2name, psiname))
300 print(
"CrossLinkingMassSpectrometryRestraint: between "
301 "particles %s and %s"
302 % (p1.get_name(), p2.get_name()))
303 print(
"==========================================\n")
304 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
307 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
308 sigma1, sigma2, psi, ex_xl[1])
315 xl[
"IntraRigidBody"] =
True
317 xl[
"IntraRigidBody"] =
False
319 xl_label = self.database.get_short_cross_link_string(xl)
320 xl[
"ShortLabel"] = xl_label
321 dr.set_name(xl_label)
326 pr.set_name(xl_label)
327 self.rslin.add_restraint(pr)
329 self.xl_list.append(xl)
331 indb.write(str(xl) +
"\n")
333 if len(self.xl_list) == 0:
335 "CrossLinkingMassSpectrometryRestraint: no cross-link "
337 self.xl_restraints = restraints
339 self.rs.add_restraint(lw)
341 def __set_dataset(self, ds):
342 self.database.dataset = ds
343 dataset = property(
lambda self: self.database.dataset, __set_dataset)
346 """ get the hierarchy """
350 """ get the restraint set """
354 """ get the restraints in a list """
355 return self.xl_restraints
358 """ get the dummy restraints to be displayed in the rmf file """
359 return self.rs, self.rssig, self.rspsi
362 """ Get a list of tuples containing the particle pairs """
364 for i
in range(len(self.pairs)):
365 p0 = self.pairs[i][0]
366 p1 = self.pairs[i][1]
367 ppairs.append((p0, p1))
371 """ Set the output level of the output """
372 self.outputlevel = level
375 """ Switch on/off the sampling of psi particles """
376 self.psi_is_sampled = is_sampled
379 """ Switch on/off the sampling of sigma particles """
380 self.sigma_is_sampled = is_sampled
383 """ This is called internally. Creates a nuisance
384 on the structural uncertainty """
385 if name
in self.sigma_dictionary:
386 return self.sigma_dictionary[name][0]
389 sigmaminnuis = 0.0000001
390 sigmamaxnuis = 1000.0
394 sigma = IMP.pmi.tools.SetupNuisance(
395 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
396 self.sigma_is_sampled, name=name).get_particle()
397 self.sigma_dictionary[name] = (
400 self.sigma_is_sampled)
401 self.rssig.add_restraint(
411 """ This is called internally. Creates a nuisance
412 on the data uncertainty """
413 if name
in self.psi_dictionary:
414 return self.psi_dictionary[name][0]
417 psiminnuis = 0.0000001
418 psimaxnuis = 0.4999999
422 psi = IMP.pmi.tools.SetupNuisance(
423 self.model, psiinit, psiminnuis, psimaxnuis,
424 self.psi_is_sampled, name=name).get_particle()
425 self.psi_dictionary[name] = (psi, psitrans, self.psi_is_sampled)
427 self.rspsi.add_restraint(
435 """Get the output of the restraint to be used by the IMP.pmi.output
437 output = super(CrossLinkingMassSpectrometryRestraint,
440 for xl
in self.xl_list:
442 xl_label = xl[
"ShortLabel"]
446 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
447 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
451 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
454 for psiname
in self.psi_dictionary:
455 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
456 str(psiname) + self._label_suffix] = str(
457 self.psi_dictionary[psiname][0].get_scale())
459 for sigmaname
in self.sigma_dictionary:
460 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
461 str(sigmaname) + self._label_suffix] = str(
462 self.sigma_dictionary[sigmaname][0].get_scale())
467 """ Get all need data to construct a mover in IMP.pmi.dof class"""
469 if self.sigma_is_sampled:
470 for sigmaname
in self.sigma_dictionary:
472 "Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" \
473 + str(sigmaname) +
"_" + self.label
474 particle = self.sigma_dictionary[sigmaname][0]
475 maxstep = (self.sigma_dictionary[sigmaname][1])
479 mv.set_name(mover_name)
482 if self.psi_is_sampled:
483 for psiname
in self.psi_dictionary:
485 "Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + \
486 str(psiname) +
"_" + self.label
487 particle = self.psi_dictionary[psiname][0]
488 maxstep = (self.psi_dictionary[psiname][1])
492 mv.set_name(mover_name)
498 """ Get the particles to be sampled by the IMP.pmi.sampler object """
500 if self.sigma_is_sampled:
501 for sigmaname
in self.sigma_dictionary:
502 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
503 str(sigmaname) + self._label_suffix] =\
504 ([self.sigma_dictionary[sigmaname][0]],
505 self.sigma_dictionary[sigmaname][1])
507 if self.psi_is_sampled:
508 for psiname
in self.psi_dictionary:
509 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
510 str(psiname) + self._label_suffix] = \
511 ([self.psi_dictionary[psiname][0]],
512 self.psi_dictionary[psiname][1])
518 """Setup cross-link distance restraints at atomic level
519 The "atomic" aspect is that it models the particle uncertainty with
520 a Gaussian. The noise in the data and the structural uncertainty of
521 cross-linked amino-acids is inferred using Bayes' theory of probability
522 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
525 _include_in_rmf =
True
527 def __init__(self, root_hier, xldb, atom_type="NZ", length=10.0,
528 slope=0.01, nstates=
None, label=
None,
529 nuisances_are_optimized=
True, sigma_init=5.0,
530 psi_init=0.01, one_psi=
True, filelabel=
None, weight=1.):
532 Automatically creates one "sigma" per cross-linked residue and one
534 Other nuisance options are available.
535 @note Will return an error if the data+extra_sel don't specify
536 two particles per XL pair.
537 @param root_hier The root hierarchy on which you'll do selection
538 @param xldb CrossLinkDataBase object
539 @param atom_type Can either be "NZ" or "CA"
540 @param length The XL linker length
541 @param slope Linear term to add to the restraint function to
543 @param nstates The number of states to model. Defaults to the
544 number of states in root.
545 @param label The output label for the restraint
546 @param nuisances_are_optimized Whether to optimize nuisances
547 @param sigma_init The initial value for all the sigmas
548 @param psi_init The initial value for all the psis
549 @param one_psi Use a single psi for all restraints (if False,
551 @param filelabel automatically generated file containing
552 missing/included/excluded
553 cross-links will be labeled using this text
554 @param weight Weight of restraint
559 self.root = root_hier
560 rname =
"AtomicXLRestraint"
561 super(AtomicCrossLinkMSRestraint, self).
__init__(
562 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
566 self.sigma_is_sampled = nuisances_are_optimized
567 self.psi_is_sampled = nuisances_are_optimized
569 if atom_type
not in (
"NZ",
"CA"):
570 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
571 self.atom_type = atom_type
572 self.nstates = nstates
574 self.nstates = len(IMP.atom.get_by_type(self.root,
575 IMP.atom.STATE_TYPE))
576 elif nstates != len(IMP.atom.get_by_type(self.root,
577 IMP.atom.STATE_TYPE)):
578 print(
"Warning: nstates is not the same as the number of states "
581 self.rs_psi = self._create_restraint_set(
"psi")
582 self.rs_sig = self._create_restraint_set(
"sigma")
583 self.rs_lin = self._create_restraint_set(
"linear")
585 self.psi_dictionary = {}
586 self.sigma_dictionary = {}
588 self.particles = defaultdict(set)
589 self.one_psi = one_psi
590 self.bonded_pairs = []
592 print(
'creating a single psi for all XLs')
594 print(
'creating one psi for each XL')
597 if filelabel
is not None:
598 indb = open(
"included." + filelabel +
".xl.db",
"w")
599 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
600 midb = open(
"missing." + filelabel +
".xl.db",
"w")
603 self._create_sigma(
'sigma', sigma_init)
605 self._create_psi(
'psi', psi_init)
607 for xlid
in self.xldb.xlid_iterator():
608 self._create_psi(xlid, psi_init)
612 for xlid
in self.xldb.xlid_iterator():
615 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
617 psip = self.psi_dictionary[
618 unique_id][0].get_particle_index()
624 num_contributions = 0
627 for nstate
in self.nstates:
628 for xl
in self.xldb[xlid]:
629 r1 = xl[self.xldb.residue1_key]
630 c1 = xl[self.xldb.protein1_key].strip()
631 r2 = xl[self.xldb.residue2_key]
632 c2 = xl[self.xldb.protein2_key].strip()
637 self.root, state_index=nstate,
640 residue_index=r1).get_selected_particles()
642 self.root, state_index=nstate,
645 residue_index=r2).get_selected_particles()
647 warnings.warn(
"AtomicXLRestraint: residue %d of "
648 "chain %s is not there" % (r1, c1),
650 if filelabel
is not None:
651 midb.write(str(xl) +
"\n")
655 warnings.warn(
"AtomicXLRestraint: residue %d of "
656 "chain %s is not there" % (r2, c2),
658 if filelabel
is not None:
659 midb.write(str(xl) +
"\n")
664 num1=num_xls_per_res[str(xl.r1)]
665 num2=num_xls_per_res[str(xl.r2)]
666 if num1<sig_threshold:
670 if num2<sig_threshold:
675 sig1 = self.sigma_dictionary[
'sigma'][0]
676 sig2 = self.sigma_dictionary[
'sigma'][0]
679 for p1, p2
in itertools.product(ps1, ps2):
682 self.particles[nstate] |= set((p1, p2))
684 [p1.get_index(), p2.get_index()],
685 [sig1.get_particle_index(),
686 sig2.get_particle_index()])
687 num_contributions += 1
689 if num_contributions > 0:
690 print(
'XL:', xlid,
'num contributions:', num_contributions)
693 raise Exception(
"You didn't create any XL restraints")
694 print(
'created', len(xlrs),
'XL restraints')
695 rname = self.rs.get_name()
697 self.rs.set_name(rname)
698 self.rs.set_weight(self.weight)
699 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
701 def get_hierarchy(self):
704 def _create_sigma(self, name, sigmainit):
705 """ This is called internally. Creates a nuisance
706 on the structural uncertainty """
707 if name
in self.sigma_dictionary:
708 return self.sigma_dictionary[name][0]
710 sigmaminnuis = 0.0000001
711 sigmamaxnuis = 1000.0
715 sigma = IMP.pmi.tools.SetupNuisance(
716 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
717 self.sigma_is_sampled).get_particle()
718 self.sigma_dictionary[name] = (
719 sigma, sigmatrans, self.sigma_is_sampled)
721 self.model, sigma, 1000000000.0, sigmamax, sigmamin))
724 def _create_psi(self, name, psiinit):
725 """ This is called internally. Creates a nuisance
726 on the data uncertainty """
727 if name
in self.psi_dictionary:
728 return self.psi_dictionary[name][0]
730 psiminnuis = 0.0000001
731 psimaxnuis = 0.4999999
735 psi = IMP.pmi.tools.SetupNuisance(self.model,
739 self.psi_is_sampled).get_particle()
740 self.psi_dictionary[name] = (
745 self.rs_psi.add_restraint(
757 """ create dummy harmonic restraints for each XL but don't add to model
758 Makes it easy to see each contribution to each XL in RMF
760 class MyGetRestraint(object):
764 def get_restraint_for_rmf(self):
770 for nxl
in range(self.get_number_of_restraints()):
771 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
772 self.rs.get_restraint(nxl))
774 for ncontr
in range(xl.get_number_of_contributions()):
775 ps = xl.get_contribution(ncontr)
777 hps, [self.model.get_particle(p)
for p
in ps],
778 'xl%i_contr%i' % (nxl, ncontr))
780 dummy_rs.append(MyGetRestraint(rs))
784 """ Get the particles to be sampled by the IMP.pmi.sampler object """
786 if self.sigma_is_sampled:
787 for sigmaname
in self.sigma_dictionary:
788 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
789 str(sigmaname) + self._label_suffix] = \
790 ([self.sigma_dictionary[sigmaname][0]],
791 self.sigma_dictionary[sigmaname][1])
792 if self.psi_is_sampled:
793 for psiname
in self.psi_dictionary:
794 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
795 str(psiname) + self._label_suffix] = \
796 ([self.psi_dictionary[psiname][0]],
797 self.psi_dictionary[psiname][1])
800 def get_bonded_pairs(self):
801 return self.bonded_pairs
803 def get_number_of_restraints(self):
804 return self.rs.get_number_of_restraints()
807 return 'XL restraint with ' + \
808 str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
812 """Read a stat file and load all the sigmas.
813 This is potentially quite stupid.
814 It's also a hack since the sigmas should be stored in the RMF file.
815 Also, requires one sigma and one psi for ALL XLs.
818 sig_val = float(subprocess.check_output(
819 [
"process_output.py",
"-f", in_fn,
"-s",
820 "AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
821 psi_val = float(subprocess.check_output(
822 [
"process_output.py",
"-f", in_fn,
"-s",
823 "AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
824 for nxl
in range(self.rs.get_number_of_restraints()):
825 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
826 self.rs.get_restraint(nxl))
829 for contr
in range(xl.get_number_of_contributions()):
830 sig1, sig2 = xl.get_contribution_sigmas(contr)
833 print(
'loaded nuisances from file')
836 min_dist_for_violation=1e9, coarsen=
False,
837 limit_to_chains=
None, exclude_chains=
''):
838 """Create CMM files, one for each state, of all cross-links.
839 will draw in GREEN if non-violated in all states (or if only one state)
840 will draw in PURPLE if non-violated only in a subset of states
841 (draws nothing elsewhere)
842 will draw in RED in ALL states if all violated
843 (if only one state, you'll only see green and red)
845 @param out_prefix Output xlink files prefix
846 @param max_prob_for_violation It's a violation if the probability
848 @param min_dist_for_violation It's a violation if the min dist
850 @param coarsen Use CA positions
851 @param limit_to_chains Try to visualize just these chains
852 @param exclude_to_chains Try to NOT visualize these chains
854 print(
'going to calculate violations and plot CMM files')
856 all_dists = [s[
"low_dist"]
for s
in all_stats]
862 cmds = defaultdict(set)
863 for nstate
in range(self.nstates):
864 outf = open(out_prefix+str(nstate)+
'.cmm',
'w')
865 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
868 print(
'will limit to', limit_to_chains)
869 print(
'will exclude', exclude_chains)
874 for nxl
in range(self.rs.get_number_of_restraints()):
878 for nstate
in range(self.nstates):
879 prob = state_info[nstate][nxl][
"prob"]
880 low_dist = state_info[nstate][nxl][
"low_dist"]
881 if (prob < max_prob_for_violation
or
882 low_dist > min_dist_for_violation):
890 if len(npass) == self.nstates:
892 elif len(nviol) == self.nstates:
896 print(nxl,
'state dists:',
897 [state_info[nstate][nxl][
"low_dist"]
898 for nstate
in range(self.nstates)],
899 'viol states:', nviol,
'all viol?', all_viol)
900 for nstate
in range(self.nstates):
919 pp = state_info[nstate][nxl][
"low_pp"]
930 cmds[nstate].add((ch1, r1))
931 cmds[nstate].add((ch2, r2))
933 outf = out_fns[nstate]
935 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
936 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
937 % (nv, c1[0], c1[1], c1[2], r, g, b))
938 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
939 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
940 % (nv+1, c2[0], c2[1], c2[2], r, g, b))
941 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
942 'r="%.2f" g="%.2f" b="%.2f"/> \n'
943 % (nv, nv+1, r, g, b))
946 for nstate
in range(self.nstates):
947 out_fns[nstate].write(
'</marker_set>\n')
948 out_fns[nstate].close()
950 for ch, r
in cmds[nstate]:
951 cmd +=
'#%i:%i.%s ' % (nstate, r, ch)
956 def _get_contribution_info(self, xl, ncontr, use_CA=False):
957 """Return the particles at that contribution. If requested will
958 return CA's instead"""
959 idx1 = xl.get_contribution(ncontr)[0]
960 idx2 = xl.get_contribution(ncontr)[1]
965 idx1 = sel.get_selected_particle_indexes()[0]
969 idx2 = sel.get_selected_particle_indexes()[0]
973 return idx1, idx2, dist
976 exclude_chains=
'', use_CA=
False):
977 ''' return the probability, best distance, two coords, and possibly
979 @param limit_to_state Only examine contributions from one state
980 @param limit_to_chains Returns the particles for certain "easy
982 @param exclude_chains Even if you limit, don't let one end be in
983 this list. Only works if you also limit chains
984 @param use_CA Limit to CA particles
987 for nxl
in range(self.rs.get_number_of_restraints()):
989 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
990 self.rs.get_restraint(nxl))
996 for contr
in range(xl.get_number_of_contributions()):
997 pp = xl.get_contribution(contr)
1002 idx1 = sel.get_selected_particle_indexes()[0]
1006 idx2 = sel.get_selected_particle_indexes()[0]
1008 if limit_to_state
is not None:
1011 if nstate != limit_to_state:
1013 state_contrs.append(contr)
1016 if limit_to_chains
is not None:
1021 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1022 c1
not in exclude_chains
1023 and c2
not in exclude_chains):
1024 if dist < low_dist_lim:
1030 if limit_to_state
is not None:
1031 this_info[
"prob"] = xl.evaluate_for_contributions(
1034 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1035 if limit_to_chains
is not None:
1036 this_info[
"low_pp"] = low_pp_lim
1038 this_info[
"low_pp"] = low_pp
1040 this_info[
"low_dist"] = low_dist
1041 if not self.one_psi:
1043 this_info[
"psi"] = pval
1044 ret.append(this_info)
1047 def print_stats(self):
1049 for nxl, s
in enumerate(stats):
1050 print(s[
"low_dist"])
1052 def get_output(self):
1053 output = super(AtomicCrossLinkMSRestraint, self).get_output()
1064 for nxl, s
in enumerate(stats):
1065 if s[
'low_dist'] > 20.0:
1067 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"Prob")] = str(s[
'prob'])
1068 output[
"AtomicXLRestraint_%i_%s"
1069 % (nxl,
"BestDist")] = str(s[
'low_dist'])
1070 if not self.one_psi:
1071 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"psi")] \
1073 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1077 class CysteineCrossLinkRestraint(object):
1078 def __init__(self, root_hier, filename, cbeta=False,
1079 betatuple=(0.03, 0.1),
1080 disttuple=(0.0, 25.0, 1000),
1081 omegatuple=(1.0, 1000.0, 50),
1082 sigmatuple=(0.3, 0.3, 1),
1084 sigmaissampled=
False,
1085 weightissampled=
True,
1086 epsilonissampled=
True):
1093 self.m = root_hier.get_model()
1096 self.epsilonmaxtrans = 0.01
1097 self.sigmamaxtrans = 0.1
1098 self.betamaxtrans = 0.01
1099 self.weightmaxtrans = 0.1
1101 self.outputlevel =
"low"
1102 self.betaissampled = betaissampled
1103 self.sigmaissampled = sigmaissampled
1104 self.weightissampled = weightissampled
1105 self.epsilonissampled = epsilonissampled
1107 betalower = betatuple[0]
1108 betaupper = betatuple[1]
1114 self.beta = IMP.pmi.tools.SetupNuisance(
1119 betaissampled).get_particle(
1122 self.sigma = IMP.pmi.tools.SetupNuisance(
1127 sigmaissampled).get_particle()
1129 self.weight = IMP.pmi.tools.SetupWeight(
1131 isoptimized=
False).get_particle(
1135 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1144 if t[5]
in self.epsilons:
1145 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1146 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1148 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(
1149 self.m, 0.01, 0.01, 1.0 - float(t[4]),
1150 epsilonissampled).get_particle()
1151 up = self.epsilons[t[5]].get_upper()
1152 low = self.epsilons[t[5]].get_lower()
1154 self.epsilons[t[5]].set_upper(low)
1156 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1160 crossdata = IMP.pmi.tools.get_cross_link_data(
1161 "cysteine",
"cysteine_CA_FES.txt.standard",
1162 disttuple, omegatuple, sigmatuple, disttuple[1],
1165 crossdata = IMP.pmi.tools.get_cross_link_data(
1166 "cysteine",
"cysteine_CB_FES.txt.standard",
1167 disttuple, omegatuple, sigmatuple, disttuple[1],
1171 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1172 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1173 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1176 print(
"--------------")
1177 print(
"CysteineCrossLink: attempting to create a restraint "
1198 self.epsilons[epslabel],
1209 molecule=chain1, residue_index=resid1,
1211 p1 = p1.get_selected_particles()
1218 molecule=chain2, residue_index=resid2,
1220 p2 = p2.get_selected_particles()
1230 for t
in range(-1, 2):
1232 molecule=chain1, copy_index=0,
1233 residue_index=resid1 + t)
1234 p = p.get_selected_particles()
1240 "CysteineCrossLink: missing representation for "
1241 "residue %d of chain %s" % (resid1 + t, chain1),
1245 molecule=chain2, copy_index=0,
1246 residue_index=resid2 + t)
1247 p = p.get_selected_particles()
1253 "CysteineCrossLink: missing representation for "
1254 "residue %d of chain %s" % (resid2 + t, chain2),
1258 if (p1
is not None and p2
is not None):
1259 ccl.add_contribution(p1, p2)
1263 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" +
1264 str(resid2) +
"_" + chain2,
1268 if (len(p1) == 3
and len(p2) == 3):
1269 p11n = p1[0].get_name()
1270 p12n = p1[1].get_name()
1271 p13n = p1[2].get_name()
1272 p21n = p2[0].get_name()
1273 p22n = p2[1].get_name()
1274 p23n = p2[2].get_name()
1276 print(
"CysteineCrossLink: generating CB cysteine "
1277 "cross-link restraint between")
1278 print(
"CysteineCrossLink: residue %d of chain %s and "
1279 "residue %d of chain %s"
1280 % (resid1, chain1, resid2, chain2))
1281 print(
"CysteineCrossLink: between particles %s %s %s and "
1282 "%s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1284 ccl.add_contribution(p1, p2)
1287 self.rs.add_restraint(ccl)
1288 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1289 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1292 self.weight.get_particle()
1293 ).set_weights_are_optimized(weightissampled)
1295 def set_label(self, label):
1298 def add_to_model(self):
1303 if self.epsilonissampled:
1304 for eps
in self.epsilons.keys():
1305 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
1306 "_" + self.label] = ([self.epsilons[eps]],
1307 self.epsilonmaxtrans)
1308 if self.betaissampled:
1309 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1310 self.label] = ([self.beta], self.betamaxtrans)
1311 if self.weightissampled:
1312 ps[
"Weights_CysteineCrossLinkRestraint_" +
1313 self.label] = ([self.weight], self.weightmaxtrans)
1314 if self.sigmaissampled:
1315 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1316 self.label] = ([self.sigma], self.sigmamaxtrans)
1319 def set_output_level(self, level="low"):
1321 self.outputlevel = level
1323 def get_restraint(self):
1326 def get_sigma(self):
1329 def get_output(self):
1331 score = self.rs.unprotected_evaluate(
None)
1332 output[
"_TotalScore"] = str(score)
1333 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1334 output[
"CysteineCrossLinkRestraint_sigma_" +
1335 self.label] = str(self.sigma.get_scale())
1336 for eps
in self.epsilons.keys():
1337 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1338 self.label] = str(self.epsilons[eps].get_scale())
1339 output[
"CysteineCrossLinkRestraint_beta_" +
1340 self.label] = str(self.beta.get_scale())
1341 for n
in range(self.weight.get_number_of_weights()):
1342 output[
"CysteineCrossLinkRestraint_weights_" +
1343 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1345 if self.outputlevel ==
"high":
1346 for rst
in self.rs.get_restraints():
1347 cclr = IMP.isd.CysteineCrossLinkRestraint.get_from(rst)
1348 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1350 "_" + self.label] = cclr.get_model_frequency()
1351 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1352 cclr.get_name() +
"_" +
1353 self.label] = cclr.get_standard_error()
1357 class DisulfideCrossLinkRestraint(object):
1358 def __init__(self, representation_or_hier,
1366 self.m = representation_or_hier.get_model()
1369 resolution=resolution)
1372 resolution=resolution)
1379 self.linear.set_slope(0.0)
1383 self.psi_dictionary = {}
1384 self.sigma_dictionary = {}
1385 self.psi_is_sampled =
False
1386 self.sigma_is_sampled =
False
1389 if len(ps1) > 1
or len(ps1) == 0:
1391 "DisulfideBondRestraint: ERROR> first selection pattern "
1392 "selects multiple particles or sero particles")
1394 if len(ps2) > 1
or len(ps2) == 0:
1396 "DisulfideBondRestraint: ERROR> second selection pattern "
1397 "selects multiple particles or sero particles")
1402 sigma = self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1403 psi = self.create_psi(
"PSI_DISULFIDE_BOND")
1405 p1i = p1.get_index()
1406 p2i = p2.get_index()
1412 dr.add_contribution((p1i, p2i), (si, si), psii)
1416 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1417 self.rslin.add_restraint(pr)
1420 self.rs.add_restraint(lw)
1422 self.xl[
"Particle1"] = p1
1423 self.xl[
"Particle2"] = p2
1424 self.xl[
"Sigma"] = sigma
1425 self.xl[
"Psi"] = psi
1427 def add_to_model(self):
1430 self.m, self.rslin, add_to_rmf=
True)
1432 def get_hierarchies(self):
1435 def get_restraint_sets(self):
1438 def get_restraint(self):
1441 def get_restraint_for_rmf(self):
1444 def get_restraints(self):
1446 for r
in self.rs.get_restraints():
1447 rlist.append(IMP.core.PairRestraint.get_from(r))
1450 def set_psi_is_sampled(self, is_sampled=True):
1451 self.psi_is_sampled = is_sampled
1453 def set_sigma_is_sampled(self, is_sampled=True):
1454 self.sigma_is_sampled = is_sampled
1456 def create_sigma(self, name):
1457 ''' a nuisance on the structural uncertainty '''
1458 if name
in self.sigma_dictionary:
1459 return self.sigma_dictionary[name][0]
1462 sigmaminnuis = 0.0000001
1463 sigmamaxnuis = 1000.0
1465 sigma = IMP.pmi.tools.SetupNuisance(
1466 self.m, sigmainit, sigmaminnuis, sigmamaxnuis,
1467 self.sigma_is_sampled).get_particle()
1468 self.sigma_dictionary[name] = (
1471 self.sigma_is_sampled)
1475 def create_psi(self, name):
1476 ''' a nuisance on the inconsistency '''
1477 if name
in self.psi_dictionary:
1478 return self.psi_dictionary[name][0]
1481 psiminnuis = 0.0000001
1482 psimaxnuis = 0.4999999
1484 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1485 psiminnuis, psimaxnuis,
1486 self.psi_is_sampled).get_particle()
1487 self.psi_dictionary[name] = (
1490 self.psi_is_sampled)
1494 def get_output(self):
1496 score = self.rs.unprotected_evaluate(
None)
1497 output[
"_TotalScore"] = str(score)
1498 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1499 output[
"DisulfideBondRestraint_Linear_Score_" +
1500 self.label] = self.rslin.unprotected_evaluate(
None)
1504 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
Report key:value information on restraints.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the partic...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Warning for probably incorrect input parameters.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.