1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
5 from __future__
import print_function
17 from collections
import defaultdict
24 """Setup cross-link distance restraints from mass spectrometry data.
25 The noise in the data and the structural uncertainty of cross-linked
26 amino-acids is inferred using Bayes theory of probability
27 @note Wraps an IMP::isd::CrossLinkMSRestraint
29 _include_in_rmf =
True
31 def __init__(self, root_hier, database=None, length=10.0, resolution=None,
32 slope=0.02, label=
None, filelabel=
"None",
33 attributes_for_label=
None, linker=
None, weight=1.):
35 @param root_hier The canonical hierarchy containing all the states
36 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
37 object that contains the cross-link dataset
38 @param length maximal cross-linker length (including the residue
40 @param resolution what representation resolution should the cross-link
41 restraint be applied to.
42 @param slope The slope of a distance-linear scoring function that
43 funnels the score when the particles are
44 too far away. Suggested value 0.02.
45 @param label the extra text to label the restraint so that it is
46 searchable in the output
47 @param filelabel automatically generated file containing
48 missing/included/excluded
49 cross-links will be labeled using this text
50 @param attributes_for_label
51 @param weight Weight of restraint
52 @param linker description of the chemistry of the linker itself, as
53 an ihm.ChemDescriptor object
54 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
55 Common cross-linkers can be found in the `ihm.cross_linkers`
59 model = root_hier.get_model()
61 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
62 model, weight=weight, label=label,
66 raise Exception(
"You must pass a database")
69 "CrossLinkingMassSpectrometryRestraint: database should "
70 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
71 self.database = database
73 if resolution == 0
or resolution
is None:
74 raise Exception(
"You must pass a resolution and it can't be zero")
76 indb = open(
"included." + filelabel +
".xl.db",
"w")
77 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
78 midb = open(
"missing." + filelabel +
".xl.db",
"w")
83 "No linker chemistry specified. A linker must be given, as an "
84 "ihm.ChemDescriptor object (see the "
85 "CrossLinkingMassSpectrometryRestraint documentation).")
86 self.rs.set_name(self.rs.get_name() +
"_Data")
87 self.rspsi = self._create_restraint_set(
"PriorPsi")
88 self.rssig = self._create_restraint_set(
"PriorSig")
89 self.rslin = self._create_restraint_set(
"Linear")
91 self.rs.set_metadata(self.database.name, length, slope)
93 self.rs.set_linker_auth_name(linker.auth_name)
94 for attr
in (
'chemical_name',
'smiles',
'smiles_canonical',
95 'inchi',
'inchi_key'):
96 val = getattr(linker, attr)
98 getattr(self.rs,
"set_linker_" + attr)(val)
102 self.linear.set_slope(0.0)
105 self.psi_is_sampled =
True
106 self.sigma_is_sampled =
True
107 self.psi_dictionary = {}
108 self.sigma_dictionary = {}
110 self.outputlevel =
"low"
114 xl_groups = [p.get_cross_link_group(self)
115 for p, state
in IMP.pmi.tools._all_protocol_outputs(
119 copies_to_add = defaultdict(int)
120 print(
'gathering copies')
121 for xlid
in self.database.xlid_iterator():
122 for xl
in self.database[xlid]:
123 r1 = xl[self.database.residue1_key]
124 c1 = xl[self.database.protein1_key]
125 r2 = xl[self.database.residue2_key]
126 c2 = xl[self.database.protein2_key]
127 for c, r
in ((c1, r1), (c2, r2)):
128 if c
in copies_to_add:
131 root_hier, state_index=0, molecule=c, residue_index=r,
132 resolution=resolution).get_selected_particles()
134 copies_to_add[c] = len(sel)-1
136 for molname
in copies_to_add:
137 if copies_to_add[molname] == 0:
140 self.database.protein1_key, operator.eq, molname)
141 self.database.set_value(self.database.protein1_key,
144 self.database.protein2_key, operator.eq, molname)
145 self.database.set_value(self.database.protein2_key,
147 for ncopy
in range(copies_to_add[molname]):
148 self.database.clone_protein(
'%s.0' % molname,
149 '%s.%i' % (molname, ncopy + 1))
150 print(
'done pmi2 prelims')
152 for xlid
in self.database.xlid_iterator():
153 new_contribution =
True
154 for xl
in self.database[xlid]:
156 r1 = xl[self.database.residue1_key]
157 c1 = xl[self.database.protein1_key]
158 r2 = xl[self.database.residue2_key]
159 c2 = xl[self.database.protein2_key]
166 name1, copy1 = c1.split(
'.')
168 name2, copy2 = c2.split(
'.')
172 (p[0].add_experimental_cross_link(r1, name1, r2, name2,
175 IMP.pmi.tools._all_protocol_outputs(root_hier),
178 iterlist = range(len(IMP.atom.get_by_type(
179 root_hier, IMP.atom.STATE_TYPE)))
180 for nstate, r
in enumerate(iterlist):
182 xl[self.database.state_key] = nstate
183 xl[self.database.data_set_name_key] = self.label
186 root_hier, state_index=nstate, molecule=name1,
187 copy_index=int(copy1), residue_index=r1,
188 resolution=resolution).get_selected_particles()
190 root_hier, state_index=nstate, molecule=name2,
191 copy_index=int(copy2), residue_index=r2,
192 resolution=resolution).get_selected_particles()
199 "residue %d of chain %s selects multiple "
200 "particles %s" % (r1, c1, str(ps1)))
203 "CrossLinkingMassSpectrometryRestraint: "
204 "residue %d of chain %s is not there" % (r1, c1),
206 midb.write(str(xl) +
"\n")
211 "residue %d of chain %s selects multiple "
212 "particles %s" % (r2, c2, str(ps2)))
215 "CrossLinkingMassSpectrometryRestraint: "
216 "residue %d of chain %s is not there" % (r2, c2),
218 midb.write(str(xl) +
"\n")
224 if p1 == p2
and r1 == r2:
226 "CrossLinkingMassSpectrometryRestraint: "
227 "same particle and same residue, skipping "
232 print(
"generating a new cross-link restraint")
233 new_contribution =
False
238 dr.set_source_protein1(c1)
239 dr.set_source_protein2(c2)
240 dr.set_source_residue1(r1)
241 dr.set_source_residue2(r2)
242 restraints.append(dr)
244 if self.database.sigma1_key
not in xl.keys():
246 xl[self.database.sigma1_key] = sigma1name
248 sigma1name = xl[self.database.sigma1_key]
251 if self.database.sigma2_key
not in xl.keys():
253 xl[self.database.sigma2_key] = sigma2name
255 sigma2name = xl[self.database.sigma2_key]
258 if self.database.psi_key
not in xl.keys():
260 xl[self.database.psi_key] = psiname
262 psiname = xl[self.database.psi_key]
266 p1i = p1.get_particle_index()
268 p2i = p2.get_particle_index()
271 xl[
"Particle_sigma1"] = sigma1
273 xl[
"Particle_sigma2"] = sigma2
275 xl[
"Particle_psi"] = psi
277 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
280 print(
"--------------")
281 print(
"CrossLinkingMassSpectrometryRestraint: generating "
282 "cross-link restraint between")
283 print(
"CrossLinkingMassSpectrometryRestraint: residue %d "
284 "of chain %s and residue %d of chain %s"
286 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 "
287 "%s sigma2 %s psi %s"
288 % (sigma1name, sigma2name, psiname))
289 print(
"CrossLinkingMassSpectrometryRestraint: between "
290 "particles %s and %s"
291 % (p1.get_name(), p2.get_name()))
292 print(
"==========================================\n")
293 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
296 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
297 sigma1, sigma2, psi, ex_xl[1])
304 xl[
"IntraRigidBody"] =
True
306 xl[
"IntraRigidBody"] =
False
308 xl_label = self.database.get_short_cross_link_string(xl)
309 xl[
"ShortLabel"] = xl_label
310 dr.set_name(xl_label)
315 pr.set_name(xl_label)
316 self.rslin.add_restraint(pr)
318 self.xl_list.append(xl)
320 indb.write(str(xl) +
"\n")
322 if len(self.xl_list) == 0:
324 "CrossLinkingMassSpectrometryRestraint: no cross-link "
326 self.xl_restraints = restraints
328 self.rs.add_restraint(lw)
333 def __set_dataset(self, ds):
334 self.database.dataset = ds
335 dataset = property(
lambda self: self.database.dataset, __set_dataset)
338 """ get the hierarchy """
342 """ get the restraint set """
346 """ get the restraints in a list """
347 return self.xl_restraints
350 """ get the dummy restraints to be displayed in the rmf file """
351 return self.rs, self.rssig, self.rspsi
354 """ Get a list of tuples containing the particle pairs """
356 for i
in range(len(self.pairs)):
357 p0 = self.pairs[i][0]
358 p1 = self.pairs[i][1]
359 ppairs.append((p0, p1))
363 """ Set the output level of the output """
364 self.outputlevel = level
367 """ Switch on/off the sampling of psi particles """
368 self.psi_is_sampled = is_sampled
371 """ Switch on/off the sampling of sigma particles """
372 self.sigma_is_sampled = is_sampled
375 """ This is called internally. Creates a nuisance
376 on the structural uncertainty """
377 if name
in self.sigma_dictionary:
378 return self.sigma_dictionary[name][0]
381 sigmaminnuis = 0.0000001
382 sigmamaxnuis = 1000.0
386 sigma = IMP.pmi.tools.SetupNuisance(
387 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
388 self.sigma_is_sampled, name=name).get_particle()
389 self.sigma_dictionary[name] = (
392 self.sigma_is_sampled)
393 self.rssig.add_restraint(
403 """ This is called internally. Creates a nuisance
404 on the data uncertainty """
405 if name
in self.psi_dictionary:
406 return self.psi_dictionary[name][0]
409 psiminnuis = 0.0000001
410 psimaxnuis = 0.4999999
414 psi = IMP.pmi.tools.SetupNuisance(
415 self.model, psiinit, psiminnuis, psimaxnuis,
416 self.psi_is_sampled, name=name).get_particle()
417 self.psi_dictionary[name] = (psi, psitrans, self.psi_is_sampled)
419 self.rspsi.add_restraint(
427 """Get the output of the restraint to be used by the IMP.pmi.output
429 output = super(CrossLinkingMassSpectrometryRestraint,
432 for xl
in self.xl_list:
434 xl_label = xl[
"ShortLabel"]
438 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
439 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
443 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
446 for psiname
in self.psi_dictionary:
447 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
448 str(psiname) + self._label_suffix] = str(
449 self.psi_dictionary[psiname][0].get_scale())
451 for sigmaname
in self.sigma_dictionary:
452 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
453 str(sigmaname) + self._label_suffix] = str(
454 self.sigma_dictionary[sigmaname][0].get_scale())
459 """Get the unweighted likelihood of the restraint"""
461 for restraint
in self.xl_restraints:
462 likelihood *= restraint.get_probability()
467 """ Get all need data to construct a mover in IMP.pmi.dof class"""
469 if self.sigma_is_sampled:
470 for sigmaname
in self.sigma_dictionary:
472 "Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" \
473 + str(sigmaname) +
"_" + self.label
474 particle = self.sigma_dictionary[sigmaname][0]
475 maxstep = (self.sigma_dictionary[sigmaname][1])
479 mv.set_name(mover_name)
482 if self.psi_is_sampled:
483 for psiname
in self.psi_dictionary:
485 "Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + \
486 str(psiname) +
"_" + self.label
487 particle = self.psi_dictionary[psiname][0]
488 maxstep = (self.psi_dictionary[psiname][1])
492 mv.set_name(mover_name)
498 """ Get the particles to be sampled by the IMP.pmi.sampler object """
500 if self.sigma_is_sampled:
501 for sigmaname
in self.sigma_dictionary:
502 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
503 str(sigmaname) + self._label_suffix] =\
504 ([self.sigma_dictionary[sigmaname][0]],
505 self.sigma_dictionary[sigmaname][1])
507 if self.psi_is_sampled:
508 for psiname
in self.psi_dictionary:
509 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
510 str(psiname) + self._label_suffix] = \
511 ([self.psi_dictionary[psiname][0]],
512 self.psi_dictionary[psiname][1])
518 """Setup cross-link distance restraints at atomic level
519 The "atomic" aspect is that it models the particle uncertainty with
520 a Gaussian. The noise in the data and the structural uncertainty of
521 cross-linked amino-acids is inferred using Bayes' theory of probability
522 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
525 _include_in_rmf =
True
527 def __init__(self, root_hier, xldb, atom_type="NZ", length=10.0,
528 slope=0.01, nstates=
None, label=
None,
529 nuisances_are_optimized=
True, sigma_init=5.0,
530 psi_init=0.01, one_psi=
True, filelabel=
None, weight=1.):
532 Automatically creates one "sigma" per cross-linked residue and one
534 Other nuisance options are available.
535 @note Will return an error if the data+extra_sel don't specify
536 two particles per XL pair.
537 @param root_hier The root hierarchy on which you'll do selection
538 @param xldb CrossLinkDataBase object
539 @param atom_type Can either be "NZ" or "CA"
540 @param length The XL linker length
541 @param slope Linear term to add to the restraint function to
543 @param nstates The number of states to model. Defaults to the
544 number of states in root.
545 @param label The output label for the restraint
546 @param nuisances_are_optimized Whether to optimize nuisances
547 @param sigma_init The initial value for all the sigmas
548 @param psi_init The initial value for all the psis
549 @param one_psi Use a single psi for all restraints (if False,
551 @param filelabel automatically generated file containing
552 missing/included/excluded
553 cross-links will be labeled using this text
554 @param weight Weight of restraint
559 self.root = root_hier
560 rname =
"AtomicXLRestraint"
561 super(AtomicCrossLinkMSRestraint, self).
__init__(
562 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
566 self.sigma_is_sampled = nuisances_are_optimized
567 self.psi_is_sampled = nuisances_are_optimized
569 if atom_type
not in (
"NZ",
"CA"):
570 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
571 self.atom_type = atom_type
572 self.nstates = nstates
574 self.nstates = len(IMP.atom.get_by_type(self.root,
575 IMP.atom.STATE_TYPE))
576 elif nstates != len(IMP.atom.get_by_type(self.root,
577 IMP.atom.STATE_TYPE)):
578 print(
"Warning: nstates is not the same as the number of states "
581 self.rs_psi = self._create_restraint_set(
"psi")
582 self.rs_sig = self._create_restraint_set(
"sigma")
583 self.rs_lin = self._create_restraint_set(
"linear")
585 self.psi_dictionary = {}
586 self.sigma_dictionary = {}
588 self.particles = defaultdict(set)
589 self.one_psi = one_psi
590 self.bonded_pairs = []
592 print(
'creating a single psi for all XLs')
594 print(
'creating one psi for each XL')
597 if filelabel
is not None:
598 indb = open(
"included." + filelabel +
".xl.db",
"w")
599 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
600 midb = open(
"missing." + filelabel +
".xl.db",
"w")
603 self._create_sigma(
'sigma', sigma_init)
605 self._create_psi(
'psi', psi_init)
607 for xlid
in self.xldb.xlid_iterator():
608 self._create_psi(xlid, psi_init)
612 for xlid
in self.xldb.xlid_iterator():
615 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
617 psip = self.psi_dictionary[
618 unique_id][0].get_particle_index()
624 num_contributions = 0
627 for nstate
in self.nstates:
628 for xl
in self.xldb[xlid]:
629 r1 = xl[self.xldb.residue1_key]
630 c1 = xl[self.xldb.protein1_key].strip()
631 r2 = xl[self.xldb.residue2_key]
632 c2 = xl[self.xldb.protein2_key].strip()
637 self.root, state_index=nstate,
640 residue_index=r1).get_selected_particles()
642 self.root, state_index=nstate,
645 residue_index=r2).get_selected_particles()
647 warnings.warn(
"AtomicXLRestraint: residue %d of "
648 "chain %s is not there" % (r1, c1),
650 if filelabel
is not None:
651 midb.write(str(xl) +
"\n")
655 warnings.warn(
"AtomicXLRestraint: residue %d of "
656 "chain %s is not there" % (r2, c2),
658 if filelabel
is not None:
659 midb.write(str(xl) +
"\n")
664 num1=num_xls_per_res[str(xl.r1)]
665 num2=num_xls_per_res[str(xl.r2)]
666 if num1<sig_threshold:
670 if num2<sig_threshold:
675 sig1 = self.sigma_dictionary[
'sigma'][0]
676 sig2 = self.sigma_dictionary[
'sigma'][0]
679 for p1, p2
in itertools.product(ps1, ps2):
682 self.particles[nstate] |= set((p1, p2))
684 [p1.get_index(), p2.get_index()],
685 [sig1.get_particle_index(),
686 sig2.get_particle_index()])
687 num_contributions += 1
689 if num_contributions > 0:
690 print(
'XL:', xlid,
'num contributions:', num_contributions)
693 raise Exception(
"You didn't create any XL restraints")
694 print(
'created', len(xlrs),
'XL restraints')
695 rname = self.rs.get_name()
697 self.rs.set_name(rname)
698 self.rs.set_weight(self.weight)
699 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
701 def get_hierarchy(self):
704 def _create_sigma(self, name, sigmainit):
705 """ This is called internally. Creates a nuisance
706 on the structural uncertainty """
707 if name
in self.sigma_dictionary:
708 return self.sigma_dictionary[name][0]
710 sigmaminnuis = 0.0000001
711 sigmamaxnuis = 1000.0
715 sigma = IMP.pmi.tools.SetupNuisance(
716 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
717 self.sigma_is_sampled).get_particle()
718 self.sigma_dictionary[name] = (
719 sigma, sigmatrans, self.sigma_is_sampled)
721 self.model, sigma, 1000000000.0, sigmamax, sigmamin))
724 def _create_psi(self, name, psiinit):
725 """ This is called internally. Creates a nuisance
726 on the data uncertainty """
727 if name
in self.psi_dictionary:
728 return self.psi_dictionary[name][0]
730 psiminnuis = 0.0000001
731 psimaxnuis = 0.4999999
735 psi = IMP.pmi.tools.SetupNuisance(self.model,
739 self.psi_is_sampled).get_particle()
740 self.psi_dictionary[name] = (
745 self.rs_psi.add_restraint(
757 """ create dummy harmonic restraints for each XL but don't add to model
758 Makes it easy to see each contribution to each XL in RMF
760 class MyGetRestraint(object):
764 def get_restraint_for_rmf(self):
770 for nxl
in range(self.get_number_of_restraints()):
771 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
772 self.rs.get_restraint(nxl))
774 for ncontr
in range(xl.get_number_of_contributions()):
775 ps = xl.get_contribution(ncontr)
777 hps, [self.model.get_particle(p)
for p
in ps],
778 'xl%i_contr%i' % (nxl, ncontr))
780 dummy_rs.append(MyGetRestraint(rs))
784 """ Get the particles to be sampled by the IMP.pmi.sampler object """
786 if self.sigma_is_sampled:
787 for sigmaname
in self.sigma_dictionary:
788 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
789 str(sigmaname) + self._label_suffix] = \
790 ([self.sigma_dictionary[sigmaname][0]],
791 self.sigma_dictionary[sigmaname][1])
792 if self.psi_is_sampled:
793 for psiname
in self.psi_dictionary:
794 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
795 str(psiname) + self._label_suffix] = \
796 ([self.psi_dictionary[psiname][0]],
797 self.psi_dictionary[psiname][1])
800 def get_bonded_pairs(self):
801 return self.bonded_pairs
803 def get_number_of_restraints(self):
804 return self.rs.get_number_of_restraints()
807 return 'XL restraint with ' + \
808 str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
812 """Read a stat file and load all the sigmas.
813 This is potentially quite stupid.
814 It's also a hack since the sigmas should be stored in the RMF file.
815 Also, requires one sigma and one psi for ALL XLs.
818 sig_val = float(subprocess.check_output(
819 [
"process_output.py",
"-f", in_fn,
"-s",
820 "AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
821 psi_val = float(subprocess.check_output(
822 [
"process_output.py",
"-f", in_fn,
"-s",
823 "AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
824 for nxl
in range(self.rs.get_number_of_restraints()):
825 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
826 self.rs.get_restraint(nxl))
829 for contr
in range(xl.get_number_of_contributions()):
830 sig1, sig2 = xl.get_contribution_sigmas(contr)
833 print(
'loaded nuisances from file')
836 min_dist_for_violation=1e9, coarsen=
False,
837 limit_to_chains=
None, exclude_chains=
''):
838 """Create CMM files, one for each state, of all cross-links.
839 will draw in GREEN if non-violated in all states (or if only one state)
840 will draw in PURPLE if non-violated only in a subset of states
841 (draws nothing elsewhere)
842 will draw in RED in ALL states if all violated
843 (if only one state, you'll only see green and red)
845 @param out_prefix Output xlink files prefix
846 @param max_prob_for_violation It's a violation if the probability
848 @param min_dist_for_violation It's a violation if the min dist
850 @param coarsen Use CA positions
851 @param limit_to_chains Try to visualize just these chains
852 @param exclude_chains Try to NOT visualize these chains
854 print(
'going to calculate violations and plot CMM files')
856 all_dists = [s[
"low_dist"]
for s
in all_stats]
862 cmds = defaultdict(set)
863 for nstate
in range(self.nstates):
864 outf = open(out_prefix+str(nstate)+
'.cmm',
'w')
865 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
868 print(
'will limit to', limit_to_chains)
869 print(
'will exclude', exclude_chains)
874 for nxl
in range(self.rs.get_number_of_restraints()):
878 for nstate
in range(self.nstates):
879 prob = state_info[nstate][nxl][
"prob"]
880 low_dist = state_info[nstate][nxl][
"low_dist"]
881 if (prob < max_prob_for_violation
or
882 low_dist > min_dist_for_violation):
890 if len(npass) == self.nstates:
892 elif len(nviol) == self.nstates:
896 print(nxl,
'state dists:',
897 [state_info[nstate][nxl][
"low_dist"]
898 for nstate
in range(self.nstates)],
899 'viol states:', nviol,
'all viol?', all_viol)
900 for nstate
in range(self.nstates):
919 pp = state_info[nstate][nxl][
"low_pp"]
930 cmds[nstate].add((ch1, r1))
931 cmds[nstate].add((ch2, r2))
933 outf = out_fns[nstate]
935 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
936 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
937 % (nv, c1[0], c1[1], c1[2], r, g, b))
938 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
939 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
940 % (nv+1, c2[0], c2[1], c2[2], r, g, b))
941 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
942 'r="%.2f" g="%.2f" b="%.2f"/> \n'
943 % (nv, nv+1, r, g, b))
946 for nstate
in range(self.nstates):
947 out_fns[nstate].write(
'</marker_set>\n')
948 out_fns[nstate].close()
950 for ch, r
in cmds[nstate]:
951 cmd +=
'#%i:%i.%s ' % (nstate, r, ch)
956 def _get_contribution_info(self, xl, ncontr, use_CA=False):
957 """Return the particles at that contribution. If requested will
958 return CA's instead"""
959 idx1 = xl.get_contribution(ncontr)[0]
960 idx2 = xl.get_contribution(ncontr)[1]
965 idx1 = sel.get_selected_particle_indexes()[0]
969 idx2 = sel.get_selected_particle_indexes()[0]
973 return idx1, idx2, dist
976 exclude_chains=
'', use_CA=
False):
977 ''' return the probability, best distance, two coords, and possibly
979 @param limit_to_state Only examine contributions from one state
980 @param limit_to_chains Returns the particles for certain "easy
982 @param exclude_chains Even if you limit, don't let one end be in
983 this list. Only works if you also limit chains
984 @param use_CA Limit to CA particles
987 for nxl
in range(self.rs.get_number_of_restraints()):
989 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
990 self.rs.get_restraint(nxl))
996 for contr
in range(xl.get_number_of_contributions()):
997 pp = xl.get_contribution(contr)
1002 idx1 = sel.get_selected_particle_indexes()[0]
1006 idx2 = sel.get_selected_particle_indexes()[0]
1008 if limit_to_state
is not None:
1011 if nstate != limit_to_state:
1013 state_contrs.append(contr)
1016 if limit_to_chains
is not None:
1021 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1022 c1
not in exclude_chains
1023 and c2
not in exclude_chains):
1024 if dist < low_dist_lim:
1030 if limit_to_state
is not None:
1031 this_info[
"prob"] = xl.evaluate_for_contributions(
1034 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1035 if limit_to_chains
is not None:
1036 this_info[
"low_pp"] = low_pp_lim
1038 this_info[
"low_pp"] = low_pp
1040 this_info[
"low_dist"] = low_dist
1041 if not self.one_psi:
1043 this_info[
"psi"] = pval
1044 ret.append(this_info)
1047 def print_stats(self):
1049 for nxl, s
in enumerate(stats):
1050 print(s[
"low_dist"])
1052 def get_output(self):
1053 output = super(AtomicCrossLinkMSRestraint, self).get_output()
1064 for nxl, s
in enumerate(stats):
1065 if s[
'low_dist'] > 20.0:
1067 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"Prob")] = str(s[
'prob'])
1068 output[
"AtomicXLRestraint_%i_%s"
1069 % (nxl,
"BestDist")] = str(s[
'low_dist'])
1070 if not self.one_psi:
1071 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"psi")] \
1073 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1077 class CysteineCrossLinkRestraint(object):
1078 def __init__(self, root_hier, filename, cbeta=False,
1079 betatuple=(0.03, 0.1),
1080 disttuple=(0.0, 25.0, 1000),
1081 omegatuple=(1.0, 1000.0, 50),
1082 sigmatuple=(0.3, 0.3, 1),
1084 sigmaissampled=
False,
1085 weightissampled=
True,
1086 epsilonissampled=
True):
1093 self.m = root_hier.get_model()
1096 self.epsilonmaxtrans = 0.01
1097 self.sigmamaxtrans = 0.1
1098 self.betamaxtrans = 0.01
1099 self.weightmaxtrans = 0.1
1101 self.outputlevel =
"low"
1102 self.betaissampled = betaissampled
1103 self.sigmaissampled = sigmaissampled
1104 self.weightissampled = weightissampled
1105 self.epsilonissampled = epsilonissampled
1107 betalower = betatuple[0]
1108 betaupper = betatuple[1]
1114 self.beta = IMP.pmi.tools.SetupNuisance(
1119 betaissampled).get_particle(
1122 self.sigma = IMP.pmi.tools.SetupNuisance(
1127 sigmaissampled).get_particle()
1129 self.weight = IMP.pmi.tools.SetupWeight(
1131 isoptimized=
False).get_particle(
1135 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1144 if t[5]
in self.epsilons:
1145 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1146 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1148 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(
1149 self.m, 0.01, 0.01, 1.0 - float(t[4]),
1150 epsilonissampled).get_particle()
1151 up = self.epsilons[t[5]].get_upper()
1152 low = self.epsilons[t[5]].get_lower()
1154 self.epsilons[t[5]].set_upper(low)
1156 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1160 crossdata = IMP.pmi.tools.get_cross_link_data(
1161 "cysteine",
"cysteine_CA_FES.txt.standard",
1162 disttuple, omegatuple, sigmatuple, disttuple[1],
1165 crossdata = IMP.pmi.tools.get_cross_link_data(
1166 "cysteine",
"cysteine_CB_FES.txt.standard",
1167 disttuple, omegatuple, sigmatuple, disttuple[1],
1171 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1172 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1173 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1176 print(
"--------------")
1177 print(
"CysteineCrossLink: attempting to create a restraint "
1198 self.epsilons[epslabel],
1209 molecule=chain1, residue_index=resid1,
1211 p1 = p1.get_selected_particles()
1218 molecule=chain2, residue_index=resid2,
1220 p2 = p2.get_selected_particles()
1230 for t
in range(-1, 2):
1232 molecule=chain1, copy_index=0,
1233 residue_index=resid1 + t)
1234 p = p.get_selected_particles()
1240 "CysteineCrossLink: missing representation for "
1241 "residue %d of chain %s" % (resid1 + t, chain1),
1245 molecule=chain2, copy_index=0,
1246 residue_index=resid2 + t)
1247 p = p.get_selected_particles()
1253 "CysteineCrossLink: missing representation for "
1254 "residue %d of chain %s" % (resid2 + t, chain2),
1258 if (p1
is not None and p2
is not None):
1259 ccl.add_contribution(p1, p2)
1263 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" +
1264 str(resid2) +
"_" + chain2,
1268 if (len(p1) == 3
and len(p2) == 3):
1269 p11n = p1[0].get_name()
1270 p12n = p1[1].get_name()
1271 p13n = p1[2].get_name()
1272 p21n = p2[0].get_name()
1273 p22n = p2[1].get_name()
1274 p23n = p2[2].get_name()
1276 print(
"CysteineCrossLink: generating CB cysteine "
1277 "cross-link restraint between")
1278 print(
"CysteineCrossLink: residue %d of chain %s and "
1279 "residue %d of chain %s"
1280 % (resid1, chain1, resid2, chain2))
1281 print(
"CysteineCrossLink: between particles %s %s %s and "
1282 "%s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1284 ccl.add_contribution(p1, p2)
1287 self.rs.add_restraint(ccl)
1288 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1289 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1292 self.weight.get_particle()
1293 ).set_weights_are_optimized(weightissampled)
1295 def set_label(self, label):
1298 def add_to_model(self):
1303 if self.epsilonissampled:
1304 for eps
in self.epsilons.keys():
1305 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
1306 "_" + self.label] = ([self.epsilons[eps]],
1307 self.epsilonmaxtrans)
1308 if self.betaissampled:
1309 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1310 self.label] = ([self.beta], self.betamaxtrans)
1311 if self.weightissampled:
1312 ps[
"Weights_CysteineCrossLinkRestraint_" +
1313 self.label] = ([self.weight], self.weightmaxtrans)
1314 if self.sigmaissampled:
1315 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1316 self.label] = ([self.sigma], self.sigmamaxtrans)
1319 def set_output_level(self, level="low"):
1321 self.outputlevel = level
1323 def get_restraint(self):
1326 def get_sigma(self):
1329 def get_output(self):
1331 score = self.rs.unprotected_evaluate(
None)
1332 output[
"_TotalScore"] = str(score)
1333 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1334 output[
"CysteineCrossLinkRestraint_sigma_" +
1335 self.label] = str(self.sigma.get_scale())
1336 for eps
in self.epsilons.keys():
1337 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1338 self.label] = str(self.epsilons[eps].get_scale())
1339 output[
"CysteineCrossLinkRestraint_beta_" +
1340 self.label] = str(self.beta.get_scale())
1341 for n
in range(self.weight.get_number_of_weights()):
1342 output[
"CysteineCrossLinkRestraint_weights_" +
1343 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1345 if self.outputlevel ==
"high":
1346 for rst
in self.rs.get_restraints():
1347 cclr = IMP.isd.CysteineCrossLinkRestraint.get_from(rst)
1348 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1350 "_" + self.label] = cclr.get_model_frequency()
1351 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1352 cclr.get_name() +
"_" +
1353 self.label] = cclr.get_standard_error()
1357 class DisulfideCrossLinkRestraint(object):
1358 def __init__(self, representation_or_hier,
1366 self.m = representation_or_hier.get_model()
1369 resolution=resolution)
1372 resolution=resolution)
1379 self.linear.set_slope(0.0)
1383 self.psi_dictionary = {}
1384 self.sigma_dictionary = {}
1385 self.psi_is_sampled =
False
1386 self.sigma_is_sampled =
False
1389 if len(ps1) > 1
or len(ps1) == 0:
1391 "DisulfideBondRestraint: ERROR> first selection pattern "
1392 "selects multiple particles or sero particles")
1394 if len(ps2) > 1
or len(ps2) == 0:
1396 "DisulfideBondRestraint: ERROR> second selection pattern "
1397 "selects multiple particles or sero particles")
1402 sigma = self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1403 psi = self.create_psi(
"PSI_DISULFIDE_BOND")
1405 p1i = p1.get_index()
1406 p2i = p2.get_index()
1412 dr.add_contribution((p1i, p2i), (si, si), psii)
1416 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1417 self.rslin.add_restraint(pr)
1420 self.rs.add_restraint(lw)
1422 self.xl[
"Particle1"] = p1
1423 self.xl[
"Particle2"] = p2
1424 self.xl[
"Sigma"] = sigma
1425 self.xl[
"Psi"] = psi
1427 def add_to_model(self):
1430 self.m, self.rslin, add_to_rmf=
True)
1432 def get_hierarchies(self):
1435 def get_restraint_sets(self):
1438 def get_restraint(self):
1441 def get_restraint_for_rmf(self):
1444 def get_restraints(self):
1446 for r
in self.rs.get_restraints():
1447 rlist.append(IMP.core.PairRestraint.get_from(r))
1450 def set_psi_is_sampled(self, is_sampled=True):
1451 self.psi_is_sampled = is_sampled
1453 def set_sigma_is_sampled(self, is_sampled=True):
1454 self.sigma_is_sampled = is_sampled
1456 def create_sigma(self, name):
1457 ''' a nuisance on the structural uncertainty '''
1458 if name
in self.sigma_dictionary:
1459 return self.sigma_dictionary[name][0]
1462 sigmaminnuis = 0.0000001
1463 sigmamaxnuis = 1000.0
1465 sigma = IMP.pmi.tools.SetupNuisance(
1466 self.m, sigmainit, sigmaminnuis, sigmamaxnuis,
1467 self.sigma_is_sampled).get_particle()
1468 self.sigma_dictionary[name] = (
1471 self.sigma_is_sampled)
1475 def create_psi(self, name):
1476 ''' a nuisance on the inconsistency '''
1477 if name
in self.psi_dictionary:
1478 return self.psi_dictionary[name][0]
1481 psiminnuis = 0.0000001
1482 psimaxnuis = 0.4999999
1484 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1485 psiminnuis, psimaxnuis,
1486 self.psi_is_sampled).get_particle()
1487 self.psi_dictionary[name] = (
1490 self.psi_is_sampled)
1494 def get_output(self):
1496 score = self.rs.unprotected_evaluate(
None)
1497 output[
"_TotalScore"] = str(score)
1498 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1499 output[
"DisulfideBondRestraint_Linear_Score_" +
1500 self.label] = self.rslin.unprotected_evaluate(
None)
1504 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
def get_likelihood
Get the unweighted likelihood of the restraint.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Score distance between two particle centers using a harmonic function.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
A RestraintSet subclass to track cross-link metadata.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Score a pair of particles based on the distance between their centers.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the partic...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.