1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
16 from collections
import defaultdict
23 """Setup cross-link distance restraints from mass spectrometry data.
24 The noise in the data and the structural uncertainty of cross-linked
25 amino-acids is inferred using Bayes theory of probability
26 @note Wraps an IMP::isd::CrossLinkMSRestraint
28 _include_in_rmf =
True
30 def __init__(self, root_hier, database=None, length=10.0, resolution=None,
31 slope=0.02, label=
None, filelabel=
"None",
32 attributes_for_label=
None, linker=
None, weight=1.):
34 @param root_hier The canonical hierarchy containing all the states
35 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
36 object that contains the cross-link dataset
37 @param length maximal cross-linker length (including the residue
39 @param resolution what representation resolution should the cross-link
40 restraint be applied to.
41 @param slope The slope of a distance-linear scoring function that
42 funnels the score when the particles are
43 too far away. Suggested value 0.02.
44 @param label the extra text to label the restraint so that it is
45 searchable in the output
46 @param filelabel automatically generated file containing
47 missing/included/excluded
48 cross-links will be labeled using this text
49 @param attributes_for_label
50 @param weight Weight of restraint
51 @param linker description of the chemistry of the linker itself, as
52 an ihm.ChemDescriptor object
53 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
54 Common cross-linkers can be found in the `ihm.cross_linkers`
58 model = root_hier.get_model()
61 model, weight=weight, label=label,
65 raise Exception(
"You must pass a database")
68 "CrossLinkingMassSpectrometryRestraint: database should "
69 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
70 self.database = database
72 if resolution == 0
or resolution
is None:
73 raise Exception(
"You must pass a resolution and it can't be zero")
75 indb = open(
"included." + filelabel +
".xl.db",
"w")
76 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
77 midb = open(
"missing." + filelabel +
".xl.db",
"w")
82 "No linker chemistry specified. A linker must be given, as an "
83 "ihm.ChemDescriptor object (see the "
84 "CrossLinkingMassSpectrometryRestraint documentation).")
85 self.rs.set_name(self.rs.get_name() +
"_Data")
86 self.rspsi = self._create_restraint_set(
"PriorPsi")
87 self.rssig = self._create_restraint_set(
"PriorSig")
88 self.rslin = self._create_restraint_set(
"Linear")
90 self.rs.set_metadata(self.database.name, length, slope)
92 self.rs.set_linker_auth_name(linker.auth_name)
93 for attr
in (
'chemical_name',
'smiles',
'smiles_canonical',
94 'inchi',
'inchi_key'):
95 val = getattr(linker, attr)
97 getattr(self.rs,
"set_linker_" + attr)(val)
101 self.linear.set_slope(0.0)
104 self.psi_is_sampled =
True
105 self.sigma_is_sampled =
True
106 self.psi_dictionary = {}
107 self.sigma_dictionary = {}
109 self.outputlevel =
"low"
113 xl_groups = [p.get_cross_link_group(self)
114 for p, state
in IMP.pmi.tools._all_protocol_outputs(
118 copies_to_add = defaultdict(int)
119 print(
'gathering copies')
120 for xlid
in self.database.xlid_iterator():
121 for xl
in self.database[xlid]:
122 r1 = xl[self.database.residue1_key]
123 c1 = xl[self.database.protein1_key]
124 r2 = xl[self.database.residue2_key]
125 c2 = xl[self.database.protein2_key]
126 for c, r
in ((c1, r1), (c2, r2)):
127 if c
in copies_to_add:
130 root_hier, state_index=0, molecule=c, residue_index=r,
131 resolution=resolution).get_selected_particles()
133 copies_to_add[c] = len(sel)-1
135 for molname
in copies_to_add:
136 if copies_to_add[molname] == 0:
139 self.database.protein1_key, operator.eq, molname)
140 self.database.set_value(self.database.protein1_key,
143 self.database.protein2_key, operator.eq, molname)
144 self.database.set_value(self.database.protein2_key,
146 for ncopy
in range(copies_to_add[molname]):
147 self.database.clone_protein(
'%s.0' % molname,
148 '%s.%i' % (molname, ncopy + 1))
149 print(
'done pmi2 prelims')
151 for xlid
in self.database.xlid_iterator():
152 new_contribution =
True
153 for xl
in self.database[xlid]:
155 r1 = xl[self.database.residue1_key]
156 c1 = xl[self.database.protein1_key]
157 r2 = xl[self.database.residue2_key]
158 c2 = xl[self.database.protein2_key]
165 name1, copy1 = c1.split(
'.')
167 name2, copy2 = c2.split(
'.')
171 (p[0].add_experimental_cross_link(r1, name1, r2, name2,
174 IMP.pmi.tools._all_protocol_outputs(root_hier),
177 iterlist = range(len(IMP.atom.get_by_type(
178 root_hier, IMP.atom.STATE_TYPE)))
179 for nstate, r
in enumerate(iterlist):
181 xl[self.database.state_key] = nstate
182 xl[self.database.data_set_name_key] = self.label
185 root_hier, state_index=nstate, molecule=name1,
186 copy_index=int(copy1), residue_index=r1,
187 resolution=resolution).get_selected_particles()
189 root_hier, state_index=nstate, molecule=name2,
190 copy_index=int(copy2), residue_index=r2,
191 resolution=resolution).get_selected_particles()
198 "residue %d of chain %s selects multiple "
199 "particles %s" % (r1, c1, str(ps1)))
202 "CrossLinkingMassSpectrometryRestraint: "
203 "residue %d of chain %s is not there" % (r1, c1),
205 midb.write(str(xl) +
"\n")
210 "residue %d of chain %s selects multiple "
211 "particles %s" % (r2, c2, str(ps2)))
214 "CrossLinkingMassSpectrometryRestraint: "
215 "residue %d of chain %s is not there" % (r2, c2),
217 midb.write(str(xl) +
"\n")
223 if p1 == p2
and r1 == r2:
225 "CrossLinkingMassSpectrometryRestraint: "
226 "same particle and same residue, skipping "
231 print(
"generating a new cross-link restraint")
232 new_contribution =
False
237 dr.set_source_protein1(c1)
238 dr.set_source_protein2(c2)
239 dr.set_source_residue1(r1)
240 dr.set_source_residue2(r2)
241 restraints.append(dr)
243 if self.database.sigma1_key
not in xl.keys():
245 xl[self.database.sigma1_key] = sigma1name
247 sigma1name = xl[self.database.sigma1_key]
250 if self.database.sigma2_key
not in xl.keys():
252 xl[self.database.sigma2_key] = sigma2name
254 sigma2name = xl[self.database.sigma2_key]
257 if self.database.psi_key
not in xl.keys():
259 xl[self.database.psi_key] = psiname
261 psiname = xl[self.database.psi_key]
265 p1i = p1.get_particle_index()
267 p2i = p2.get_particle_index()
270 xl[
"Particle_sigma1"] = sigma1
272 xl[
"Particle_sigma2"] = sigma2
274 xl[
"Particle_psi"] = psi
276 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
279 print(
"--------------")
280 print(
"CrossLinkingMassSpectrometryRestraint: generating "
281 "cross-link restraint between")
282 print(
"CrossLinkingMassSpectrometryRestraint: residue %d "
283 "of chain %s and residue %d of chain %s"
285 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 "
286 "%s sigma2 %s psi %s"
287 % (sigma1name, sigma2name, psiname))
288 print(
"CrossLinkingMassSpectrometryRestraint: between "
289 "particles %s and %s"
290 % (p1.get_name(), p2.get_name()))
291 print(
"==========================================\n")
292 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
295 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
296 sigma1, sigma2, psi, ex_xl[1])
303 xl[
"IntraRigidBody"] =
True
305 xl[
"IntraRigidBody"] =
False
307 xl_label = self.database.get_short_cross_link_string(xl)
308 xl[
"ShortLabel"] = xl_label
309 dr.set_name(xl_label)
314 pr.set_name(xl_label)
315 self.rslin.add_restraint(pr)
317 self.xl_list.append(xl)
319 indb.write(str(xl) +
"\n")
321 if len(self.xl_list) == 0:
323 "CrossLinkingMassSpectrometryRestraint: no cross-link "
325 self.xl_restraints = restraints
327 self.rs.add_restraint(lw)
332 def __set_dataset(self, ds):
333 self.database.dataset = ds
334 dataset = property(
lambda self: self.database.dataset, __set_dataset)
337 """ get the hierarchy """
341 """ get the restraint set """
345 """ get the restraints in a list """
346 return self.xl_restraints
349 """ get the dummy restraints to be displayed in the rmf file """
350 return self.rs, self.rssig, self.rspsi
353 """ Get a list of tuples containing the particle pairs """
355 for i
in range(len(self.pairs)):
356 p0 = self.pairs[i][0]
357 p1 = self.pairs[i][1]
358 ppairs.append((p0, p1))
362 """ Set the output level of the output """
363 self.outputlevel = level
366 """ Switch on/off the sampling of psi particles """
367 self.psi_is_sampled = is_sampled
370 """ Switch on/off the sampling of sigma particles """
371 self.sigma_is_sampled = is_sampled
374 """ This is called internally. Creates a nuisance
375 on the structural uncertainty """
376 if name
in self.sigma_dictionary:
377 return self.sigma_dictionary[name][0]
380 sigmaminnuis = 0.0000001
381 sigmamaxnuis = 1000.0
385 sigma = IMP.pmi.tools.SetupNuisance(
386 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
387 self.sigma_is_sampled, name=name).get_particle()
388 self.sigma_dictionary[name] = (
391 self.sigma_is_sampled)
392 self.rssig.add_restraint(
402 """ This is called internally. Creates a nuisance
403 on the data uncertainty """
404 if name
in self.psi_dictionary:
405 return self.psi_dictionary[name][0]
408 psiminnuis = 0.0000001
409 psimaxnuis = 0.4999999
413 psi = IMP.pmi.tools.SetupNuisance(
414 self.model, psiinit, psiminnuis, psimaxnuis,
415 self.psi_is_sampled, name=name).get_particle()
416 self.psi_dictionary[name] = (psi, psitrans, self.psi_is_sampled)
418 self.rspsi.add_restraint(
426 """Get the output of the restraint to be used by the IMP.pmi.output
430 for xl
in self.xl_list:
432 xl_label = xl[
"ShortLabel"]
436 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
437 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
441 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
444 for psiname
in self.psi_dictionary:
445 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
446 str(psiname) + self._label_suffix] = str(
447 self.psi_dictionary[psiname][0].get_scale())
449 for sigmaname
in self.sigma_dictionary:
450 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
451 str(sigmaname) + self._label_suffix] = str(
452 self.sigma_dictionary[sigmaname][0].get_scale())
457 """Get the unweighted likelihood of the restraint"""
459 for restraint
in self.xl_restraints:
460 likelihood *= restraint.get_probability()
465 """ Get all need data to construct a mover in IMP.pmi.dof class"""
467 if self.sigma_is_sampled:
468 for sigmaname
in self.sigma_dictionary:
470 "Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" \
471 + str(sigmaname) +
"_" + self.label
472 particle = self.sigma_dictionary[sigmaname][0]
473 maxstep = (self.sigma_dictionary[sigmaname][1])
477 mv.set_name(mover_name)
480 if self.psi_is_sampled:
481 for psiname
in self.psi_dictionary:
483 "Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + \
484 str(psiname) +
"_" + self.label
485 particle = self.psi_dictionary[psiname][0]
486 maxstep = (self.psi_dictionary[psiname][1])
490 mv.set_name(mover_name)
496 """ Get the particles to be sampled by the IMP.pmi.sampler object """
498 if self.sigma_is_sampled:
499 for sigmaname
in self.sigma_dictionary:
500 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
501 str(sigmaname) + self._label_suffix] =\
502 ([self.sigma_dictionary[sigmaname][0]],
503 self.sigma_dictionary[sigmaname][1])
505 if self.psi_is_sampled:
506 for psiname
in self.psi_dictionary:
507 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
508 str(psiname) + self._label_suffix] = \
509 ([self.psi_dictionary[psiname][0]],
510 self.psi_dictionary[psiname][1])
516 """Setup cross-link distance restraints at atomic level
517 The "atomic" aspect is that it models the particle uncertainty with
518 a Gaussian. The noise in the data and the structural uncertainty of
519 cross-linked amino-acids is inferred using Bayes' theory of probability
520 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
523 _include_in_rmf =
True
525 def __init__(self, root_hier, xldb, atom_type="NZ", length=10.0,
526 slope=0.01, nstates=
None, label=
None,
527 nuisances_are_optimized=
True, sigma_init=5.0,
528 psi_init=0.01, one_psi=
True, filelabel=
None, weight=1.):
530 Automatically creates one "sigma" per cross-linked residue and one
532 Other nuisance options are available.
533 @note Will return an error if the data+extra_sel don't specify
534 two particles per XL pair.
535 @param root_hier The root hierarchy on which you'll do selection
536 @param xldb CrossLinkDataBase object
537 @param atom_type Can either be "NZ" or "CA"
538 @param length The XL linker length
539 @param slope Linear term to add to the restraint function to
541 @param nstates The number of states to model. Defaults to the
542 number of states in root.
543 @param label The output label for the restraint
544 @param nuisances_are_optimized Whether to optimize nuisances
545 @param sigma_init The initial value for all the sigmas
546 @param psi_init The initial value for all the psis
547 @param one_psi Use a single psi for all restraints (if False,
549 @param filelabel automatically generated file containing
550 missing/included/excluded
551 cross-links will be labeled using this text
552 @param weight Weight of restraint
557 self.root = root_hier
558 rname =
"AtomicXLRestraint"
560 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
564 self.sigma_is_sampled = nuisances_are_optimized
565 self.psi_is_sampled = nuisances_are_optimized
567 if atom_type
not in (
"NZ",
"CA"):
568 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
569 self.atom_type = atom_type
570 self.nstates = nstates
572 self.nstates = len(IMP.atom.get_by_type(self.root,
573 IMP.atom.STATE_TYPE))
574 elif nstates != len(IMP.atom.get_by_type(self.root,
575 IMP.atom.STATE_TYPE)):
576 print(
"Warning: nstates is not the same as the number of states "
579 self.rs_psi = self._create_restraint_set(
"psi")
580 self.rs_sig = self._create_restraint_set(
"sigma")
581 self.rs_lin = self._create_restraint_set(
"linear")
583 self.psi_dictionary = {}
584 self.sigma_dictionary = {}
586 self.particles = defaultdict(set)
587 self.one_psi = one_psi
588 self.bonded_pairs = []
590 print(
'creating a single psi for all XLs')
592 print(
'creating one psi for each XL')
595 if filelabel
is not None:
596 indb = open(
"included." + filelabel +
".xl.db",
"w")
597 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
598 midb = open(
"missing." + filelabel +
".xl.db",
"w")
601 self._create_sigma(
'sigma', sigma_init)
603 self._create_psi(
'psi', psi_init)
605 for xlid
in self.xldb.xlid_iterator():
606 self._create_psi(xlid, psi_init)
610 for xlid
in self.xldb.xlid_iterator():
613 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
615 psip = self.psi_dictionary[
616 unique_id][0].get_particle_index()
622 num_contributions = 0
625 for nstate
in self.nstates:
626 for xl
in self.xldb[xlid]:
627 r1 = xl[self.xldb.residue1_key]
628 c1 = xl[self.xldb.protein1_key].strip()
629 r2 = xl[self.xldb.residue2_key]
630 c2 = xl[self.xldb.protein2_key].strip()
635 self.root, state_index=nstate,
638 residue_index=r1).get_selected_particles()
640 self.root, state_index=nstate,
643 residue_index=r2).get_selected_particles()
645 warnings.warn(
"AtomicXLRestraint: residue %d of "
646 "chain %s is not there" % (r1, c1),
648 if filelabel
is not None:
649 midb.write(str(xl) +
"\n")
653 warnings.warn(
"AtomicXLRestraint: residue %d of "
654 "chain %s is not there" % (r2, c2),
656 if filelabel
is not None:
657 midb.write(str(xl) +
"\n")
662 num1=num_xls_per_res[str(xl.r1)]
663 num2=num_xls_per_res[str(xl.r2)]
664 if num1<sig_threshold:
668 if num2<sig_threshold:
673 sig1 = self.sigma_dictionary[
'sigma'][0]
674 sig2 = self.sigma_dictionary[
'sigma'][0]
677 for p1, p2
in itertools.product(ps1, ps2):
680 self.particles[nstate] |= set((p1, p2))
682 [p1.get_index(), p2.get_index()],
683 [sig1.get_particle_index(),
684 sig2.get_particle_index()])
685 num_contributions += 1
687 if num_contributions > 0:
688 print(
'XL:', xlid,
'num contributions:', num_contributions)
691 raise Exception(
"You didn't create any XL restraints")
692 print(
'created', len(xlrs),
'XL restraints')
693 rname = self.rs.get_name()
695 self.rs.set_name(rname)
696 self.rs.set_weight(self.weight)
697 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
699 def get_hierarchy(self):
702 def _create_sigma(self, name, sigmainit):
703 """ This is called internally. Creates a nuisance
704 on the structural uncertainty """
705 if name
in self.sigma_dictionary:
706 return self.sigma_dictionary[name][0]
708 sigmaminnuis = 0.0000001
709 sigmamaxnuis = 1000.0
713 sigma = IMP.pmi.tools.SetupNuisance(
714 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
715 self.sigma_is_sampled).get_particle()
716 self.sigma_dictionary[name] = (
717 sigma, sigmatrans, self.sigma_is_sampled)
719 self.model, sigma, 1000000000.0, sigmamax, sigmamin))
722 def _create_psi(self, name, psiinit):
723 """ This is called internally. Creates a nuisance
724 on the data uncertainty """
725 if name
in self.psi_dictionary:
726 return self.psi_dictionary[name][0]
728 psiminnuis = 0.0000001
729 psimaxnuis = 0.4999999
733 psi = IMP.pmi.tools.SetupNuisance(self.model,
737 self.psi_is_sampled).get_particle()
738 self.psi_dictionary[name] = (
743 self.rs_psi.add_restraint(
755 """ create dummy harmonic restraints for each XL but don't add to model
756 Makes it easy to see each contribution to each XL in RMF
758 class MyGetRestraint:
762 def get_restraint_for_rmf(self):
768 for nxl
in range(self.get_number_of_restraints()):
769 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
770 self.rs.get_restraint(nxl))
772 for ncontr
in range(xl.get_number_of_contributions()):
773 ps = xl.get_contribution(ncontr)
775 hps, [self.model.get_particle(p)
for p
in ps],
776 'xl%i_contr%i' % (nxl, ncontr))
778 dummy_rs.append(MyGetRestraint(rs))
782 """ Get the particles to be sampled by the IMP.pmi.sampler object """
784 if self.sigma_is_sampled:
785 for sigmaname
in self.sigma_dictionary:
786 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
787 str(sigmaname) + self._label_suffix] = \
788 ([self.sigma_dictionary[sigmaname][0]],
789 self.sigma_dictionary[sigmaname][1])
790 if self.psi_is_sampled:
791 for psiname
in self.psi_dictionary:
792 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
793 str(psiname) + self._label_suffix] = \
794 ([self.psi_dictionary[psiname][0]],
795 self.psi_dictionary[psiname][1])
798 def get_bonded_pairs(self):
799 return self.bonded_pairs
801 def get_number_of_restraints(self):
802 return self.rs.get_number_of_restraints()
805 return 'XL restraint with ' + \
806 str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
810 """Read a stat file and load all the sigmas.
811 This is potentially quite stupid.
812 It's also a hack since the sigmas should be stored in the RMF file.
813 Also, requires one sigma and one psi for ALL XLs.
816 sig_val = float(subprocess.check_output(
817 [
"process_output.py",
"-f", in_fn,
"-s",
818 "AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
819 psi_val = float(subprocess.check_output(
820 [
"process_output.py",
"-f", in_fn,
"-s",
821 "AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
822 for nxl
in range(self.rs.get_number_of_restraints()):
823 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
824 self.rs.get_restraint(nxl))
827 for contr
in range(xl.get_number_of_contributions()):
828 sig1, sig2 = xl.get_contribution_sigmas(contr)
831 print(
'loaded nuisances from file')
834 min_dist_for_violation=1e9, coarsen=
False,
835 limit_to_chains=
None, exclude_chains=
''):
836 """Create CMM files, one for each state, of all cross-links.
837 will draw in GREEN if non-violated in all states (or if only one state)
838 will draw in PURPLE if non-violated only in a subset of states
839 (draws nothing elsewhere)
840 will draw in RED in ALL states if all violated
841 (if only one state, you'll only see green and red)
843 @param out_prefix Output xlink files prefix
844 @param max_prob_for_violation It's a violation if the probability
846 @param min_dist_for_violation It's a violation if the min dist
848 @param coarsen Use CA positions
849 @param limit_to_chains Try to visualize just these chains
850 @param exclude_chains Try to NOT visualize these chains
852 print(
'going to calculate violations and plot CMM files')
854 all_dists = [s[
"low_dist"]
for s
in all_stats]
860 cmds = defaultdict(set)
861 for nstate
in range(self.nstates):
862 outf = open(out_prefix+str(nstate)+
'.cmm',
'w')
863 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
866 print(
'will limit to', limit_to_chains)
867 print(
'will exclude', exclude_chains)
872 for nxl
in range(self.rs.get_number_of_restraints()):
876 for nstate
in range(self.nstates):
877 prob = state_info[nstate][nxl][
"prob"]
878 low_dist = state_info[nstate][nxl][
"low_dist"]
879 if (prob < max_prob_for_violation
or
880 low_dist > min_dist_for_violation):
888 if len(npass) == self.nstates:
890 elif len(nviol) == self.nstates:
894 print(nxl,
'state dists:',
895 [state_info[nstate][nxl][
"low_dist"]
896 for nstate
in range(self.nstates)],
897 'viol states:', nviol,
'all viol?', all_viol)
898 for nstate
in range(self.nstates):
917 pp = state_info[nstate][nxl][
"low_pp"]
928 cmds[nstate].add((ch1, r1))
929 cmds[nstate].add((ch2, r2))
931 outf = out_fns[nstate]
933 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
934 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
935 % (nv, c1[0], c1[1], c1[2], r, g, b))
936 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
937 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
938 % (nv+1, c2[0], c2[1], c2[2], r, g, b))
939 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
940 'r="%.2f" g="%.2f" b="%.2f"/> \n'
941 % (nv, nv+1, r, g, b))
944 for nstate
in range(self.nstates):
945 out_fns[nstate].write(
'</marker_set>\n')
946 out_fns[nstate].close()
948 for ch, r
in cmds[nstate]:
949 cmd +=
'#%i:%i.%s ' % (nstate, r, ch)
954 def _get_contribution_info(self, xl, ncontr, use_CA=False):
955 """Return the particles at that contribution. If requested will
956 return CA's instead"""
957 idx1 = xl.get_contribution(ncontr)[0]
958 idx2 = xl.get_contribution(ncontr)[1]
963 idx1 = sel.get_selected_particle_indexes()[0]
967 idx2 = sel.get_selected_particle_indexes()[0]
971 return idx1, idx2, dist
974 exclude_chains=
'', use_CA=
False):
975 ''' return the probability, best distance, two coords, and possibly
977 @param limit_to_state Only examine contributions from one state
978 @param limit_to_chains Returns the particles for certain "easy
980 @param exclude_chains Even if you limit, don't let one end be in
981 this list. Only works if you also limit chains
982 @param use_CA Limit to CA particles
985 for nxl
in range(self.rs.get_number_of_restraints()):
987 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
988 self.rs.get_restraint(nxl))
994 for contr
in range(xl.get_number_of_contributions()):
995 pp = xl.get_contribution(contr)
1000 idx1 = sel.get_selected_particle_indexes()[0]
1004 idx2 = sel.get_selected_particle_indexes()[0]
1006 if limit_to_state
is not None:
1009 if nstate != limit_to_state:
1011 state_contrs.append(contr)
1014 if limit_to_chains
is not None:
1019 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1020 c1
not in exclude_chains
1021 and c2
not in exclude_chains):
1022 if dist < low_dist_lim:
1028 if limit_to_state
is not None:
1029 this_info[
"prob"] = xl.evaluate_for_contributions(
1032 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1033 if limit_to_chains
is not None:
1034 this_info[
"low_pp"] = low_pp_lim
1036 this_info[
"low_pp"] = low_pp
1038 this_info[
"low_dist"] = low_dist
1039 if not self.one_psi:
1041 this_info[
"psi"] = pval
1042 ret.append(this_info)
1045 def print_stats(self):
1047 for nxl, s
in enumerate(stats):
1048 print(s[
"low_dist"])
1050 def get_output(self):
1051 output = super().get_output()
1062 for nxl, s
in enumerate(stats):
1063 if s[
'low_dist'] > 20.0:
1065 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"Prob")] = str(s[
'prob'])
1066 output[
"AtomicXLRestraint_%i_%s"
1067 % (nxl,
"BestDist")] = str(s[
'low_dist'])
1068 if not self.one_psi:
1069 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"psi")] \
1071 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1075 class CysteineCrossLinkRestraint:
1076 def __init__(self, root_hier, filename, cbeta=False,
1077 betatuple=(0.03, 0.1),
1078 disttuple=(0.0, 25.0, 1000),
1079 omegatuple=(1.0, 1000.0, 50),
1080 sigmatuple=(0.3, 0.3, 1),
1082 sigmaissampled=
False,
1083 weightissampled=
True,
1084 epsilonissampled=
True):
1091 self.m = root_hier.get_model()
1094 self.epsilonmaxtrans = 0.01
1095 self.sigmamaxtrans = 0.1
1096 self.betamaxtrans = 0.01
1097 self.weightmaxtrans = 0.1
1099 self.outputlevel =
"low"
1100 self.betaissampled = betaissampled
1101 self.sigmaissampled = sigmaissampled
1102 self.weightissampled = weightissampled
1103 self.epsilonissampled = epsilonissampled
1105 betalower = betatuple[0]
1106 betaupper = betatuple[1]
1112 self.beta = IMP.pmi.tools.SetupNuisance(
1117 betaissampled).get_particle(
1120 self.sigma = IMP.pmi.tools.SetupNuisance(
1125 sigmaissampled).get_particle()
1127 self.weight = IMP.pmi.tools.SetupWeight(
1129 isoptimized=
False).get_particle(
1133 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1142 if t[5]
in self.epsilons:
1143 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1144 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1146 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(
1147 self.m, 0.01, 0.01, 1.0 - float(t[4]),
1148 epsilonissampled).get_particle()
1149 up = self.epsilons[t[5]].get_upper()
1150 low = self.epsilons[t[5]].get_lower()
1152 self.epsilons[t[5]].set_upper(low)
1154 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1158 crossdata = IMP.pmi.tools.get_cross_link_data(
1159 "cysteine",
"cysteine_CA_FES.txt.standard",
1160 disttuple, omegatuple, sigmatuple, disttuple[1],
1163 crossdata = IMP.pmi.tools.get_cross_link_data(
1164 "cysteine",
"cysteine_CB_FES.txt.standard",
1165 disttuple, omegatuple, sigmatuple, disttuple[1],
1169 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1170 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1171 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1174 print(
"--------------")
1175 print(
"CysteineCrossLink: attempting to create a restraint "
1196 self.epsilons[epslabel],
1207 molecule=chain1, residue_index=resid1,
1209 p1 = p1.get_selected_particles()
1216 molecule=chain2, residue_index=resid2,
1218 p2 = p2.get_selected_particles()
1228 for t
in range(-1, 2):
1230 molecule=chain1, copy_index=0,
1231 residue_index=resid1 + t)
1232 p = p.get_selected_particles()
1238 "CysteineCrossLink: missing representation for "
1239 "residue %d of chain %s" % (resid1 + t, chain1),
1243 molecule=chain2, copy_index=0,
1244 residue_index=resid2 + t)
1245 p = p.get_selected_particles()
1251 "CysteineCrossLink: missing representation for "
1252 "residue %d of chain %s" % (resid2 + t, chain2),
1256 if (p1
is not None and p2
is not None):
1257 ccl.add_contribution(p1, p2)
1261 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" +
1262 str(resid2) +
"_" + chain2,
1266 if (len(p1) == 3
and len(p2) == 3):
1267 p11n = p1[0].get_name()
1268 p12n = p1[1].get_name()
1269 p13n = p1[2].get_name()
1270 p21n = p2[0].get_name()
1271 p22n = p2[1].get_name()
1272 p23n = p2[2].get_name()
1274 print(
"CysteineCrossLink: generating CB cysteine "
1275 "cross-link restraint between")
1276 print(
"CysteineCrossLink: residue %d of chain %s and "
1277 "residue %d of chain %s"
1278 % (resid1, chain1, resid2, chain2))
1279 print(
"CysteineCrossLink: between particles %s %s %s and "
1280 "%s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1282 ccl.add_contribution(p1, p2)
1285 self.rs.add_restraint(ccl)
1286 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1287 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1290 self.weight.get_particle()
1291 ).set_weights_are_optimized(weightissampled)
1293 def set_label(self, label):
1296 def add_to_model(self):
1301 if self.epsilonissampled:
1302 for eps
in self.epsilons.keys():
1303 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
1304 "_" + self.label] = ([self.epsilons[eps]],
1305 self.epsilonmaxtrans)
1306 if self.betaissampled:
1307 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1308 self.label] = ([self.beta], self.betamaxtrans)
1309 if self.weightissampled:
1310 ps[
"Weights_CysteineCrossLinkRestraint_" +
1311 self.label] = ([self.weight], self.weightmaxtrans)
1312 if self.sigmaissampled:
1313 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1314 self.label] = ([self.sigma], self.sigmamaxtrans)
1317 def set_output_level(self, level="low"):
1319 self.outputlevel = level
1321 def get_restraint(self):
1324 def get_sigma(self):
1327 def get_output(self):
1329 score = self.rs.unprotected_evaluate(
None)
1330 output[
"_TotalScore"] = str(score)
1331 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1332 output[
"CysteineCrossLinkRestraint_sigma_" +
1333 self.label] = str(self.sigma.get_scale())
1334 for eps
in self.epsilons.keys():
1335 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1336 self.label] = str(self.epsilons[eps].get_scale())
1337 output[
"CysteineCrossLinkRestraint_beta_" +
1338 self.label] = str(self.beta.get_scale())
1339 for n
in range(self.weight.get_number_of_weights()):
1340 output[
"CysteineCrossLinkRestraint_weights_" +
1341 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1343 if self.outputlevel ==
"high":
1344 for rst
in self.rs.get_restraints():
1345 cclr = IMP.isd.CysteineCrossLinkRestraint.get_from(rst)
1346 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1348 "_" + self.label] = cclr.get_model_frequency()
1349 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1350 cclr.get_name() +
"_" +
1351 self.label] = cclr.get_standard_error()
1355 class DisulfideCrossLinkRestraint:
1356 def __init__(self, representation_or_hier,
1364 self.m = representation_or_hier.get_model()
1367 resolution=resolution)
1370 resolution=resolution)
1377 self.linear.set_slope(0.0)
1381 self.psi_dictionary = {}
1382 self.sigma_dictionary = {}
1383 self.psi_is_sampled =
False
1384 self.sigma_is_sampled =
False
1387 if len(ps1) > 1
or len(ps1) == 0:
1389 "DisulfideBondRestraint: ERROR> first selection pattern "
1390 "selects multiple particles or sero particles")
1392 if len(ps2) > 1
or len(ps2) == 0:
1394 "DisulfideBondRestraint: ERROR> second selection pattern "
1395 "selects multiple particles or sero particles")
1400 sigma = self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1401 psi = self.create_psi(
"PSI_DISULFIDE_BOND")
1403 p1i = p1.get_index()
1404 p2i = p2.get_index()
1410 dr.add_contribution((p1i, p2i), (si, si), psii)
1414 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1415 self.rslin.add_restraint(pr)
1418 self.rs.add_restraint(lw)
1420 self.xl[
"Particle1"] = p1
1421 self.xl[
"Particle2"] = p2
1422 self.xl[
"Sigma"] = sigma
1423 self.xl[
"Psi"] = psi
1425 def add_to_model(self):
1428 self.m, self.rslin, add_to_rmf=
True)
1430 def get_hierarchies(self):
1433 def get_restraint_sets(self):
1436 def get_restraint(self):
1439 def get_restraint_for_rmf(self):
1442 def get_restraints(self):
1444 for r
in self.rs.get_restraints():
1445 rlist.append(IMP.core.PairRestraint.get_from(r))
1448 def set_psi_is_sampled(self, is_sampled=True):
1449 self.psi_is_sampled = is_sampled
1451 def set_sigma_is_sampled(self, is_sampled=True):
1452 self.sigma_is_sampled = is_sampled
1454 def create_sigma(self, name):
1455 ''' a nuisance on the structural uncertainty '''
1456 if name
in self.sigma_dictionary:
1457 return self.sigma_dictionary[name][0]
1460 sigmaminnuis = 0.0000001
1461 sigmamaxnuis = 1000.0
1463 sigma = IMP.pmi.tools.SetupNuisance(
1464 self.m, sigmainit, sigmaminnuis, sigmamaxnuis,
1465 self.sigma_is_sampled).get_particle()
1466 self.sigma_dictionary[name] = (
1469 self.sigma_is_sampled)
1473 def create_psi(self, name):
1474 ''' a nuisance on the inconsistency '''
1475 if name
in self.psi_dictionary:
1476 return self.psi_dictionary[name][0]
1479 psiminnuis = 0.0000001
1480 psimaxnuis = 0.4999999
1482 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1483 psiminnuis, psimaxnuis,
1484 self.psi_is_sampled).get_particle()
1485 self.psi_dictionary[name] = (
1488 self.psi_is_sampled)
1492 def get_output(self):
1494 score = self.rs.unprotected_evaluate(
None)
1495 output[
"_TotalScore"] = str(score)
1496 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1497 output[
"DisulfideBondRestraint_Linear_Score_" +
1498 self.label] = self.rslin.unprotected_evaluate(
None)
1502 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
def get_likelihood
Get the unweighted likelihood of the restraint.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Score distance between two particle centers using a harmonic function.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
A RestraintSet subclass to track cross-link metadata.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Score a pair of particles based on the distance between their centers.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the partic...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.