1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
16 from collections
import defaultdict
23 """Setup cross-link distance restraints from mass spectrometry data.
24 The noise in the data and the structural uncertainty of cross-linked
25 amino-acids is inferred using Bayes theory of probability
26 @note Wraps an IMP::isd::CrossLinkMSRestraint
28 _include_in_rmf =
True
30 def __init__(self, root_hier, database=None, length=10.0, resolution=None,
31 slope=0.02, label=
None, filelabel=
"None",
32 attributes_for_label=
None, linker=
None, weight=1.):
34 @param root_hier The canonical hierarchy containing all the states
35 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
36 object that contains the cross-link dataset
37 @param length maximal cross-linker length (including the residue
39 @param resolution what representation resolution should the cross-link
40 restraint be applied to.
41 @param slope The slope of a distance-linear scoring function that
42 funnels the score when the particles are
43 too far away. Suggested value 0.02.
44 @param label the extra text to label the restraint so that it is
45 searchable in the output
46 @param filelabel automatically generated file containing
47 missing/included/excluded
48 cross-links will be labeled using this text
49 @param attributes_for_label
50 @param weight Weight of restraint
51 @param linker description of the chemistry of the linker itself, as
52 an ihm.ChemDescriptor object
53 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
54 Common cross-linkers can be found in the `ihm.cross_linkers`
58 model = root_hier.get_model()
61 model, weight=weight, label=label,
65 raise Exception(
"You must pass a database")
68 "CrossLinkingMassSpectrometryRestraint: database should "
69 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
70 self.database = database
72 if resolution == 0
or resolution
is None:
73 raise Exception(
"You must pass a resolution and it can't be zero")
75 indb = open(
"included." + filelabel +
".xl.db",
"w")
76 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
77 midb = open(
"missing." + filelabel +
".xl.db",
"w")
82 "No linker chemistry specified. A linker must be given, as an "
83 "ihm.ChemDescriptor object (see the "
84 "CrossLinkingMassSpectrometryRestraint documentation).")
85 self.rs.set_name(self.rs.get_name() +
"_Data")
86 self.rspsi = self._create_restraint_set(
"PriorPsi")
87 self.rssig = self._create_restraint_set(
"PriorSig")
88 self.rslin = self._create_restraint_set(
"Linear")
90 self.rs.set_metadata(self.database.name, length, slope)
92 self.rs.set_linker_auth_name(linker.auth_name)
93 for attr
in (
'chemical_name',
'smiles',
'smiles_canonical',
94 'inchi',
'inchi_key'):
95 val = getattr(linker, attr)
97 getattr(self.rs,
"set_linker_" + attr)(val)
101 self.linear.set_slope(0.0)
104 self.psi_is_sampled =
True
105 self.sigma_is_sampled =
True
106 self.psi_dictionary = {}
107 self.sigma_dictionary = {}
109 self.outputlevel =
"low"
113 xl_groups = [p.get_cross_link_group(self)
114 for p, state
in IMP.pmi.tools._all_protocol_outputs(
118 copies_to_add = defaultdict(int)
119 print(
'gathering copies')
120 for xlid
in self.database.xlid_iterator():
121 for xl
in self.database[xlid]:
122 r1 = xl[self.database.residue1_key]
123 c1 = xl[self.database.protein1_key]
124 r2 = xl[self.database.residue2_key]
125 c2 = xl[self.database.protein2_key]
126 for c, r
in ((c1, r1), (c2, r2)):
127 if c
in copies_to_add:
130 root_hier, state_index=0, molecule=c, residue_index=r,
131 resolution=resolution).get_selected_particles()
133 copies_to_add[c] = len(sel)-1
135 for molname
in copies_to_add:
136 if copies_to_add[molname] == 0:
139 self.database.protein1_key, operator.eq, molname)
140 self.database.set_value(self.database.protein1_key,
143 self.database.protein2_key, operator.eq, molname)
144 self.database.set_value(self.database.protein2_key,
146 for ncopy
in range(copies_to_add[molname]):
147 self.database.clone_protein(
'%s.0' % molname,
148 '%s.%i' % (molname, ncopy + 1))
149 print(
'done pmi2 prelims')
151 for xlid
in self.database.xlid_iterator():
152 new_contribution =
True
153 for xl
in self.database[xlid]:
155 r1 = xl[self.database.residue1_key]
156 c1 = xl[self.database.protein1_key]
157 r2 = xl[self.database.residue2_key]
158 c2 = xl[self.database.protein2_key]
165 name1, copy1 = c1.split(
'.')
167 name2, copy2 = c2.split(
'.')
171 (p[0].add_experimental_cross_link(r1, name1, r2, name2,
174 IMP.pmi.tools._all_protocol_outputs(root_hier),
177 iterlist = range(len(IMP.atom.get_by_type(
178 root_hier, IMP.atom.STATE_TYPE)))
179 for nstate, r
in enumerate(iterlist):
181 xl[self.database.state_key] = nstate
182 xl[self.database.data_set_name_key] = self.label
185 root_hier, state_index=nstate, molecule=name1,
186 copy_index=int(copy1), residue_index=r1,
187 resolution=resolution).get_selected_particles()
189 root_hier, state_index=nstate, molecule=name2,
190 copy_index=int(copy2), residue_index=r2,
191 resolution=resolution).get_selected_particles()
198 "residue %d of chain %s selects multiple "
199 "particles %s" % (r1, c1, str(ps1)))
202 "CrossLinkingMassSpectrometryRestraint: "
203 "residue %d of chain %s is not there" % (r1, c1),
205 midb.write(str(xl) +
"\n")
210 "residue %d of chain %s selects multiple "
211 "particles %s" % (r2, c2, str(ps2)))
214 "CrossLinkingMassSpectrometryRestraint: "
215 "residue %d of chain %s is not there" % (r2, c2),
217 midb.write(str(xl) +
"\n")
223 if p1 == p2
and r1 == r2:
225 "CrossLinkingMassSpectrometryRestraint: "
226 "same particle and same residue, skipping "
231 print(
"generating a new cross-link restraint")
232 new_contribution =
False
237 dr.set_source_protein1(c1)
238 dr.set_source_protein2(c2)
239 dr.set_source_residue1(r1)
240 dr.set_source_residue2(r2)
241 restraints.append(dr)
243 if self.database.sigma1_key
not in xl.keys():
245 xl[self.database.sigma1_key] = sigma1name
247 sigma1name = xl[self.database.sigma1_key]
250 if self.database.sigma2_key
not in xl.keys():
252 xl[self.database.sigma2_key] = sigma2name
254 sigma2name = xl[self.database.sigma2_key]
257 if self.database.psi_key
not in xl.keys():
259 xl[self.database.psi_key] = psiname
261 psiname = xl[self.database.psi_key]
265 p1i = p1.get_particle_index()
267 p2i = p2.get_particle_index()
270 xl[
"Particle_sigma1"] = sigma1
272 xl[
"Particle_sigma2"] = sigma2
274 xl[
"Particle_psi"] = psi
276 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
279 print(
"--------------")
280 print(
"CrossLinkingMassSpectrometryRestraint: generating "
281 "cross-link restraint between")
282 print(
"CrossLinkingMassSpectrometryRestraint: residue %d "
283 "of chain %s and residue %d of chain %s"
285 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 "
286 "%s sigma2 %s psi %s"
287 % (sigma1name, sigma2name, psiname))
288 print(
"CrossLinkingMassSpectrometryRestraint: between "
289 "particles %s and %s"
290 % (p1.get_name(), p2.get_name()))
291 print(
"==========================================\n")
292 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
295 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
296 sigma1, sigma2, psi, ex_xl[1])
303 xl[
"IntraRigidBody"] =
True
305 xl[
"IntraRigidBody"] =
False
307 xl_label = self.database.get_short_cross_link_string(xl)
308 xl[
"ShortLabel"] = xl_label
309 dr.set_name(xl_label)
314 pr.set_name(xl_label)
315 self.rslin.add_restraint(pr)
317 self.xl_list.append(xl)
319 indb.write(str(xl) +
"\n")
321 if len(self.xl_list) == 0:
323 "CrossLinkingMassSpectrometryRestraint: no cross-link "
325 self.xl_restraints = restraints
327 self.rs.add_restraint(lw)
332 def __set_dataset(self, ds):
333 self.database.dataset = ds
334 dataset = property(
lambda self: self.database.dataset, __set_dataset)
337 """ get the hierarchy """
341 """ get the restraint set """
345 """ get the restraints in a list """
346 return self.xl_restraints
349 """ get the dummy restraints to be displayed in the rmf file """
350 return self.rs, self.rssig, self.rspsi
353 """ Get a list of tuples containing the particle pairs """
355 for i
in range(len(self.pairs)):
356 p0 = self.pairs[i][0]
357 p1 = self.pairs[i][1]
358 ppairs.append((p0, p1))
362 """ Set the output level of the output """
363 self.outputlevel = level
366 """ Switch on/off the sampling of psi particles """
367 self.psi_is_sampled = is_sampled
370 """ Switch on/off the sampling of sigma particles """
371 self.sigma_is_sampled = is_sampled
374 """ This is called internally. Creates a nuisance
375 on the structural uncertainty """
376 if name
in self.sigma_dictionary:
377 return self.sigma_dictionary[name][0]
380 sigmaminnuis = 0.0000001
381 sigmamaxnuis = 1000.0
385 sigma = IMP.pmi.tools.SetupNuisance(
386 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
387 self.sigma_is_sampled, name=name).get_particle()
388 self.sigma_dictionary[name] = (
391 self.sigma_is_sampled)
392 self.rssig.add_restraint(
402 """ This is called internally. Creates a nuisance
403 on the data uncertainty """
404 if name
in self.psi_dictionary:
405 return self.psi_dictionary[name][0]
408 psiminnuis = 0.0000001
409 psimaxnuis = 0.4999999
413 psi = IMP.pmi.tools.SetupNuisance(
414 self.model, psiinit, psiminnuis, psimaxnuis,
415 self.psi_is_sampled, name=name).get_particle()
416 self.psi_dictionary[name] = (psi, psitrans, self.psi_is_sampled)
418 self.rspsi.add_restraint(
426 """Get the output of the restraint to be used by the IMP.pmi.output
431 for xl
in self.xl_list:
433 xl_label = xl[
"ShortLabel"]
437 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
438 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
442 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
445 for psiname
in self.psi_dictionary:
446 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
447 str(psiname) + self._label_suffix] = str(
448 self.psi_dictionary[psiname][0].get_scale())
450 for sigmaname
in self.sigma_dictionary:
451 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
452 str(sigmaname) + self._label_suffix] = str(
453 self.sigma_dictionary[sigmaname][0].get_scale())
458 """Get the unweighted likelihood of the restraint"""
460 for restraint
in self.xl_restraints:
461 likelihood *= restraint.get_probability()
466 """ Get all need data to construct a mover in IMP.pmi.dof class"""
468 if self.sigma_is_sampled:
469 for sigmaname
in self.sigma_dictionary:
471 "Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" \
472 + str(sigmaname) +
"_" + self.label
473 particle = self.sigma_dictionary[sigmaname][0]
474 maxstep = (self.sigma_dictionary[sigmaname][1])
478 mv.set_name(mover_name)
481 if self.psi_is_sampled:
482 for psiname
in self.psi_dictionary:
484 "Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + \
485 str(psiname) +
"_" + self.label
486 particle = self.psi_dictionary[psiname][0]
487 maxstep = (self.psi_dictionary[psiname][1])
491 mv.set_name(mover_name)
497 """ Get the particles to be sampled by the IMP.pmi.sampler object """
499 if self.sigma_is_sampled:
500 for sigmaname
in self.sigma_dictionary:
501 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
502 str(sigmaname) + self._label_suffix] =\
503 ([self.sigma_dictionary[sigmaname][0]],
504 self.sigma_dictionary[sigmaname][1])
506 if self.psi_is_sampled:
507 for psiname
in self.psi_dictionary:
508 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
509 str(psiname) + self._label_suffix] = \
510 ([self.psi_dictionary[psiname][0]],
511 self.psi_dictionary[psiname][1])
517 """Setup cross-link distance restraints at atomic level
518 The "atomic" aspect is that it models the particle uncertainty with
519 a Gaussian. The noise in the data and the structural uncertainty of
520 cross-linked amino-acids is inferred using Bayes' theory of probability
521 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
524 _include_in_rmf =
True
526 def __init__(self, root_hier, xldb, atom_type="NZ", length=10.0,
527 slope=0.01, nstates=
None, label=
None,
528 nuisances_are_optimized=
True, sigma_init=5.0,
529 psi_init=0.01, one_psi=
True, filelabel=
None, weight=1.):
531 Automatically creates one "sigma" per cross-linked residue and one
533 Other nuisance options are available.
534 @note Will return an error if the data+extra_sel don't specify
535 two particles per XL pair.
536 @param root_hier The root hierarchy on which you'll do selection
537 @param xldb CrossLinkDataBase object
538 @param atom_type Can either be "NZ" or "CA"
539 @param length The XL linker length
540 @param slope Linear term to add to the restraint function to
542 @param nstates The number of states to model. Defaults to the
543 number of states in root.
544 @param label The output label for the restraint
545 @param nuisances_are_optimized Whether to optimize nuisances
546 @param sigma_init The initial value for all the sigmas
547 @param psi_init The initial value for all the psis
548 @param one_psi Use a single psi for all restraints (if False,
550 @param filelabel automatically generated file containing
551 missing/included/excluded
552 cross-links will be labeled using this text
553 @param weight Weight of restraint
558 self.root = root_hier
559 rname =
"AtomicXLRestraint"
561 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
565 self.sigma_is_sampled = nuisances_are_optimized
566 self.psi_is_sampled = nuisances_are_optimized
568 if atom_type
not in (
"NZ",
"CA"):
569 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
570 self.atom_type = atom_type
571 self.nstates = nstates
573 self.nstates = len(IMP.atom.get_by_type(self.root,
574 IMP.atom.STATE_TYPE))
575 elif nstates != len(IMP.atom.get_by_type(self.root,
576 IMP.atom.STATE_TYPE)):
577 print(
"Warning: nstates is not the same as the number of states "
580 self.rs_psi = self._create_restraint_set(
"psi")
581 self.rs_sig = self._create_restraint_set(
"sigma")
582 self.rs_lin = self._create_restraint_set(
"linear")
584 self.psi_dictionary = {}
585 self.sigma_dictionary = {}
587 self.particles = defaultdict(set)
588 self.one_psi = one_psi
589 self.bonded_pairs = []
591 print(
'creating a single psi for all XLs')
593 print(
'creating one psi for each XL')
596 if filelabel
is not None:
597 indb = open(
"included." + filelabel +
".xl.db",
"w")
598 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
599 midb = open(
"missing." + filelabel +
".xl.db",
"w")
602 self._create_sigma(
'sigma', sigma_init)
604 self._create_psi(
'psi', psi_init)
606 for xlid
in self.xldb.xlid_iterator():
607 self._create_psi(xlid, psi_init)
611 for xlid
in self.xldb.xlid_iterator():
614 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
616 psip = self.psi_dictionary[
617 unique_id][0].get_particle_index()
623 num_contributions = 0
626 for nstate
in self.nstates:
627 for xl
in self.xldb[xlid]:
628 r1 = xl[self.xldb.residue1_key]
629 c1 = xl[self.xldb.protein1_key].strip()
630 r2 = xl[self.xldb.residue2_key]
631 c2 = xl[self.xldb.protein2_key].strip()
636 self.root, state_index=nstate,
639 residue_index=r1).get_selected_particles()
641 self.root, state_index=nstate,
644 residue_index=r2).get_selected_particles()
646 warnings.warn(
"AtomicXLRestraint: residue %d of "
647 "chain %s is not there" % (r1, c1),
649 if filelabel
is not None:
650 midb.write(str(xl) +
"\n")
654 warnings.warn(
"AtomicXLRestraint: residue %d of "
655 "chain %s is not there" % (r2, c2),
657 if filelabel
is not None:
658 midb.write(str(xl) +
"\n")
663 num1=num_xls_per_res[str(xl.r1)]
664 num2=num_xls_per_res[str(xl.r2)]
665 if num1<sig_threshold:
669 if num2<sig_threshold:
674 sig1 = self.sigma_dictionary[
'sigma'][0]
675 sig2 = self.sigma_dictionary[
'sigma'][0]
678 for p1, p2
in itertools.product(ps1, ps2):
681 self.particles[nstate] |= set((p1, p2))
683 [p1.get_index(), p2.get_index()],
684 [sig1.get_particle_index(),
685 sig2.get_particle_index()])
686 num_contributions += 1
688 if num_contributions > 0:
689 print(
'XL:', xlid,
'num contributions:', num_contributions)
692 raise Exception(
"You didn't create any XL restraints")
693 print(
'created', len(xlrs),
'XL restraints')
694 rname = self.rs.get_name()
696 self.rs.set_name(rname)
697 self.rs.set_weight(self.weight)
698 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
700 def get_hierarchy(self):
703 def _create_sigma(self, name, sigmainit):
704 """ This is called internally. Creates a nuisance
705 on the structural uncertainty """
706 if name
in self.sigma_dictionary:
707 return self.sigma_dictionary[name][0]
709 sigmaminnuis = 0.0000001
710 sigmamaxnuis = 1000.0
714 sigma = IMP.pmi.tools.SetupNuisance(
715 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
716 self.sigma_is_sampled).get_particle()
717 self.sigma_dictionary[name] = (
718 sigma, sigmatrans, self.sigma_is_sampled)
720 self.model, sigma, 1000000000.0, sigmamax, sigmamin))
723 def _create_psi(self, name, psiinit):
724 """ This is called internally. Creates a nuisance
725 on the data uncertainty """
726 if name
in self.psi_dictionary:
727 return self.psi_dictionary[name][0]
729 psiminnuis = 0.0000001
730 psimaxnuis = 0.4999999
734 psi = IMP.pmi.tools.SetupNuisance(self.model,
738 self.psi_is_sampled).get_particle()
739 self.psi_dictionary[name] = (
744 self.rs_psi.add_restraint(
756 """ create dummy harmonic restraints for each XL but don't add to model
757 Makes it easy to see each contribution to each XL in RMF
759 class MyGetRestraint:
763 def get_restraint_for_rmf(self):
769 for nxl
in range(self.get_number_of_restraints()):
770 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
771 self.rs.get_restraint(nxl))
773 for ncontr
in range(xl.get_number_of_contributions()):
774 ps = xl.get_contribution(ncontr)
776 hps, [self.model.get_particle(p)
for p
in ps],
777 'xl%i_contr%i' % (nxl, ncontr))
779 dummy_rs.append(MyGetRestraint(rs))
783 """ Get the particles to be sampled by the IMP.pmi.sampler object """
785 if self.sigma_is_sampled:
786 for sigmaname
in self.sigma_dictionary:
787 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
788 str(sigmaname) + self._label_suffix] = \
789 ([self.sigma_dictionary[sigmaname][0]],
790 self.sigma_dictionary[sigmaname][1])
791 if self.psi_is_sampled:
792 for psiname
in self.psi_dictionary:
793 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
794 str(psiname) + self._label_suffix] = \
795 ([self.psi_dictionary[psiname][0]],
796 self.psi_dictionary[psiname][1])
799 def get_bonded_pairs(self):
800 return self.bonded_pairs
802 def get_number_of_restraints(self):
803 return self.rs.get_number_of_restraints()
806 return 'XL restraint with ' + \
807 str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
811 """Read a stat file and load all the sigmas.
812 This is potentially quite stupid.
813 It's also a hack since the sigmas should be stored in the RMF file.
814 Also, requires one sigma and one psi for ALL XLs.
817 sig_val = float(subprocess.check_output(
818 [
"process_output.py",
"-f", in_fn,
"-s",
819 "AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
820 psi_val = float(subprocess.check_output(
821 [
"process_output.py",
"-f", in_fn,
"-s",
822 "AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
823 for nxl
in range(self.rs.get_number_of_restraints()):
824 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
825 self.rs.get_restraint(nxl))
828 for contr
in range(xl.get_number_of_contributions()):
829 sig1, sig2 = xl.get_contribution_sigmas(contr)
832 print(
'loaded nuisances from file')
835 min_dist_for_violation=1e9, coarsen=
False,
836 limit_to_chains=
None, exclude_chains=
''):
837 """Create CMM files, one for each state, of all cross-links.
838 will draw in GREEN if non-violated in all states (or if only one state)
839 will draw in PURPLE if non-violated only in a subset of states
840 (draws nothing elsewhere)
841 will draw in RED in ALL states if all violated
842 (if only one state, you'll only see green and red)
844 @param out_prefix Output xlink files prefix
845 @param max_prob_for_violation It's a violation if the probability
847 @param min_dist_for_violation It's a violation if the min dist
849 @param coarsen Use CA positions
850 @param limit_to_chains Try to visualize just these chains
851 @param exclude_chains Try to NOT visualize these chains
853 print(
'going to calculate violations and plot CMM files')
855 all_dists = [s[
"low_dist"]
for s
in all_stats]
861 cmds = defaultdict(set)
862 for nstate
in range(self.nstates):
863 outf = open(out_prefix+str(nstate)+
'.cmm',
'w')
864 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
867 print(
'will limit to', limit_to_chains)
868 print(
'will exclude', exclude_chains)
873 for nxl
in range(self.rs.get_number_of_restraints()):
877 for nstate
in range(self.nstates):
878 prob = state_info[nstate][nxl][
"prob"]
879 low_dist = state_info[nstate][nxl][
"low_dist"]
880 if (prob < max_prob_for_violation
or
881 low_dist > min_dist_for_violation):
889 if len(npass) == self.nstates:
891 elif len(nviol) == self.nstates:
895 print(nxl,
'state dists:',
896 [state_info[nstate][nxl][
"low_dist"]
897 for nstate
in range(self.nstates)],
898 'viol states:', nviol,
'all viol?', all_viol)
899 for nstate
in range(self.nstates):
918 pp = state_info[nstate][nxl][
"low_pp"]
929 cmds[nstate].add((ch1, r1))
930 cmds[nstate].add((ch2, r2))
932 outf = out_fns[nstate]
934 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
935 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
936 % (nv, c1[0], c1[1], c1[2], r, g, b))
937 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
938 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
939 % (nv+1, c2[0], c2[1], c2[2], r, g, b))
940 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
941 'r="%.2f" g="%.2f" b="%.2f"/> \n'
942 % (nv, nv+1, r, g, b))
945 for nstate
in range(self.nstates):
946 out_fns[nstate].write(
'</marker_set>\n')
947 out_fns[nstate].close()
949 for ch, r
in cmds[nstate]:
950 cmd +=
'#%i:%i.%s ' % (nstate, r, ch)
955 def _get_contribution_info(self, xl, ncontr, use_CA=False):
956 """Return the particles at that contribution. If requested will
957 return CA's instead"""
958 idx1 = xl.get_contribution(ncontr)[0]
959 idx2 = xl.get_contribution(ncontr)[1]
964 idx1 = sel.get_selected_particle_indexes()[0]
968 idx2 = sel.get_selected_particle_indexes()[0]
972 return idx1, idx2, dist
975 exclude_chains=
'', use_CA=
False):
976 ''' return the probability, best distance, two coords, and possibly
978 @param limit_to_state Only examine contributions from one state
979 @param limit_to_chains Returns the particles for certain "easy
981 @param exclude_chains Even if you limit, don't let one end be in
982 this list. Only works if you also limit chains
983 @param use_CA Limit to CA particles
986 for nxl
in range(self.rs.get_number_of_restraints()):
988 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
989 self.rs.get_restraint(nxl))
995 for contr
in range(xl.get_number_of_contributions()):
996 pp = xl.get_contribution(contr)
1001 idx1 = sel.get_selected_particle_indexes()[0]
1005 idx2 = sel.get_selected_particle_indexes()[0]
1007 if limit_to_state
is not None:
1010 if nstate != limit_to_state:
1012 state_contrs.append(contr)
1015 if limit_to_chains
is not None:
1020 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1021 c1
not in exclude_chains
1022 and c2
not in exclude_chains):
1023 if dist < low_dist_lim:
1029 if limit_to_state
is not None:
1030 this_info[
"prob"] = xl.evaluate_for_contributions(
1033 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1034 if limit_to_chains
is not None:
1035 this_info[
"low_pp"] = low_pp_lim
1037 this_info[
"low_pp"] = low_pp
1039 this_info[
"low_dist"] = low_dist
1040 if not self.one_psi:
1042 this_info[
"psi"] = pval
1043 ret.append(this_info)
1046 def print_stats(self):
1048 for nxl, s
in enumerate(stats):
1049 print(s[
"low_dist"])
1051 def get_output(self):
1052 output = super().get_output()
1063 for nxl, s
in enumerate(stats):
1064 if s[
'low_dist'] > 20.0:
1066 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"Prob")] = str(s[
'prob'])
1067 output[
"AtomicXLRestraint_%i_%s"
1068 % (nxl,
"BestDist")] = str(s[
'low_dist'])
1069 if not self.one_psi:
1070 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"psi")] \
1072 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1076 class CysteineCrossLinkRestraint:
1077 def __init__(self, root_hier, filename, cbeta=False,
1078 betatuple=(0.03, 0.1),
1079 disttuple=(0.0, 25.0, 1000),
1080 omegatuple=(1.0, 1000.0, 50),
1081 sigmatuple=(0.3, 0.3, 1),
1083 sigmaissampled=
False,
1084 weightissampled=
True,
1085 epsilonissampled=
True):
1092 self.m = root_hier.get_model()
1095 self.epsilonmaxtrans = 0.01
1096 self.sigmamaxtrans = 0.1
1097 self.betamaxtrans = 0.01
1098 self.weightmaxtrans = 0.1
1100 self.outputlevel =
"low"
1101 self.betaissampled = betaissampled
1102 self.sigmaissampled = sigmaissampled
1103 self.weightissampled = weightissampled
1104 self.epsilonissampled = epsilonissampled
1106 betalower = betatuple[0]
1107 betaupper = betatuple[1]
1113 self.beta = IMP.pmi.tools.SetupNuisance(
1118 betaissampled).get_particle(
1121 self.sigma = IMP.pmi.tools.SetupNuisance(
1126 sigmaissampled).get_particle()
1128 self.weight = IMP.pmi.tools.SetupWeight(
1130 isoptimized=
False).get_particle(
1134 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1143 if t[5]
in self.epsilons:
1144 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1145 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1147 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(
1148 self.m, 0.01, 0.01, 1.0 - float(t[4]),
1149 epsilonissampled).get_particle()
1150 up = self.epsilons[t[5]].get_upper()
1151 low = self.epsilons[t[5]].get_lower()
1153 self.epsilons[t[5]].set_upper(low)
1155 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1159 crossdata = IMP.pmi.tools.get_cross_link_data(
1160 "cysteine",
"cysteine_CA_FES.txt.standard",
1161 disttuple, omegatuple, sigmatuple, disttuple[1],
1164 crossdata = IMP.pmi.tools.get_cross_link_data(
1165 "cysteine",
"cysteine_CB_FES.txt.standard",
1166 disttuple, omegatuple, sigmatuple, disttuple[1],
1170 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1171 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1172 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1175 print(
"--------------")
1176 print(
"CysteineCrossLink: attempting to create a restraint "
1197 self.epsilons[epslabel],
1208 molecule=chain1, residue_index=resid1,
1210 p1 = p1.get_selected_particles()
1217 molecule=chain2, residue_index=resid2,
1219 p2 = p2.get_selected_particles()
1229 for t
in range(-1, 2):
1231 molecule=chain1, copy_index=0,
1232 residue_index=resid1 + t)
1233 p = p.get_selected_particles()
1239 "CysteineCrossLink: missing representation for "
1240 "residue %d of chain %s" % (resid1 + t, chain1),
1244 molecule=chain2, copy_index=0,
1245 residue_index=resid2 + t)
1246 p = p.get_selected_particles()
1252 "CysteineCrossLink: missing representation for "
1253 "residue %d of chain %s" % (resid2 + t, chain2),
1257 if (p1
is not None and p2
is not None):
1258 ccl.add_contribution(p1, p2)
1262 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" +
1263 str(resid2) +
"_" + chain2,
1267 if (len(p1) == 3
and len(p2) == 3):
1268 p11n = p1[0].get_name()
1269 p12n = p1[1].get_name()
1270 p13n = p1[2].get_name()
1271 p21n = p2[0].get_name()
1272 p22n = p2[1].get_name()
1273 p23n = p2[2].get_name()
1275 print(
"CysteineCrossLink: generating CB cysteine "
1276 "cross-link restraint between")
1277 print(
"CysteineCrossLink: residue %d of chain %s and "
1278 "residue %d of chain %s"
1279 % (resid1, chain1, resid2, chain2))
1280 print(
"CysteineCrossLink: between particles %s %s %s and "
1281 "%s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1283 ccl.add_contribution(p1, p2)
1286 self.rs.add_restraint(ccl)
1287 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1288 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1291 self.weight.get_particle()
1292 ).set_weights_are_optimized(weightissampled)
1294 def set_label(self, label):
1297 def add_to_model(self):
1302 if self.epsilonissampled:
1303 for eps
in self.epsilons.keys():
1304 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
1305 "_" + self.label] = ([self.epsilons[eps]],
1306 self.epsilonmaxtrans)
1307 if self.betaissampled:
1308 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1309 self.label] = ([self.beta], self.betamaxtrans)
1310 if self.weightissampled:
1311 ps[
"Weights_CysteineCrossLinkRestraint_" +
1312 self.label] = ([self.weight], self.weightmaxtrans)
1313 if self.sigmaissampled:
1314 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1315 self.label] = ([self.sigma], self.sigmamaxtrans)
1318 def set_output_level(self, level="low"):
1320 self.outputlevel = level
1322 def get_restraint(self):
1325 def get_sigma(self):
1328 def get_output(self):
1330 score = self.rs.unprotected_evaluate(
None)
1331 output[
"_TotalScore"] = str(score)
1332 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1333 output[
"CysteineCrossLinkRestraint_sigma_" +
1334 self.label] = str(self.sigma.get_scale())
1335 for eps
in self.epsilons.keys():
1336 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1337 self.label] = str(self.epsilons[eps].get_scale())
1338 output[
"CysteineCrossLinkRestraint_beta_" +
1339 self.label] = str(self.beta.get_scale())
1340 for n
in range(self.weight.get_number_of_weights()):
1341 output[
"CysteineCrossLinkRestraint_weights_" +
1342 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1344 if self.outputlevel ==
"high":
1345 for rst
in self.rs.get_restraints():
1346 cclr = IMP.isd.CysteineCrossLinkRestraint.get_from(rst)
1347 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1349 "_" + self.label] = cclr.get_model_frequency()
1350 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1351 cclr.get_name() +
"_" +
1352 self.label] = cclr.get_standard_error()
1356 class DisulfideCrossLinkRestraint:
1357 def __init__(self, representation_or_hier,
1365 self.m = representation_or_hier.get_model()
1368 resolution=resolution)
1371 resolution=resolution)
1378 self.linear.set_slope(0.0)
1382 self.psi_dictionary = {}
1383 self.sigma_dictionary = {}
1384 self.psi_is_sampled =
False
1385 self.sigma_is_sampled =
False
1388 if len(ps1) > 1
or len(ps1) == 0:
1390 "DisulfideBondRestraint: ERROR> first selection pattern "
1391 "selects multiple particles or sero particles")
1393 if len(ps2) > 1
or len(ps2) == 0:
1395 "DisulfideBondRestraint: ERROR> second selection pattern "
1396 "selects multiple particles or sero particles")
1401 sigma = self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1402 psi = self.create_psi(
"PSI_DISULFIDE_BOND")
1404 p1i = p1.get_index()
1405 p2i = p2.get_index()
1411 dr.add_contribution((p1i, p2i), (si, si), psii)
1415 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1416 self.rslin.add_restraint(pr)
1419 self.rs.add_restraint(lw)
1421 self.xl[
"Particle1"] = p1
1422 self.xl[
"Particle2"] = p2
1423 self.xl[
"Sigma"] = sigma
1424 self.xl[
"Psi"] = psi
1426 def add_to_model(self):
1429 self.m, self.rslin, add_to_rmf=
True)
1431 def get_hierarchies(self):
1434 def get_restraint_sets(self):
1437 def get_restraint(self):
1440 def get_restraint_for_rmf(self):
1443 def get_restraints(self):
1445 for r
in self.rs.get_restraints():
1446 rlist.append(IMP.core.PairRestraint.get_from(r))
1449 def set_psi_is_sampled(self, is_sampled=True):
1450 self.psi_is_sampled = is_sampled
1452 def set_sigma_is_sampled(self, is_sampled=True):
1453 self.sigma_is_sampled = is_sampled
1455 def create_sigma(self, name):
1456 ''' a nuisance on the structural uncertainty '''
1457 if name
in self.sigma_dictionary:
1458 return self.sigma_dictionary[name][0]
1461 sigmaminnuis = 0.0000001
1462 sigmamaxnuis = 1000.0
1464 sigma = IMP.pmi.tools.SetupNuisance(
1465 self.m, sigmainit, sigmaminnuis, sigmamaxnuis,
1466 self.sigma_is_sampled).get_particle()
1467 self.sigma_dictionary[name] = (
1470 self.sigma_is_sampled)
1474 def create_psi(self, name):
1475 ''' a nuisance on the inconsistency '''
1476 if name
in self.psi_dictionary:
1477 return self.psi_dictionary[name][0]
1480 psiminnuis = 0.0000001
1481 psimaxnuis = 0.4999999
1483 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1484 psiminnuis, psimaxnuis,
1485 self.psi_is_sampled).get_particle()
1486 self.psi_dictionary[name] = (
1489 self.psi_is_sampled)
1493 def get_output(self):
1495 score = self.rs.unprotected_evaluate(
None)
1496 output[
"_TotalScore"] = str(score)
1497 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1498 output[
"DisulfideBondRestraint_Linear_Score_" +
1499 self.label] = self.rslin.unprotected_evaluate(
None)
1503 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
def get_likelihood
Get the unweighted likelihood of the restraint.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Score distance between two particle centers using a harmonic function.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
A RestraintSet subclass to track cross-link metadata.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Score a pair of particles based on the distance between their centers.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the partic...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.