1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
5 from __future__
import print_function
17 from collections
import defaultdict
24 """Setup cross-link distance restraints from mass spectrometry data.
25 The noise in the data and the structural uncertainty of cross-linked
26 amino-acids is inferred using Bayes theory of probability
27 @note Wraps an IMP::isd::CrossLinkMSRestraint
29 _include_in_rmf =
True
31 def __init__(self, root_hier, database=None, length=10.0, resolution=None,
32 slope=0.02, label=
None, filelabel=
"None",
33 attributes_for_label=
None, linker=
None, weight=1.):
35 @param root_hier The canonical hierarchy containing all the states
36 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
37 object that contains the cross-link dataset
38 @param length maximal cross-linker length (including the residue
40 @param resolution what representation resolution should the cross-link
41 restraint be applied to.
42 @param slope The slope of a distance-linear scoring function that
43 funnels the score when the particles are
44 too far away. Suggested value 0.02.
45 @param label the extra text to label the restraint so that it is
46 searchable in the output
47 @param filelabel automatically generated file containing
48 missing/included/excluded
49 cross-links will be labeled using this text
50 @param attributes_for_label
51 @param weight Weight of restraint
52 @param linker description of the chemistry of the linker itself, as
53 an ihm.ChemDescriptor object
54 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
55 Common cross-linkers can be found in the `ihm.cross_linkers`
59 model = root_hier.get_model()
61 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
62 model, weight=weight, label=label,
66 raise Exception(
"You must pass a database")
69 "CrossLinkingMassSpectrometryRestraint: database should "
70 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
71 self.database = database
73 if resolution == 0
or resolution
is None:
74 raise Exception(
"You must pass a resolution and it can't be zero")
76 indb = open(
"included." + filelabel +
".xl.db",
"w")
77 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
78 midb = open(
"missing." + filelabel +
".xl.db",
"w")
83 "No linker chemistry specified. A linker must be given, as an "
84 "ihm.ChemDescriptor object (see the "
85 "CrossLinkingMassSpectrometryRestraint documentation).")
86 self.rs.set_name(self.rs.get_name() +
"_Data")
87 self.rspsi = self._create_restraint_set(
"PriorPsi")
88 self.rssig = self._create_restraint_set(
"PriorSig")
89 self.rslin = self._create_restraint_set(
"Linear")
91 self.rs.set_metadata(self.database.name, length, slope)
93 self.rs.set_linker_auth_name(linker.auth_name)
94 for attr
in (
'chemical_name',
'smiles',
'smiles_canonical',
95 'inchi',
'inchi_key'):
96 val = getattr(linker, attr)
98 getattr(self.rs,
"set_linker_" + attr)(val)
102 self.linear.set_slope(0.0)
105 self.psi_is_sampled =
True
106 self.sigma_is_sampled =
True
107 self.psi_dictionary = {}
108 self.sigma_dictionary = {}
110 self.outputlevel =
"low"
114 xl_groups = [p.get_cross_link_group(self)
115 for p, state
in IMP.pmi.tools._all_protocol_outputs(
119 copies_to_add = defaultdict(int)
120 print(
'gathering copies')
121 for xlid
in self.database.xlid_iterator():
122 for xl
in self.database[xlid]:
123 r1 = xl[self.database.residue1_key]
124 c1 = xl[self.database.protein1_key]
125 r2 = xl[self.database.residue2_key]
126 c2 = xl[self.database.protein2_key]
127 for c, r
in ((c1, r1), (c2, r2)):
128 if c
in copies_to_add:
131 root_hier, state_index=0, molecule=c, residue_index=r,
132 resolution=resolution).get_selected_particles()
134 copies_to_add[c] = len(sel)-1
136 for molname
in copies_to_add:
137 if copies_to_add[molname] == 0:
140 self.database.protein1_key, operator.eq, molname)
141 self.database.set_value(self.database.protein1_key,
144 self.database.protein2_key, operator.eq, molname)
145 self.database.set_value(self.database.protein2_key,
147 for ncopy
in range(copies_to_add[molname]):
148 self.database.clone_protein(
'%s.0' % molname,
149 '%s.%i' % (molname, ncopy + 1))
150 print(
'done pmi2 prelims')
152 for xlid
in self.database.xlid_iterator():
153 new_contribution =
True
154 for xl
in self.database[xlid]:
156 r1 = xl[self.database.residue1_key]
157 c1 = xl[self.database.protein1_key]
158 r2 = xl[self.database.residue2_key]
159 c2 = xl[self.database.protein2_key]
166 name1, copy1 = c1.split(
'.')
168 name2, copy2 = c2.split(
'.')
172 (p[0].add_experimental_cross_link(r1, name1, r2, name2,
175 IMP.pmi.tools._all_protocol_outputs(root_hier),
178 iterlist = range(len(IMP.atom.get_by_type(
179 root_hier, IMP.atom.STATE_TYPE)))
180 for nstate, r
in enumerate(iterlist):
182 xl[self.database.state_key] = nstate
183 xl[self.database.data_set_name_key] = self.label
186 root_hier, state_index=nstate, molecule=name1,
187 copy_index=int(copy1), residue_index=r1,
188 resolution=resolution).get_selected_particles()
190 root_hier, state_index=nstate, molecule=name2,
191 copy_index=int(copy2), residue_index=r2,
192 resolution=resolution).get_selected_particles()
199 "residue %d of chain %s selects multiple "
200 "particles %s" % (r1, c1, str(ps1)))
203 "CrossLinkingMassSpectrometryRestraint: "
204 "residue %d of chain %s is not there" % (r1, c1),
206 midb.write(str(xl) +
"\n")
211 "residue %d of chain %s selects multiple "
212 "particles %s" % (r2, c2, str(ps2)))
215 "CrossLinkingMassSpectrometryRestraint: "
216 "residue %d of chain %s is not there" % (r2, c2),
218 midb.write(str(xl) +
"\n")
224 if p1 == p2
and r1 == r2:
226 "CrossLinkingMassSpectrometryRestraint: "
227 "same particle and same residue, skipping "
232 print(
"generating a new cross-link restraint")
233 new_contribution =
False
238 dr.set_source_protein1(c1)
239 dr.set_source_protein2(c2)
240 dr.set_source_residue1(r1)
241 dr.set_source_residue2(r2)
242 restraints.append(dr)
244 if self.database.sigma1_key
not in xl.keys():
246 xl[self.database.sigma1_key] = sigma1name
248 sigma1name = xl[self.database.sigma1_key]
251 if self.database.sigma2_key
not in xl.keys():
253 xl[self.database.sigma2_key] = sigma2name
255 sigma2name = xl[self.database.sigma2_key]
258 if self.database.psi_key
not in xl.keys():
260 xl[self.database.psi_key] = psiname
262 psiname = xl[self.database.psi_key]
266 p1i = p1.get_particle_index()
268 p2i = p2.get_particle_index()
271 xl[
"Particle_sigma1"] = sigma1
273 xl[
"Particle_sigma2"] = sigma2
275 xl[
"Particle_psi"] = psi
277 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
280 print(
"--------------")
281 print(
"CrossLinkingMassSpectrometryRestraint: generating "
282 "cross-link restraint between")
283 print(
"CrossLinkingMassSpectrometryRestraint: residue %d "
284 "of chain %s and residue %d of chain %s"
286 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 "
287 "%s sigma2 %s psi %s"
288 % (sigma1name, sigma2name, psiname))
289 print(
"CrossLinkingMassSpectrometryRestraint: between "
290 "particles %s and %s"
291 % (p1.get_name(), p2.get_name()))
292 print(
"==========================================\n")
293 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
296 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
297 sigma1, sigma2, psi, ex_xl[1])
304 xl[
"IntraRigidBody"] =
True
306 xl[
"IntraRigidBody"] =
False
308 xl_label = self.database.get_short_cross_link_string(xl)
309 xl[
"ShortLabel"] = xl_label
310 dr.set_name(xl_label)
315 pr.set_name(xl_label)
316 self.rslin.add_restraint(pr)
318 self.xl_list.append(xl)
320 indb.write(str(xl) +
"\n")
322 if len(self.xl_list) == 0:
324 "CrossLinkingMassSpectrometryRestraint: no cross-link "
326 self.xl_restraints = restraints
328 self.rs.add_restraint(lw)
333 def __set_dataset(self, ds):
334 self.database.dataset = ds
335 dataset = property(
lambda self: self.database.dataset, __set_dataset)
338 """ get the hierarchy """
342 """ get the restraint set """
346 """ get the restraints in a list """
347 return self.xl_restraints
350 """ get the dummy restraints to be displayed in the rmf file """
351 return self.rs, self.rssig, self.rspsi
354 """ Get a list of tuples containing the particle pairs """
356 for i
in range(len(self.pairs)):
357 p0 = self.pairs[i][0]
358 p1 = self.pairs[i][1]
359 ppairs.append((p0, p1))
363 """ Set the output level of the output """
364 self.outputlevel = level
367 """ Switch on/off the sampling of psi particles """
368 self.psi_is_sampled = is_sampled
371 """ Switch on/off the sampling of sigma particles """
372 self.sigma_is_sampled = is_sampled
375 """ This is called internally. Creates a nuisance
376 on the structural uncertainty """
377 if name
in self.sigma_dictionary:
378 return self.sigma_dictionary[name][0]
381 sigmaminnuis = 0.0000001
382 sigmamaxnuis = 1000.0
386 sigma = IMP.pmi.tools.SetupNuisance(
387 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
388 self.sigma_is_sampled, name=name).get_particle()
389 self.sigma_dictionary[name] = (
392 self.sigma_is_sampled)
393 self.rssig.add_restraint(
403 """ This is called internally. Creates a nuisance
404 on the data uncertainty """
405 if name
in self.psi_dictionary:
406 return self.psi_dictionary[name][0]
409 psiminnuis = 0.0000001
410 psimaxnuis = 0.4999999
414 psi = IMP.pmi.tools.SetupNuisance(
415 self.model, psiinit, psiminnuis, psimaxnuis,
416 self.psi_is_sampled, name=name).get_particle()
417 self.psi_dictionary[name] = (psi, psitrans, self.psi_is_sampled)
419 self.rspsi.add_restraint(
427 """Get the output of the restraint to be used by the IMP.pmi.output
429 output = super(CrossLinkingMassSpectrometryRestraint,
432 for xl
in self.xl_list:
434 xl_label = xl[
"ShortLabel"]
438 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
439 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
443 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
446 for psiname
in self.psi_dictionary:
447 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
448 str(psiname) + self._label_suffix] = str(
449 self.psi_dictionary[psiname][0].get_scale())
451 for sigmaname
in self.sigma_dictionary:
452 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
453 str(sigmaname) + self._label_suffix] = str(
454 self.sigma_dictionary[sigmaname][0].get_scale())
459 """ Get all need data to construct a mover in IMP.pmi.dof class"""
461 if self.sigma_is_sampled:
462 for sigmaname
in self.sigma_dictionary:
464 "Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" \
465 + str(sigmaname) +
"_" + self.label
466 particle = self.sigma_dictionary[sigmaname][0]
467 maxstep = (self.sigma_dictionary[sigmaname][1])
471 mv.set_name(mover_name)
474 if self.psi_is_sampled:
475 for psiname
in self.psi_dictionary:
477 "Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + \
478 str(psiname) +
"_" + self.label
479 particle = self.psi_dictionary[psiname][0]
480 maxstep = (self.psi_dictionary[psiname][1])
484 mv.set_name(mover_name)
490 """ Get the particles to be sampled by the IMP.pmi.sampler object """
492 if self.sigma_is_sampled:
493 for sigmaname
in self.sigma_dictionary:
494 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
495 str(sigmaname) + self._label_suffix] =\
496 ([self.sigma_dictionary[sigmaname][0]],
497 self.sigma_dictionary[sigmaname][1])
499 if self.psi_is_sampled:
500 for psiname
in self.psi_dictionary:
501 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
502 str(psiname) + self._label_suffix] = \
503 ([self.psi_dictionary[psiname][0]],
504 self.psi_dictionary[psiname][1])
510 """Setup cross-link distance restraints at atomic level
511 The "atomic" aspect is that it models the particle uncertainty with
512 a Gaussian. The noise in the data and the structural uncertainty of
513 cross-linked amino-acids is inferred using Bayes' theory of probability
514 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
517 _include_in_rmf =
True
519 def __init__(self, root_hier, xldb, atom_type="NZ", length=10.0,
520 slope=0.01, nstates=
None, label=
None,
521 nuisances_are_optimized=
True, sigma_init=5.0,
522 psi_init=0.01, one_psi=
True, filelabel=
None, weight=1.):
524 Automatically creates one "sigma" per cross-linked residue and one
526 Other nuisance options are available.
527 @note Will return an error if the data+extra_sel don't specify
528 two particles per XL pair.
529 @param root_hier The root hierarchy on which you'll do selection
530 @param xldb CrossLinkDataBase object
531 @param atom_type Can either be "NZ" or "CA"
532 @param length The XL linker length
533 @param slope Linear term to add to the restraint function to
535 @param nstates The number of states to model. Defaults to the
536 number of states in root.
537 @param label The output label for the restraint
538 @param nuisances_are_optimized Whether to optimize nuisances
539 @param sigma_init The initial value for all the sigmas
540 @param psi_init The initial value for all the psis
541 @param one_psi Use a single psi for all restraints (if False,
543 @param filelabel automatically generated file containing
544 missing/included/excluded
545 cross-links will be labeled using this text
546 @param weight Weight of restraint
551 self.root = root_hier
552 rname =
"AtomicXLRestraint"
553 super(AtomicCrossLinkMSRestraint, self).
__init__(
554 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
558 self.sigma_is_sampled = nuisances_are_optimized
559 self.psi_is_sampled = nuisances_are_optimized
561 if atom_type
not in (
"NZ",
"CA"):
562 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
563 self.atom_type = atom_type
564 self.nstates = nstates
566 self.nstates = len(IMP.atom.get_by_type(self.root,
567 IMP.atom.STATE_TYPE))
568 elif nstates != len(IMP.atom.get_by_type(self.root,
569 IMP.atom.STATE_TYPE)):
570 print(
"Warning: nstates is not the same as the number of states "
573 self.rs_psi = self._create_restraint_set(
"psi")
574 self.rs_sig = self._create_restraint_set(
"sigma")
575 self.rs_lin = self._create_restraint_set(
"linear")
577 self.psi_dictionary = {}
578 self.sigma_dictionary = {}
580 self.particles = defaultdict(set)
581 self.one_psi = one_psi
582 self.bonded_pairs = []
584 print(
'creating a single psi for all XLs')
586 print(
'creating one psi for each XL')
589 if filelabel
is not None:
590 indb = open(
"included." + filelabel +
".xl.db",
"w")
591 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
592 midb = open(
"missing." + filelabel +
".xl.db",
"w")
595 self._create_sigma(
'sigma', sigma_init)
597 self._create_psi(
'psi', psi_init)
599 for xlid
in self.xldb.xlid_iterator():
600 self._create_psi(xlid, psi_init)
604 for xlid
in self.xldb.xlid_iterator():
607 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
609 psip = self.psi_dictionary[
610 unique_id][0].get_particle_index()
616 num_contributions = 0
619 for nstate
in self.nstates:
620 for xl
in self.xldb[xlid]:
621 r1 = xl[self.xldb.residue1_key]
622 c1 = xl[self.xldb.protein1_key].strip()
623 r2 = xl[self.xldb.residue2_key]
624 c2 = xl[self.xldb.protein2_key].strip()
629 self.root, state_index=nstate,
632 residue_index=r1).get_selected_particles()
634 self.root, state_index=nstate,
637 residue_index=r2).get_selected_particles()
639 warnings.warn(
"AtomicXLRestraint: residue %d of "
640 "chain %s is not there" % (r1, c1),
642 if filelabel
is not None:
643 midb.write(str(xl) +
"\n")
647 warnings.warn(
"AtomicXLRestraint: residue %d of "
648 "chain %s is not there" % (r2, c2),
650 if filelabel
is not None:
651 midb.write(str(xl) +
"\n")
656 num1=num_xls_per_res[str(xl.r1)]
657 num2=num_xls_per_res[str(xl.r2)]
658 if num1<sig_threshold:
662 if num2<sig_threshold:
667 sig1 = self.sigma_dictionary[
'sigma'][0]
668 sig2 = self.sigma_dictionary[
'sigma'][0]
671 for p1, p2
in itertools.product(ps1, ps2):
674 self.particles[nstate] |= set((p1, p2))
676 [p1.get_index(), p2.get_index()],
677 [sig1.get_particle_index(),
678 sig2.get_particle_index()])
679 num_contributions += 1
681 if num_contributions > 0:
682 print(
'XL:', xlid,
'num contributions:', num_contributions)
685 raise Exception(
"You didn't create any XL restraints")
686 print(
'created', len(xlrs),
'XL restraints')
687 rname = self.rs.get_name()
689 self.rs.set_name(rname)
690 self.rs.set_weight(self.weight)
691 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
693 def get_hierarchy(self):
696 def _create_sigma(self, name, sigmainit):
697 """ This is called internally. Creates a nuisance
698 on the structural uncertainty """
699 if name
in self.sigma_dictionary:
700 return self.sigma_dictionary[name][0]
702 sigmaminnuis = 0.0000001
703 sigmamaxnuis = 1000.0
707 sigma = IMP.pmi.tools.SetupNuisance(
708 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
709 self.sigma_is_sampled).get_particle()
710 self.sigma_dictionary[name] = (
711 sigma, sigmatrans, self.sigma_is_sampled)
713 self.model, sigma, 1000000000.0, sigmamax, sigmamin))
716 def _create_psi(self, name, psiinit):
717 """ This is called internally. Creates a nuisance
718 on the data uncertainty """
719 if name
in self.psi_dictionary:
720 return self.psi_dictionary[name][0]
722 psiminnuis = 0.0000001
723 psimaxnuis = 0.4999999
727 psi = IMP.pmi.tools.SetupNuisance(self.model,
731 self.psi_is_sampled).get_particle()
732 self.psi_dictionary[name] = (
737 self.rs_psi.add_restraint(
749 """ create dummy harmonic restraints for each XL but don't add to model
750 Makes it easy to see each contribution to each XL in RMF
752 class MyGetRestraint(object):
756 def get_restraint_for_rmf(self):
762 for nxl
in range(self.get_number_of_restraints()):
763 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
764 self.rs.get_restraint(nxl))
766 for ncontr
in range(xl.get_number_of_contributions()):
767 ps = xl.get_contribution(ncontr)
769 hps, [self.model.get_particle(p)
for p
in ps],
770 'xl%i_contr%i' % (nxl, ncontr))
772 dummy_rs.append(MyGetRestraint(rs))
776 """ Get the particles to be sampled by the IMP.pmi.sampler object """
778 if self.sigma_is_sampled:
779 for sigmaname
in self.sigma_dictionary:
780 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
781 str(sigmaname) + self._label_suffix] = \
782 ([self.sigma_dictionary[sigmaname][0]],
783 self.sigma_dictionary[sigmaname][1])
784 if self.psi_is_sampled:
785 for psiname
in self.psi_dictionary:
786 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
787 str(psiname) + self._label_suffix] = \
788 ([self.psi_dictionary[psiname][0]],
789 self.psi_dictionary[psiname][1])
792 def get_bonded_pairs(self):
793 return self.bonded_pairs
795 def get_number_of_restraints(self):
796 return self.rs.get_number_of_restraints()
799 return 'XL restraint with ' + \
800 str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
804 """Read a stat file and load all the sigmas.
805 This is potentially quite stupid.
806 It's also a hack since the sigmas should be stored in the RMF file.
807 Also, requires one sigma and one psi for ALL XLs.
810 sig_val = float(subprocess.check_output(
811 [
"process_output.py",
"-f", in_fn,
"-s",
812 "AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
813 psi_val = float(subprocess.check_output(
814 [
"process_output.py",
"-f", in_fn,
"-s",
815 "AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
816 for nxl
in range(self.rs.get_number_of_restraints()):
817 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
818 self.rs.get_restraint(nxl))
821 for contr
in range(xl.get_number_of_contributions()):
822 sig1, sig2 = xl.get_contribution_sigmas(contr)
825 print(
'loaded nuisances from file')
828 min_dist_for_violation=1e9, coarsen=
False,
829 limit_to_chains=
None, exclude_chains=
''):
830 """Create CMM files, one for each state, of all cross-links.
831 will draw in GREEN if non-violated in all states (or if only one state)
832 will draw in PURPLE if non-violated only in a subset of states
833 (draws nothing elsewhere)
834 will draw in RED in ALL states if all violated
835 (if only one state, you'll only see green and red)
837 @param out_prefix Output xlink files prefix
838 @param max_prob_for_violation It's a violation if the probability
840 @param min_dist_for_violation It's a violation if the min dist
842 @param coarsen Use CA positions
843 @param limit_to_chains Try to visualize just these chains
844 @param exclude_chains Try to NOT visualize these chains
846 print(
'going to calculate violations and plot CMM files')
848 all_dists = [s[
"low_dist"]
for s
in all_stats]
854 cmds = defaultdict(set)
855 for nstate
in range(self.nstates):
856 outf = open(out_prefix+str(nstate)+
'.cmm',
'w')
857 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
860 print(
'will limit to', limit_to_chains)
861 print(
'will exclude', exclude_chains)
866 for nxl
in range(self.rs.get_number_of_restraints()):
870 for nstate
in range(self.nstates):
871 prob = state_info[nstate][nxl][
"prob"]
872 low_dist = state_info[nstate][nxl][
"low_dist"]
873 if (prob < max_prob_for_violation
or
874 low_dist > min_dist_for_violation):
882 if len(npass) == self.nstates:
884 elif len(nviol) == self.nstates:
888 print(nxl,
'state dists:',
889 [state_info[nstate][nxl][
"low_dist"]
890 for nstate
in range(self.nstates)],
891 'viol states:', nviol,
'all viol?', all_viol)
892 for nstate
in range(self.nstates):
911 pp = state_info[nstate][nxl][
"low_pp"]
922 cmds[nstate].add((ch1, r1))
923 cmds[nstate].add((ch2, r2))
925 outf = out_fns[nstate]
927 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
928 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
929 % (nv, c1[0], c1[1], c1[2], r, g, b))
930 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" '
931 'radius="0.8" r="%.2f" g="%.2f" b="%.2f"/> \n'
932 % (nv+1, c2[0], c2[1], c2[2], r, g, b))
933 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
934 'r="%.2f" g="%.2f" b="%.2f"/> \n'
935 % (nv, nv+1, r, g, b))
938 for nstate
in range(self.nstates):
939 out_fns[nstate].write(
'</marker_set>\n')
940 out_fns[nstate].close()
942 for ch, r
in cmds[nstate]:
943 cmd +=
'#%i:%i.%s ' % (nstate, r, ch)
948 def _get_contribution_info(self, xl, ncontr, use_CA=False):
949 """Return the particles at that contribution. If requested will
950 return CA's instead"""
951 idx1 = xl.get_contribution(ncontr)[0]
952 idx2 = xl.get_contribution(ncontr)[1]
957 idx1 = sel.get_selected_particle_indexes()[0]
961 idx2 = sel.get_selected_particle_indexes()[0]
965 return idx1, idx2, dist
968 exclude_chains=
'', use_CA=
False):
969 ''' return the probability, best distance, two coords, and possibly
971 @param limit_to_state Only examine contributions from one state
972 @param limit_to_chains Returns the particles for certain "easy
974 @param exclude_chains Even if you limit, don't let one end be in
975 this list. Only works if you also limit chains
976 @param use_CA Limit to CA particles
979 for nxl
in range(self.rs.get_number_of_restraints()):
981 xl = IMP.isd.AtomicCrossLinkMSRestraint.get_from(
982 self.rs.get_restraint(nxl))
988 for contr
in range(xl.get_number_of_contributions()):
989 pp = xl.get_contribution(contr)
994 idx1 = sel.get_selected_particle_indexes()[0]
998 idx2 = sel.get_selected_particle_indexes()[0]
1000 if limit_to_state
is not None:
1003 if nstate != limit_to_state:
1005 state_contrs.append(contr)
1008 if limit_to_chains
is not None:
1013 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1014 c1
not in exclude_chains
1015 and c2
not in exclude_chains):
1016 if dist < low_dist_lim:
1022 if limit_to_state
is not None:
1023 this_info[
"prob"] = xl.evaluate_for_contributions(
1026 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1027 if limit_to_chains
is not None:
1028 this_info[
"low_pp"] = low_pp_lim
1030 this_info[
"low_pp"] = low_pp
1032 this_info[
"low_dist"] = low_dist
1033 if not self.one_psi:
1035 this_info[
"psi"] = pval
1036 ret.append(this_info)
1039 def print_stats(self):
1041 for nxl, s
in enumerate(stats):
1042 print(s[
"low_dist"])
1044 def get_output(self):
1045 output = super(AtomicCrossLinkMSRestraint, self).get_output()
1056 for nxl, s
in enumerate(stats):
1057 if s[
'low_dist'] > 20.0:
1059 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"Prob")] = str(s[
'prob'])
1060 output[
"AtomicXLRestraint_%i_%s"
1061 % (nxl,
"BestDist")] = str(s[
'low_dist'])
1062 if not self.one_psi:
1063 output[
"AtomicXLRestraint_%i_%s" % (nxl,
"psi")] \
1065 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1069 class CysteineCrossLinkRestraint(object):
1070 def __init__(self, root_hier, filename, cbeta=False,
1071 betatuple=(0.03, 0.1),
1072 disttuple=(0.0, 25.0, 1000),
1073 omegatuple=(1.0, 1000.0, 50),
1074 sigmatuple=(0.3, 0.3, 1),
1076 sigmaissampled=
False,
1077 weightissampled=
True,
1078 epsilonissampled=
True):
1085 self.m = root_hier.get_model()
1088 self.epsilonmaxtrans = 0.01
1089 self.sigmamaxtrans = 0.1
1090 self.betamaxtrans = 0.01
1091 self.weightmaxtrans = 0.1
1093 self.outputlevel =
"low"
1094 self.betaissampled = betaissampled
1095 self.sigmaissampled = sigmaissampled
1096 self.weightissampled = weightissampled
1097 self.epsilonissampled = epsilonissampled
1099 betalower = betatuple[0]
1100 betaupper = betatuple[1]
1106 self.beta = IMP.pmi.tools.SetupNuisance(
1111 betaissampled).get_particle(
1114 self.sigma = IMP.pmi.tools.SetupNuisance(
1119 sigmaissampled).get_particle()
1121 self.weight = IMP.pmi.tools.SetupWeight(
1123 isoptimized=
False).get_particle(
1127 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1136 if t[5]
in self.epsilons:
1137 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1138 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1140 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(
1141 self.m, 0.01, 0.01, 1.0 - float(t[4]),
1142 epsilonissampled).get_particle()
1143 up = self.epsilons[t[5]].get_upper()
1144 low = self.epsilons[t[5]].get_lower()
1146 self.epsilons[t[5]].set_upper(low)
1148 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1152 crossdata = IMP.pmi.tools.get_cross_link_data(
1153 "cysteine",
"cysteine_CA_FES.txt.standard",
1154 disttuple, omegatuple, sigmatuple, disttuple[1],
1157 crossdata = IMP.pmi.tools.get_cross_link_data(
1158 "cysteine",
"cysteine_CB_FES.txt.standard",
1159 disttuple, omegatuple, sigmatuple, disttuple[1],
1163 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1164 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1165 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1168 print(
"--------------")
1169 print(
"CysteineCrossLink: attempting to create a restraint "
1190 self.epsilons[epslabel],
1201 molecule=chain1, residue_index=resid1,
1203 p1 = p1.get_selected_particles()
1210 molecule=chain2, residue_index=resid2,
1212 p2 = p2.get_selected_particles()
1222 for t
in range(-1, 2):
1224 molecule=chain1, copy_index=0,
1225 residue_index=resid1 + t)
1226 p = p.get_selected_particles()
1232 "CysteineCrossLink: missing representation for "
1233 "residue %d of chain %s" % (resid1 + t, chain1),
1237 molecule=chain2, copy_index=0,
1238 residue_index=resid2 + t)
1239 p = p.get_selected_particles()
1245 "CysteineCrossLink: missing representation for "
1246 "residue %d of chain %s" % (resid2 + t, chain2),
1250 if (p1
is not None and p2
is not None):
1251 ccl.add_contribution(p1, p2)
1255 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" +
1256 str(resid2) +
"_" + chain2,
1260 if (len(p1) == 3
and len(p2) == 3):
1261 p11n = p1[0].get_name()
1262 p12n = p1[1].get_name()
1263 p13n = p1[2].get_name()
1264 p21n = p2[0].get_name()
1265 p22n = p2[1].get_name()
1266 p23n = p2[2].get_name()
1268 print(
"CysteineCrossLink: generating CB cysteine "
1269 "cross-link restraint between")
1270 print(
"CysteineCrossLink: residue %d of chain %s and "
1271 "residue %d of chain %s"
1272 % (resid1, chain1, resid2, chain2))
1273 print(
"CysteineCrossLink: between particles %s %s %s and "
1274 "%s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1276 ccl.add_contribution(p1, p2)
1279 self.rs.add_restraint(ccl)
1280 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1281 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1284 self.weight.get_particle()
1285 ).set_weights_are_optimized(weightissampled)
1287 def set_label(self, label):
1290 def add_to_model(self):
1295 if self.epsilonissampled:
1296 for eps
in self.epsilons.keys():
1297 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
1298 "_" + self.label] = ([self.epsilons[eps]],
1299 self.epsilonmaxtrans)
1300 if self.betaissampled:
1301 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1302 self.label] = ([self.beta], self.betamaxtrans)
1303 if self.weightissampled:
1304 ps[
"Weights_CysteineCrossLinkRestraint_" +
1305 self.label] = ([self.weight], self.weightmaxtrans)
1306 if self.sigmaissampled:
1307 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1308 self.label] = ([self.sigma], self.sigmamaxtrans)
1311 def set_output_level(self, level="low"):
1313 self.outputlevel = level
1315 def get_restraint(self):
1318 def get_sigma(self):
1321 def get_output(self):
1323 score = self.rs.unprotected_evaluate(
None)
1324 output[
"_TotalScore"] = str(score)
1325 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1326 output[
"CysteineCrossLinkRestraint_sigma_" +
1327 self.label] = str(self.sigma.get_scale())
1328 for eps
in self.epsilons.keys():
1329 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1330 self.label] = str(self.epsilons[eps].get_scale())
1331 output[
"CysteineCrossLinkRestraint_beta_" +
1332 self.label] = str(self.beta.get_scale())
1333 for n
in range(self.weight.get_number_of_weights()):
1334 output[
"CysteineCrossLinkRestraint_weights_" +
1335 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1337 if self.outputlevel ==
"high":
1338 for rst
in self.rs.get_restraints():
1339 cclr = IMP.isd.CysteineCrossLinkRestraint.get_from(rst)
1340 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1342 "_" + self.label] = cclr.get_model_frequency()
1343 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1344 cclr.get_name() +
"_" +
1345 self.label] = cclr.get_standard_error()
1349 class DisulfideCrossLinkRestraint(object):
1350 def __init__(self, representation_or_hier,
1358 self.m = representation_or_hier.get_model()
1361 resolution=resolution)
1364 resolution=resolution)
1371 self.linear.set_slope(0.0)
1375 self.psi_dictionary = {}
1376 self.sigma_dictionary = {}
1377 self.psi_is_sampled =
False
1378 self.sigma_is_sampled =
False
1381 if len(ps1) > 1
or len(ps1) == 0:
1383 "DisulfideBondRestraint: ERROR> first selection pattern "
1384 "selects multiple particles or sero particles")
1386 if len(ps2) > 1
or len(ps2) == 0:
1388 "DisulfideBondRestraint: ERROR> second selection pattern "
1389 "selects multiple particles or sero particles")
1394 sigma = self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1395 psi = self.create_psi(
"PSI_DISULFIDE_BOND")
1397 p1i = p1.get_index()
1398 p2i = p2.get_index()
1404 dr.add_contribution((p1i, p2i), (si, si), psii)
1408 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1409 self.rslin.add_restraint(pr)
1412 self.rs.add_restraint(lw)
1414 self.xl[
"Particle1"] = p1
1415 self.xl[
"Particle2"] = p2
1416 self.xl[
"Sigma"] = sigma
1417 self.xl[
"Psi"] = psi
1419 def add_to_model(self):
1422 self.m, self.rslin, add_to_rmf=
True)
1424 def get_hierarchies(self):
1427 def get_restraint_sets(self):
1430 def get_restraint(self):
1433 def get_restraint_for_rmf(self):
1436 def get_restraints(self):
1438 for r
in self.rs.get_restraints():
1439 rlist.append(IMP.core.PairRestraint.get_from(r))
1442 def set_psi_is_sampled(self, is_sampled=True):
1443 self.psi_is_sampled = is_sampled
1445 def set_sigma_is_sampled(self, is_sampled=True):
1446 self.sigma_is_sampled = is_sampled
1448 def create_sigma(self, name):
1449 ''' a nuisance on the structural uncertainty '''
1450 if name
in self.sigma_dictionary:
1451 return self.sigma_dictionary[name][0]
1454 sigmaminnuis = 0.0000001
1455 sigmamaxnuis = 1000.0
1457 sigma = IMP.pmi.tools.SetupNuisance(
1458 self.m, sigmainit, sigmaminnuis, sigmamaxnuis,
1459 self.sigma_is_sampled).get_particle()
1460 self.sigma_dictionary[name] = (
1463 self.sigma_is_sampled)
1467 def create_psi(self, name):
1468 ''' a nuisance on the inconsistency '''
1469 if name
in self.psi_dictionary:
1470 return self.psi_dictionary[name][0]
1473 psiminnuis = 0.0000001
1474 psimaxnuis = 0.4999999
1476 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1477 psiminnuis, psimaxnuis,
1478 self.psi_is_sampled).get_particle()
1479 self.psi_dictionary[name] = (
1482 self.psi_is_sampled)
1486 def get_output(self):
1488 score = self.rs.unprotected_evaluate(
None)
1489 output[
"_TotalScore"] = str(score)
1490 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1491 output[
"DisulfideBondRestraint_Linear_Score_" +
1492 self.label] = self.rslin.unprotected_evaluate(
None)
1496 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Score distance between two particle centers using a harmonic function.
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
A RestraintSet subclass to track cross-link metadata.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Score a pair of particles based on the distance between their centers.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the partic...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.