1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
5 from __future__
import print_function
17 from collections
import defaultdict
26 """Container for restraints shown in the RMF file and in Chimera"""
28 def get_static_info(self):
31 ri.add_string(
"type",
"IMP.pmi.CrossLinkingMassSpectrometryRestraint")
32 ri.add_float(
"linker_length", self.length)
33 ri.add_float(
"slope", self.slope)
34 ri.add_filename(
"filename", self.filename
or "")
36 ri.add_string(
"linker_auth_name", self.linker.auth_name)
37 if self.linker.smiles:
38 ri.add_string(
"linker_smiles", self.linker.smiles)
43 """Setup cross-link distance restraints from mass spectrometry data.
44 The noise in the data and the structural uncertainty of cross-linked amino-acids
45 is inferred using Bayes theory of probability
46 @note Wraps an IMP::isd::CrossLinkMSRestraint
48 _include_in_rmf =
True
50 def __init__(self, root_hier, database=None,
56 attributes_for_label=
None,
58 CrossLinkDataBase=
None,
61 @param root_hier The canonical hierarchy containing all the states
62 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
63 object that contains the cross-link dataset
64 @param length maximal cross-linker length (including the residue sidechains)
65 @param resolution what representation resolution should the cross-link
66 restraint be applied to.
67 @param slope The slope of a distance-linear scoring function that
68 funnels the score when the particles are
69 too far away. Suggested value 0.02.
70 @param label the extra text to label the restraint so that it is
71 searchable in the output
72 @param filelabel automatically generated file containing missing/included/excluded
73 cross-links will be labeled using this text
74 @param attributes_for_label
75 @param weight Weight of restraint
76 @param linker description of the chemistry of the linker itself, as
77 an ihm.ChemDescriptor object
78 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
79 Common cross-linkers can be found in the `ihm.cross_linkers`
83 model = root_hier.get_model()
85 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
86 model, weight=weight, label=label,
87 restraint_set_class=_DataRestraintSet)
89 if database
is None and CrossLinkDataBase
is not None:
91 "CrossLinkDataBase is deprecated; use 'database' instead")
92 database = CrossLinkDataBase
95 raise Exception(
"You must pass a database")
98 "CrossLinkingMassSpectrometryRestraint: database should "
99 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
100 self.database = database
102 if resolution==0
or resolution
is None:
103 raise Exception(
"You must pass a resolution and it can't be zero")
105 indb = open(
"included." + filelabel +
".xl.db",
"w")
106 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
107 midb = open(
"missing." + filelabel +
".xl.db",
"w")
110 self.rs.set_name(self.rs.get_name() +
"_Data")
111 self.rspsi = self._create_restraint_set(
"PriorPsi")
112 self.rssig = self._create_restraint_set(
"PriorSig")
113 self.rslin = self._create_restraint_set(
"Linear")
115 self.rs.filename = self.database.name
116 self.rs.length = length
117 self.rs.slope = slope
118 self.rs.linker = linker
122 self.linear.set_slope(0.0)
125 self.psi_is_sampled =
True
126 self.sigma_is_sampled =
True
127 self.psi_dictionary={}
128 self.sigma_dictionary={}
130 self.outputlevel =
"low"
134 xl_groups = [p.get_cross_link_group(self)
135 for p, state
in IMP.pmi.tools._all_protocol_outputs(
139 copies_to_add = defaultdict(int)
140 print(
'gathering copies')
141 for xlid
in self.database.xlid_iterator():
142 for xl
in self.database[xlid]:
143 r1 = xl[self.database.residue1_key]
144 c1 = xl[self.database.protein1_key]
145 r2 = xl[self.database.residue2_key]
146 c2 = xl[self.database.protein2_key]
147 for c,r
in ((c1,r1),(c2,r2)):
148 if c
in copies_to_add:
154 resolution=resolution).get_selected_particles()
156 copies_to_add[c] = len(sel)-1
158 for molname
in copies_to_add:
159 if copies_to_add[molname]==0:
162 self.database.protein1_key, operator.eq, molname)
163 self.database.set_value(self.database.protein1_key,
166 self.database.protein2_key, operator.eq, molname)
167 self.database.set_value(self.database.protein2_key,
169 for ncopy
in range(copies_to_add[molname]):
170 self.database.clone_protein(
'%s.0' % molname,
171 '%s.%i' % (molname, ncopy + 1))
172 print(
'done pmi2 prelims')
174 for xlid
in self.database.xlid_iterator():
175 new_contribution=
True
176 for xl
in self.database[xlid]:
178 r1 = xl[self.database.residue1_key]
179 c1 = xl[self.database.protein1_key]
180 r2 = xl[self.database.residue2_key]
181 c2 = xl[self.database.protein2_key]
188 name1,copy1 = c1.split(
'.')
190 name2,copy2 = c2.split(
'.')
193 ex_xls = [(p[0].add_experimental_cross_link(
194 r1, name1, r2, name2, group), group)
196 zip(IMP.pmi.tools._all_protocol_outputs(
200 iterlist = range(len(IMP.atom.get_by_type(root_hier,
201 IMP.atom.STATE_TYPE)))
202 for nstate, r
in enumerate(iterlist):
204 xl[self.database.state_key] = nstate
205 xl[self.database.data_set_name_key] = self.label
210 copy_index=int(copy1),
212 resolution=resolution).get_selected_particles()
216 copy_index=int(copy2),
218 resolution=resolution).get_selected_particles()
224 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
227 "CrossLinkingMassSpectrometryRestraint: "
228 "residue %d of chain %s is not there" % (r1, c1),
230 midb.write(str(xl) +
"\n")
234 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
237 "CrossLinkingMassSpectrometryRestraint: "
238 "residue %d of chain %s is not there" % (r2, c2),
240 midb.write(str(xl) +
"\n")
246 if p1 == p2
and r1 == r2:
248 "CrossLinkingMassSpectrometryRestraint: "
249 "same particle and same residue, skipping "
254 print(
"generating a new cross-link restraint")
255 new_contribution=
False
260 restraints.append(dr)
263 if self.database.sigma1_key
not in xl.keys():
265 xl[self.database.sigma1_key] = sigma1name
267 sigma1name = xl[self.database.sigma1_key]
270 if self.database.sigma2_key
not in xl.keys():
272 xl[self.database.sigma2_key] = sigma2name
274 sigma2name = xl[self.database.sigma2_key]
277 if self.database.psi_key
not in xl.keys():
279 xl[self.database.psi_key] = psiname
281 psiname = xl[self.database.psi_key]
286 p1i = p1.get_particle_index()
288 p2i = p2.get_particle_index()
291 xl[
"Particle_sigma1"]=sigma1
293 xl[
"Particle_sigma2"]=sigma2
295 xl[
"Particle_psi"]=psi
297 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
300 print(
"--------------")
301 print(
"CrossLinkingMassSpectrometryRestraint: generating cross-link restraint between")
302 print(
"CrossLinkingMassSpectrometryRestraint: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
303 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 %s sigma2 %s psi %s" % (sigma1name, sigma2name, psiname))
304 print(
"CrossLinkingMassSpectrometryRestraint: between particles %s and %s" % (p1.get_name(), p2.get_name()))
305 print(
"==========================================\n")
306 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
309 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
310 sigma1, sigma2, psi, ex_xl[1])
317 xl[
"IntraRigidBody"]=
True
319 xl[
"IntraRigidBody"]=
False
321 xl_label = self.database.get_short_cross_link_string(xl)
322 xl[
"ShortLabel"]=xl_label
323 dr.set_name(xl_label)
327 pr.set_name(xl_label)
328 self.rslin.add_restraint(pr)
330 self.xl_list.append(xl)
332 indb.write(str(xl) +
"\n")
334 if len(self.xl_list) == 0:
335 raise SystemError(
"CrossLinkingMassSpectrometryRestraint: no cross-link was constructed")
336 self.xl_restraints = restraints
338 self.rs.add_restraint(lw)
340 def __set_dataset(self, ds):
341 self.database.dataset = ds
342 dataset = property(
lambda self: self.database.dataset, __set_dataset)
345 """ get the hierarchy """
349 """ get the restraint set """
353 """ get the restraints in a list """
354 return self.xl_restraints
357 """ get the dummy restraints to be displayed in the rmf file """
358 return self.rs, self.rssig, self.rspsi
361 """ Get a list of tuples containing the particle pairs """
363 for i
in range(len(self.pairs)):
364 p0 = self.pairs[i][0]
365 p1 = self.pairs[i][1]
366 ppairs.append((p0, p1))
370 """ Set the output level of the output """
371 self.outputlevel = level
374 """ Switch on/off the sampling of psi particles """
375 self.psi_is_sampled = is_sampled
378 """ Switch on/off the sampling of sigma particles """
379 self.sigma_is_sampled = is_sampled
383 """ This is called internally. Creates a nuisance
384 on the structural uncertainty """
385 if name
in self.sigma_dictionary:
386 return self.sigma_dictionary[name][0]
389 sigmaminnuis = 0.0000001
390 sigmamaxnuis = 1000.0
394 sigma = IMP.pmi.tools.SetupNuisance(
395 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
396 self.sigma_is_sampled, name=name).get_particle()
397 self.sigma_dictionary[name] = (
400 self.sigma_is_sampled)
401 self.rssig.add_restraint(
411 """ This is called internally. Creates a nuisance
412 on the data uncertainty """
413 if name
in self.psi_dictionary:
414 return self.psi_dictionary[name][0]
417 psiminnuis = 0.0000001
418 psimaxnuis = 0.4999999
422 psi = IMP.pmi.tools.SetupNuisance(
423 self.model, psiinit, psiminnuis, psimaxnuis,
424 self.psi_is_sampled, name=name).get_particle()
425 self.psi_dictionary[name] = (
430 self.rspsi.add_restraint(
442 """ Get the output of the restraint to be used by the IMP.pmi.output object"""
443 output = super(CrossLinkingMassSpectrometryRestraint, self).
get_output()
445 for xl
in self.xl_list:
447 xl_label=xl[
"ShortLabel"]
451 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
452 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
456 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
460 for psiname
in self.psi_dictionary:
461 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
462 str(psiname) + self._label_suffix] = str(
463 self.psi_dictionary[psiname][0].get_scale())
465 for sigmaname
in self.sigma_dictionary:
466 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
467 str(sigmaname) + self._label_suffix] = str(
468 self.sigma_dictionary[sigmaname][0].get_scale())
474 """ Get all need data to construct a mover in IMP.pmi.dof class"""
476 if self.sigma_is_sampled:
477 for sigmaname
in self.sigma_dictionary:
478 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" + str(sigmaname) +
"_" + self.label
479 particle=self.sigma_dictionary[sigmaname][0]
480 maxstep=(self.sigma_dictionary[sigmaname][1])
483 mv.set_name(mover_name)
486 if self.psi_is_sampled:
487 for psiname
in self.psi_dictionary:
488 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + str(psiname) +
"_" + self.label
489 particle=self.psi_dictionary[psiname][0]
490 maxstep=(self.psi_dictionary[psiname][1])
493 mv.set_name(mover_name)
500 """ Get the particles to be sampled by the IMP.pmi.sampler object """
502 if self.sigma_is_sampled:
503 for sigmaname
in self.sigma_dictionary:
504 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
505 str(sigmaname) + self._label_suffix] =\
506 ([self.sigma_dictionary[sigmaname][0]],
507 self.sigma_dictionary[sigmaname][1])
509 if self.psi_is_sampled:
510 for psiname
in self.psi_dictionary:
511 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
512 str(psiname) + self._label_suffix] =\
513 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
519 """Setup cross-link distance restraints at atomic level
520 The "atomic" aspect is that it models the particle uncertainty with a Gaussian
521 The noise in the data and the structural uncertainty of cross-linked amino-acids
522 is inferred using Bayes' theory of probability
523 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
526 _include_in_rmf =
True
536 nuisances_are_optimized=
True,
543 Automatically creates one "sigma" per cross-linked residue and one "psis" per pair.
544 Other nuisance options are available.
545 @note Will return an error if the data+extra_sel don't specify two particles per XL pair.
546 @param root_hier The root hierarchy on which you'll do selection
547 @param xldb CrossLinkDataBase object
548 @param atom_type Can either be "NZ" or "CA"
549 @param length The XL linker length
550 @param slope Linear term to add to the restraint function to help when far away
551 @param nstates The number of states to model. Defaults to the number of states in root.
552 @param label The output label for the restraint
553 @param nuisances_are_optimized Whether to optimize nuisances
554 @param sigma_init The initial value for all the sigmas
555 @param psi_init The initial value for all the psis
556 @param one_psi Use a single psi for all restraints (if False, creates one per XL)
557 @param filelabel automatically generated file containing missing/included/excluded
558 cross-links will be labeled using this text
559 @param weight Weight of restraint
564 self.root = root_hier
565 rname =
"AtomicXLRestraint"
566 super(AtomicCrossLinkMSRestraint, self).
__init__(
567 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
571 self.sigma_is_sampled = nuisances_are_optimized
572 self.psi_is_sampled = nuisances_are_optimized
574 if atom_type
not in(
"NZ",
"CA"):
575 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
576 self.atom_type = atom_type
577 self.nstates = nstates
579 self.nstates = len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE))
580 elif nstates!=len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE)):
581 print(
"Warning: nstates is not the same as the number of states in root")
583 self.rs_psi = self._create_restraint_set(
"psi")
584 self.rs_sig = self._create_restraint_set(
"sigma")
585 self.rs_lin = self._create_restraint_set(
"linear")
587 self.psi_dictionary = {}
588 self.sigma_dictionary = {}
590 self.particles=defaultdict(set)
591 self.one_psi = one_psi
592 self.bonded_pairs = []
594 print(
'creating a single psi for all XLs')
596 print(
'creating one psi for each XL')
599 if filelabel
is not None:
600 indb = open(
"included." + filelabel +
".xl.db",
"w")
601 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
602 midb = open(
"missing." + filelabel +
".xl.db",
"w")
608 # read ahead to get the number of XL's per residue
609 num_xls_per_res=defaultdict(int)
610 for unique_id in data:
611 for nstate in range(self.nstates):
612 for xl in data[unique_id]:
613 num_xls_per_res[str(xl.r1)]+=1
614 num_xls_per_res[str(xl.r2)]+=1
616 # Will setup two sigmas based on promiscuity of the residue
618 self.sig_low = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
619 max_val=100.0,is_opt=self.nuis_opt)
620 self.sig_high = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
621 max_val=100.0,is_opt=self.nuis_opt)
623 self._create_sigma(
'sigma',sigma_init)
625 self._create_psi(
'psi',psi_init)
627 for xlid
in self.xldb.xlid_iterator():
628 self._create_psi(xlid,psi_init)
632 for xlid
in self.xldb.xlid_iterator():
635 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
637 psip = self.psi_dictionary[unique_id][0].get_particle_index()
646 for nstate
in self.nstates:
647 for xl
in self.xldb[xlid]:
648 r1 = xl[self.xldb.residue1_key]
649 c1 = xl[self.xldb.protein1_key].strip()
650 r2 = xl[self.xldb.residue2_key]
651 c2 = xl[self.xldb.protein2_key].strip()
658 residue_index=r1).get_selected_particles()
663 residue_index=r2).get_selected_particles()
665 warnings.warn(
"AtomicXLRestraint: residue %d of "
666 "chain %s is not there" % (r1, c1),
668 if filelabel
is not None:
669 midb.write(str(xl) +
"\n")
673 warnings.warn(
"AtomicXLRestraint: residue %d of "
674 "chain %s is not there" % (r2, c2),
676 if filelabel
is not None:
677 midb.write(str(xl) +
"\n")
683 num1=num_xls_per_res[str(xl.r1)]
684 num2=num_xls_per_res[str(xl.r2)]
685 if num1<sig_threshold:
689 if num2<sig_threshold:
694 sig1 = self.sigma_dictionary[
'sigma'][0]
695 sig2 = self.sigma_dictionary[
'sigma'][0]
698 for p1,p2
in itertools.product(ps1,ps2):
701 self.particles[nstate]|=set((p1,p2))
702 r.add_contribution([p1.get_index(),p2.get_index()],
703 [sig1.get_particle_index(),sig2.get_particle_index()])
706 if num_contributions>0:
707 print(
'XL:',xlid,
'num contributions:',num_contributions)
710 raise Exception(
"You didn't create any XL restraints")
711 print(
'created',len(xlrs),
'XL restraints')
712 rname = self.rs.get_name()
714 self.rs.set_name(rname)
715 self.rs.set_weight(self.weight)
716 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
718 def get_hierarchy(self):
721 def _create_sigma(self, name,sigmainit):
722 """ This is called internally. Creates a nuisance
723 on the structural uncertainty """
724 if name
in self.sigma_dictionary:
725 return self.sigma_dictionary[name][0]
727 sigmaminnuis = 0.0000001
728 sigmamaxnuis = 1000.0
732 sigma = IMP.pmi.tools.SetupNuisance(self.model,
736 self.sigma_is_sampled).get_particle()
737 self.sigma_dictionary[name] = (
740 self.sigma_is_sampled)
741 self.rs_sig.add_restraint(
750 def _create_psi(self, name,psiinit):
751 """ This is called internally. Creates a nuisance
752 on the data uncertainty """
753 if name
in self.psi_dictionary:
754 return self.psi_dictionary[name][0]
756 psiminnuis = 0.0000001
757 psimaxnuis = 0.4999999
761 psi = IMP.pmi.tools.SetupNuisance(self.model,
765 self.psi_is_sampled).get_particle()
766 self.psi_dictionary[name] = (
771 self.rs_psi.add_restraint(
783 """ create dummy harmonic restraints for each XL but don't add to model
784 Makes it easy to see each contribution to each XL in RMF
786 class MyGetRestraint(object):
789 def get_restraint_for_rmf(self):
795 for nxl
in range(self.get_number_of_restraints()):
796 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
798 for ncontr
in range(xl.get_number_of_contributions()):
799 ps=xl.get_contribution(ncontr)
801 'xl%i_contr%i'%(nxl,ncontr))
803 dummy_rs.append(MyGetRestraint(rs))
808 """ Get the particles to be sampled by the IMP.pmi.sampler object """
810 if self.sigma_is_sampled:
811 for sigmaname
in self.sigma_dictionary:
812 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
813 str(sigmaname) + self._label_suffix] = \
814 ([self.sigma_dictionary[sigmaname][0]],
815 self.sigma_dictionary[sigmaname][1])
816 if self.psi_is_sampled:
817 for psiname
in self.psi_dictionary:
818 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
819 str(psiname) + self._label_suffix] =\
820 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
823 def get_bonded_pairs(self):
824 return self.bonded_pairs
826 def get_number_of_restraints(self):
827 return self.rs.get_number_of_restraints()
830 return 'XL restraint with '+str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
834 """Read a stat file and load all the sigmas.
835 This is potentially quite stupid.
836 It's also a hack since the sigmas should be stored in the RMF file.
837 Also, requires one sigma and one psi for ALL XLs.
840 sig_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
841 "-s",
"AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
842 psi_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
843 "-s",
"AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
844 for nxl
in range(self.rs.get_number_of_restraints()):
845 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
848 for contr
in range(xl.get_number_of_contributions()):
849 sig1,sig2=xl.get_contribution_sigmas(contr)
852 print(
'loaded nuisances from file')
855 max_prob_for_violation=0.1,
856 min_dist_for_violation=1e9,
858 limit_to_chains=
None,
860 """Create CMM files, one for each state, of all cross-links.
861 will draw in GREEN if non-violated in all states (or if only one state)
862 will draw in PURPLE if non-violated only in a subset of states (draws nothing elsewhere)
863 will draw in RED in ALL states if all violated
864 (if only one state, you'll only see green and red)
866 @param out_prefix Output xlink files prefix
867 @param max_prob_for_violation It's a violation if the probability is below this
868 @param min_dist_for_violation It's a violation if the min dist is above this
869 @param coarsen Use CA positions
870 @param limit_to_chains Try to visualize just these chains
871 @param exclude_to_chains Try to NOT visualize these chains
873 print(
'going to calculate violations and plot CMM files')
875 all_dists = [s[
"low_dist"]
for s
in all_stats]
881 cmds = defaultdict(set)
882 for nstate
in range(self.nstates):
883 outf=open(out_prefix+str(nstate)+
'.cmm',
'w')
884 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
887 print(
'will limit to',limit_to_chains)
888 print(
'will exclude',exclude_chains)
893 for nxl
in range(self.rs.get_number_of_restraints()):
897 for nstate
in range(self.nstates):
898 prob = state_info[nstate][nxl][
"prob"]
899 low_dist = state_info[nstate][nxl][
"low_dist"]
900 if prob<max_prob_for_violation
or low_dist>min_dist_for_violation:
908 if len(npass)==self.nstates:
910 elif len(nviol)==self.nstates:
914 print(nxl,
'state dists:',[state_info[nstate][nxl][
"low_dist"]
for nstate
in range(self.nstates)],
915 'viol states:',nviol,
'all viol?',all_viol)
916 for nstate
in range(self.nstates):
918 r=0.365; g=0.933; b=0.365;
921 r=0.980; g=0.302; b=0.247;
928 r=0.365; g=0.933; b=0.365;
930 pp = state_info[nstate][nxl][
"low_pp"]
939 cmds[nstate].add((ch1,r1))
940 cmds[nstate].add((ch2,r2))
942 outf = out_fns[nstate]
944 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
945 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,c1[0],c1[1],c1[2],r,g,b))
946 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
947 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv+1,c2[0],c2[1],c2[2],r,g,b))
948 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
949 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,nv+1,r,g,b))
952 for nstate
in range(self.nstates):
953 out_fns[nstate].write(
'</marker_set>\n')
954 out_fns[nstate].close()
956 for ch,r
in cmds[nstate]:
957 cmd+=
'#%i:%i.%s '%(nstate,r,ch)
961 def _get_contribution_info(self,xl,ncontr,use_CA=False):
962 """Return the particles at that contribution. If requested will return CA's instead"""
963 idx1=xl.get_contribution(ncontr)[0]
964 idx2=xl.get_contribution(ncontr)[1]
972 return idx1,idx2,dist
974 def get_best_stats(self,limit_to_state=None,limit_to_chains=None,exclude_chains='',use_CA=False):
975 ''' return the probability, best distance, two coords, and possibly the psi for each xl
976 @param limit_to_state Only examine contributions from one state
977 @param limit_to_chains Returns the particles for certain "easy to visualize" chains
978 @param exclude_chains Even if you limit, don't let one end be in this list.
979 Only works if you also limit chains
980 @param use_CA Limit to CA particles
983 for nxl
in range(self.rs.get_number_of_restraints()):
985 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
992 for contr
in range(xl.get_number_of_contributions()):
993 pp = xl.get_contribution(contr)
1000 if limit_to_state
is not None:
1002 if nstate!=limit_to_state:
1004 state_contrs.append(contr)
1007 if limit_to_chains
is not None:
1010 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1011 c1
not in exclude_chains
and c2
not in exclude_chains):
1012 if dist<low_dist_lim:
1019 if limit_to_state
is not None:
1020 this_info[
"prob"] = xl.evaluate_for_contributions(state_contrs,
None)
1022 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1023 if limit_to_chains
is not None:
1024 this_info[
"low_pp"] = low_pp_lim
1026 this_info[
"low_pp"] = low_pp
1028 this_info[
"low_dist"] = low_dist
1029 if not self.one_psi:
1031 this_info[
"psi"] = pval
1032 ret.append(this_info)
1035 def print_stats(self):
1038 for nxl,s
in enumerate(stats):
1040 print(s[
"low_dist"])
1043 def get_output(self):
1044 output = super(AtomicCrossLinkMSRestraint, self).get_output()
1055 for nxl,s
in enumerate(stats):
1056 if s[
'low_dist']>20.0:
1058 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"Prob")]=str(s[
'prob'])
1059 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"BestDist")]=str(s[
'low_dist'])
1060 if not self.one_psi:
1061 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"psi")]=str(s[
'psi'])
1062 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1066 class CysteineCrossLinkRestraint(object):
1067 def __init__(self, root_hier, filename, cbeta=False,
1068 betatuple=(0.03, 0.1),
1069 disttuple=(0.0, 25.0, 1000),
1070 omegatuple=(1.0, 1000.0, 50),
1071 sigmatuple=(0.3, 0.3, 1),
1073 sigmaissampled=
False,
1074 weightissampled=
True,
1075 epsilonissampled=
True
1082 self.m = root_hier.get_model()
1085 self.epsilonmaxtrans = 0.01
1086 self.sigmamaxtrans = 0.1
1087 self.betamaxtrans = 0.01
1088 self.weightmaxtrans = 0.1
1090 self.outputlevel =
"low"
1091 self.betaissampled = betaissampled
1092 self.sigmaissampled = sigmaissampled
1093 self.weightissampled = weightissampled
1094 self.epsilonissampled = epsilonissampled
1096 betalower = betatuple[0]
1097 betaupper = betatuple[1]
1104 self.beta = IMP.pmi.tools.SetupNuisance(
1109 betaissampled).get_particle(
1112 self.sigma = IMP.pmi.tools.SetupNuisance(
1117 sigmaissampled).get_particle()
1119 self.weight = IMP.pmi.tools.SetupWeight(
1121 isoptimized=
False).get_particle(
1125 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1134 if t[5]
in self.epsilons:
1135 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1136 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1138 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(self.m,
1139 0.01, 0.01, 1.0 - float(t[4]), epsilonissampled).get_particle()
1140 up = self.epsilons[t[5]].get_upper()
1141 low = self.epsilons[t[5]].get_lower()
1143 self.epsilons[t[5]].set_upper(low)
1145 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1149 crossdata = IMP.pmi.tools.get_cross_link_data(
1150 "cysteine",
"cysteine_CA_FES.txt.standard",
1151 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1153 crossdata = IMP.pmi.tools.get_cross_link_data(
1154 "cysteine",
"cysteine_CB_FES.txt.standard",
1155 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1158 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1159 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1160 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1163 print(
"--------------")
1164 print(
"CysteineCrossLink: attempting to create a restraint " + str(d))
1184 self.epsilons[epslabel],
1195 molecule=chain1, residue_index=resid1,
1197 p1 = p1.get_selected_particles()
1204 molecule=chain2, residue_index=resid2,
1206 p2 = p2.get_selected_particles()
1216 for t
in range(-1, 2):
1218 molecule=chain1, copy_index=0,
1219 residue_index=resid1 + t)
1220 p = p.get_selected_particles()
1226 "CysteineCrossLink: missing representation for "
1227 "residue %d of chain %s" % (resid1 + t, chain1),
1231 molecule=chain2, copy_index=0,
1232 residue_index=resid2 + t)
1233 p = p.get_selected_particles()
1239 "CysteineCrossLink: missing representation for "
1240 "residue %d of chain %s" % (resid2 + t, chain2),
1244 if (p1
is not None and p2
is not None):
1245 ccl.add_contribution(p1, p2)
1249 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2,
IMP.core.get_distance(d1, d2))
1252 if (len(p1) == 3
and len(p2) == 3):
1253 p11n = p1[0].get_name()
1254 p12n = p1[1].get_name()
1255 p13n = p1[2].get_name()
1256 p21n = p2[0].get_name()
1257 p22n = p2[1].get_name()
1258 p23n = p2[2].get_name()
1260 print(
"CysteineCrossLink: generating CB cysteine cross-link restraint between")
1261 print(
"CysteineCrossLink: residue %d of chain %s and residue %d of chain %s" % (resid1, chain1, resid2, chain2))
1262 print(
"CysteineCrossLink: between particles %s %s %s and %s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1264 ccl.add_contribution(p1, p2)
1267 self.rs.add_restraint(ccl)
1268 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1269 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1272 self.weight.get_particle()
1273 ).set_weights_are_optimized(weightissampled)
1275 def set_label(self, label):
1278 def add_to_model(self):
1283 if self.epsilonissampled:
1284 for eps
in self.epsilons.keys():
1285 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1286 self.label] = ([self.epsilons[eps]], self.epsilonmaxtrans)
1287 if self.betaissampled:
1288 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1289 self.label] = ([self.beta], self.betamaxtrans)
1290 if self.weightissampled:
1291 ps[
"Weights_CysteineCrossLinkRestraint_" +
1292 self.label] = ([self.weight], self.weightmaxtrans)
1293 if self.sigmaissampled:
1294 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1295 self.label] = ([self.sigma], self.sigmamaxtrans)
1298 def set_output_level(self, level="low"):
1300 self.outputlevel = level
1302 def get_restraint(self):
1305 def get_sigma(self):
1308 def get_output(self):
1310 score = self.rs.unprotected_evaluate(
None)
1311 output[
"_TotalScore"] = str(score)
1312 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1313 output[
"CysteineCrossLinkRestraint_sigma_" +
1314 self.label] = str(self.sigma.get_scale())
1315 for eps
in self.epsilons.keys():
1316 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1317 self.label] = str(self.epsilons[eps].get_scale())
1318 output[
"CysteineCrossLinkRestraint_beta_" +
1319 self.label] = str(self.beta.get_scale())
1320 for n
in range(self.weight.get_number_of_states()):
1321 output[
"CysteineCrossLinkRestraint_weights_" +
1322 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1324 if self.outputlevel ==
"high":
1325 for rst
in self.rs.get_restraints():
1326 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1327 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
1328 "_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_model_frequency()
1329 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1330 IMP.isd.CysteineCrossLinkRestraint.get_from(
1333 + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_standard_error()
1337 class DisulfideCrossLinkRestraint(object):
1338 def __init__(self, representation_or_hier,
1346 self.m = representation_or_hier.get_model()
1349 resolution=resolution)
1352 resolution=resolution)
1359 self.linear.set_slope(0.0)
1363 self.psi_dictionary={}
1364 self.sigma_dictionary={}
1365 self.psi_is_sampled =
False
1366 self.sigma_is_sampled =
False
1369 if len(ps1) > 1
or len(ps1) == 0:
1370 raise ValueError(
"DisulfideBondRestraint: ERROR> first selection pattern selects multiple particles or sero particles")
1372 if len(ps2) > 1
or len(ps2) == 0:
1373 raise ValueError(
"DisulfideBondRestraint: ERROR> second selection pattern selects multiple particles or sero particles")
1378 sigma=self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1379 psi=self.create_psi(
"PSI_DISULFIDE_BOND")
1381 p1i = p1.get_index()
1382 p2i = p2.get_index()
1391 dr.add_contribution((p1i, p2i), (si, si), psii)
1395 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1396 self.rslin.add_restraint(pr)
1399 self.rs.add_restraint(lw)
1401 self.xl[
"Particle1"]=p1
1402 self.xl[
"Particle2"]=p2
1403 self.xl[
"Sigma"]=sigma
1406 def add_to_model(self):
1411 def get_hierarchies(self):
1414 def get_restraint_sets(self):
1417 def get_restraint(self):
1420 def get_restraint_for_rmf(self):
1423 def get_restraints(self):
1425 for r
in self.rs.get_restraints():
1426 rlist.append(IMP.core.PairRestraint.get_from(r))
1429 def set_psi_is_sampled(self, is_sampled=True):
1430 self.psi_is_sampled = is_sampled
1432 def set_sigma_is_sampled(self, is_sampled=True):
1433 self.sigma_is_sampled = is_sampled
1436 def create_sigma(self, name):
1437 ''' a nuisance on the structural uncertainty '''
1438 if name
in self.sigma_dictionary:
1439 return self.sigma_dictionary[name][0]
1442 sigmaminnuis = 0.0000001
1443 sigmamaxnuis = 1000.0
1447 sigma = IMP.pmi.tools.SetupNuisance(self.m, sigmainit,
1448 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
1449 self.sigma_dictionary[name] = (
1452 self.sigma_is_sampled)
1456 def create_psi(self, name):
1457 ''' a nuisance on the inconsistency '''
1458 if name
in self.psi_dictionary:
1459 return self.psi_dictionary[name][0]
1462 psiminnuis = 0.0000001
1463 psimaxnuis = 0.4999999
1467 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1468 psiminnuis, psimaxnuis,
1469 self.psi_is_sampled).get_particle()
1470 self.psi_dictionary[name] = (
1473 self.psi_is_sampled)
1478 def get_output(self):
1480 score = self.rs.unprotected_evaluate(
None)
1481 output[
"_TotalScore"] = str(score)
1482 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1483 output[
"DisulfideBondRestraint_Linear_Score_" +
1484 self.label] = self.rslin.unprotected_evaluate(
None)
1488 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
void handle_use_deprecated(std::string message)
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
Report key:value information on restraints.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the part...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.