1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling crosslinking data.
5 from __future__
import print_function
18 from collections
import defaultdict
24 """Setup cross-link distance restraints from mass spectrometry data.
25 The noise in the data and the structural uncertainty of cross-linked amino-acids
26 is inferred using Bayes theory of probability
27 \note Wraps an IMP::isd::CrossLinkMSRestraint
29 def __init__(self, representation=None,
31 CrossLinkDataBase=
None,
37 attributes_for_label=
None,
40 @param representation DEPRECATED The IMP.pmi.representation.Representation
41 object that contain the molecular system
42 @param root_hier The canonical hierarchy containing all the states
43 @param CrossLinkDataBase The IMP.pmi.io.crosslink.CrossLinkDataBase
44 object that contains the cross-link dataset
45 @param length maximal cross-linker length (including the residue sidechains)
46 @param resolution what representation resolution should the cross-link
47 restraint be applied to.
48 @param slope The slope of a distance-linear scoring function that
49 funnels the score when the particles are
50 too far away. Suggested value 0.02.
51 @param label the extra text to label the restraint so that it is
52 searchable in the output
53 @param filelabel automatically generated file containing missing/included/excluded
54 cross-links will be labeled using this text
55 @param attributes_for_label
56 @param weight Weight of restraint
60 if representation
is not None:
62 if type(representation) != list:
63 representations = [representation]
65 representations = representation
66 m = representations[0].prot.get_model()
67 elif root_hier
is not None:
68 m = root_hier.get_model()
70 raise Exception(
"You must pass either representation or root_hier")
72 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
73 m, weight=weight, label=label)
75 if CrossLinkDataBase
is None:
76 raise Exception(
"You must pass a CrossLinkDataBase")
77 if not isinstance(CrossLinkDataBase,IMP.pmi.io.crosslink.CrossLinkDataBase):
78 raise TypeError(
"CrossLinkingMassSpectrometryRestraint: CrossLinkDataBase should be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
79 self.CrossLinkDataBase = CrossLinkDataBase
81 if resolution==0
or resolution
is None:
82 raise Exception(
"You must pass a resolution and it can't be zero")
84 indb = open(
"included." + filelabel +
".xl.db",
"w")
85 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
86 midb = open(
"missing." + filelabel +
".xl.db",
"w")
88 self.rs.set_name(self.rs.get_name() +
"_Data")
89 self.rspsi = self._create_restraint_set(
"PriorPsi")
90 self.rssig = self._create_restraint_set(
"PriorSig")
91 self.rslin = self._create_restraint_set(
"Linear")
95 self.linear.set_slope(0.0)
98 self.psi_is_sampled =
True
99 self.sigma_is_sampled =
True
100 self.psi_dictionary={}
101 self.sigma_dictionary={}
103 self.outputlevel =
"low"
109 copies_to_add = defaultdict(int)
110 print(
'gathering copies')
111 for xlid
in self.CrossLinkDataBase.xlid_iterator():
112 for xl
in self.CrossLinkDataBase[xlid]:
113 r1 = xl[self.CrossLinkDataBase.residue1_key]
114 c1 = xl[self.CrossLinkDataBase.protein1_key]
115 r2 = xl[self.CrossLinkDataBase.residue2_key]
116 c2 = xl[self.CrossLinkDataBase.protein2_key]
117 for c,r
in ((c1,r1),(c2,r2)):
118 if c
in copies_to_add:
124 resolution=resolution).get_selected_particles()
126 copies_to_add[c] = len(sel)-1
128 for molname
in copies_to_add:
129 if copies_to_add[molname]==0:
132 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein1_key,molname+
'.0',fo1)
134 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein2_key,molname+
'.0',fo2)
135 for ncopy
in range(copies_to_add[molname]):
136 self.CrossLinkDataBase.clone_protein(
'%s.0'%molname,
'%s.%i'%(molname,ncopy+1))
137 print(
'done pmi2 prelims')
139 for xlid
in self.CrossLinkDataBase.xlid_iterator():
140 new_contribution=
True
141 for xl
in self.CrossLinkDataBase[xlid]:
143 r1 = xl[self.CrossLinkDataBase.residue1_key]
144 c1 = xl[self.CrossLinkDataBase.protein1_key]
145 r2 = xl[self.CrossLinkDataBase.residue2_key]
146 c2 = xl[self.CrossLinkDataBase.protein2_key]
149 iterlist = range(len(IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)))
151 iterlist = representations
152 for nstate, r
in enumerate(iterlist):
154 xl[self.CrossLinkDataBase.state_key]=nstate
155 xl[self.CrossLinkDataBase.data_set_name_key]=self.label
163 name1,copy1 = c1.split(
'.')
165 name2,copy2 = c2.split(
'.')
169 copy_index=int(copy1),
171 resolution=resolution).get_selected_particles()
175 copy_index=int(copy2),
177 resolution=resolution).get_selected_particles()
184 resolution=resolution,
186 name_is_ambiguous=
False,
190 resolution=resolution,
192 name_is_ambiguous=
False,
196 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
198 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> residue %d of chain %s is not there" % (r1, c1))
199 midb.write(str(xl) +
"\n")
203 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
205 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> residue %d of chain %s is not there" % (r2, c2))
206 midb.write(str(xl) +
"\n")
212 if p1 == p2
and r1 == r2:
213 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> same particle and same residue, skipping cross-link")
217 print(
"generating a new crosslink restraint")
218 new_contribution=
False
223 restraints.append(dr)
226 if self.CrossLinkDataBase.sigma1_key
not in xl.keys():
228 xl[self.CrossLinkDataBase.sigma1_key]=sigma1name
230 sigma1name=xl[self.CrossLinkDataBase.sigma1_key]
233 if self.CrossLinkDataBase.sigma2_key
not in xl.keys():
235 xl[self.CrossLinkDataBase.sigma2_key]=sigma2name
237 sigma2name=xl[self.CrossLinkDataBase.sigma2_key]
240 if self.CrossLinkDataBase.psi_key
not in xl.keys():
242 xl[self.CrossLinkDataBase.psi_key]=psiname
244 psiname=xl[self.CrossLinkDataBase.psi_key]
249 p1i = p1.get_particle_index()
251 p2i = p2.get_particle_index()
254 xl[
"Particle_sigma1"]=sigma1
256 xl[
"Particle_sigma2"]=sigma2
258 xl[
"Particle_psi"]=psi
260 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
263 print(
"--------------")
264 print(
"CrossLinkingMassSpectrometryRestraint: generating cross-link restraint between")
265 print(
"CrossLinkingMassSpectrometryRestraint: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
266 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 %s sigma2 %s psi %s" % (sigma1name, sigma2name, psiname))
267 print(
"CrossLinkingMassSpectrometryRestraint: between particles %s and %s" % (p1.get_name(), p2.get_name()))
268 print(
"==========================================\n")
275 xl[
"IntraRigidBody"]=
True
277 xl[
"IntraRigidBody"]=
False
279 xl_label=self.CrossLinkDataBase.get_short_cross_link_string(xl)
280 xl[
"ShortLabel"]=xl_label
281 dr.set_name(xl_label)
285 pr.set_name(xl_label)
286 self.rslin.add_restraint(pr)
288 self.xl_list.append(xl)
290 indb.write(str(xl) +
"\n")
292 if len(self.xl_list) == 0:
293 raise SystemError(
"CrossLinkingMassSpectrometryRestraint: no crosslink was constructed")
296 self.rs.add_restraint(lw)
299 """ get the hierarchy """
303 """ get the restraint set """
307 """ get the restraints in a list """
309 for r
in self.rs.get_restraints():
310 rlist.append(IMP.core.PairRestraint.get_from(r))
314 """ get the dummy restraints to be displayed in the rmf file """
318 """ Get a list of tuples containing the particle pairs """
320 for i
in range(len(self.pairs)):
321 p0 = self.pairs[i][0]
322 p1 = self.pairs[i][1]
323 ppairs.append((p0, p1))
327 """ Set the output level of the output """
328 self.outputlevel = level
331 """ Switch on/off the sampling of psi particles """
332 self.psi_is_sampled = is_sampled
335 """ Switch on/off the sampling of sigma particles """
336 self.sigma_is_sampled = is_sampled
340 """ This is called internally. Creates a nuisance
341 on the structural uncertainty """
342 if name
in self.sigma_dictionary:
343 return self.sigma_dictionary[name][0]
346 sigmaminnuis = 0.0000001
347 sigmamaxnuis = 1000.0
351 sigma = IMP.pmi.tools.SetupNuisance(self.m, sigmainit,
352 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
353 self.sigma_dictionary[name] = (
356 self.sigma_is_sampled)
357 self.rssig.add_restraint(
367 """ This is called internally. Creates a nuisance
368 on the data uncertainty """
369 if name
in self.psi_dictionary:
370 return self.psi_dictionary[name][0]
373 psiminnuis = 0.0000001
374 psimaxnuis = 0.4999999
378 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
379 psiminnuis, psimaxnuis,
380 self.psi_is_sampled).get_particle()
381 self.psi_dictionary[name] = (
386 self.rspsi.add_restraint(
398 """ Get the output of the restraint to be used by the IMP.pmi.output object"""
399 output = super(CrossLinkingMassSpectrometryRestraint, self).
get_output()
401 for xl
in self.xl_list:
403 xl_label=xl[
"ShortLabel"]
407 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
408 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
412 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
416 for psiname
in self.psi_dictionary:
417 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
418 str(psiname) + self._label_suffix] = str(
419 self.psi_dictionary[psiname][0].get_scale())
421 for sigmaname
in self.sigma_dictionary:
422 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
423 str(sigmaname) + self._label_suffix] = str(
424 self.sigma_dictionary[sigmaname][0].get_scale())
430 """ Get all need data to construct a mover in IMP.pmi.dof class"""
432 if self.sigma_is_sampled:
433 for sigmaname
in self.sigma_dictionary:
434 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" + str(sigmaname) +
"_" + self.label
435 particle=self.sigma_dictionary[sigmaname][0]
436 maxstep=(self.sigma_dictionary[sigmaname][1])
439 mv.set_name(mover_name)
442 if self.psi_is_sampled:
443 for psiname
in self.psi_dictionary:
444 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + str(psiname) +
"_" + self.label
445 particle=self.psi_dictionary[psiname][0]
446 maxstep=(self.psi_dictionary[psiname][1])
449 mv.set_name(mover_name)
456 """ Get the particles to be sampled by the IMP.pmi.sampler object """
458 if self.sigma_is_sampled:
459 for sigmaname
in self.sigma_dictionary:
460 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
461 str(sigmaname) + self._label_suffix] =\
462 ([self.sigma_dictionary[sigmaname][0]],
463 self.sigma_dictionary[sigmaname][1])
465 if self.psi_is_sampled:
466 for psiname
in self.psi_dictionary:
467 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
468 str(psiname) + self._label_suffix] =\
469 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
475 """Setup cross-link distance restraints at atomic level
476 The "atomic" aspect is that it models the particle uncertainty with a Gaussian
477 The noise in the data and the structural uncertainty of cross-linked amino-acids
478 is inferred using Bayes' theory of probability
479 \note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
489 nuisances_are_optimized=
True,
496 Automatically creates one "sigma" per crosslinked residue and one "psis" per pair.
497 Other nuisance options are available.
498 \note Will return an error if the data+extra_sel don't specify two particles per XL pair.
499 @param root_hier The root hierarchy on which you'll do selection
500 @param xldb CrossLinkDataBase object
501 @param atom_type Can either be "NZ" or "CA"
502 @param length The XL linker length
503 @param slope Linear term to add to the restraint function to help when far away
504 @param nstates The number of states to model. Defaults to the number of states in root.
505 @param label The output label for the restraint
506 @param nuisances_are_optimized Whether to optimize nuisances
507 @param sigma_init The initial value for all the sigmas
508 @param psi_init The initial value for all the psis
509 @param one_psi Use a single psi for all restraints (if False, creates one per XL)
510 @param filelabel automatically generated file containing missing/included/excluded
511 cross-links will be labeled using this text
512 @param weight Weight of restraint
517 self.root = root_hier
518 rname =
"AtomicXLRestraint"
519 super(AtomicCrossLinkMSRestraint, self).
__init__(
520 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
524 self.sigma_is_sampled = nuisances_are_optimized
525 self.psi_is_sampled = nuisances_are_optimized
527 if atom_type
not in(
"NZ",
"CA"):
528 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
529 self.atom_type = atom_type
530 self.nstates = nstates
532 self.nstates = len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE))
533 elif nstates!=len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE)):
534 print(
"Warning: nstates is not the same as the number of states in root")
536 self.rs_psi = self._create_restraint_set(
"psi")
537 self.rs_sig = self._create_restraint_set(
"sigma")
538 self.rs_lin = self._create_restraint_set(
"linear")
540 self.psi_dictionary = {}
541 self.sigma_dictionary = {}
543 self.particles=defaultdict(set)
544 self.one_psi = one_psi
545 self.bonded_pairs = []
547 print(
'creating a single psi for all XLs')
549 print(
'creating one psi for each XL')
552 if filelabel
is not None:
553 indb = open(
"included." + filelabel +
".xl.db",
"w")
554 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
555 midb = open(
"missing." + filelabel +
".xl.db",
"w")
561 # read ahead to get the number of XL's per residue
562 num_xls_per_res=defaultdict(int)
563 for unique_id in data:
564 for nstate in range(self.nstates):
565 for xl in data[unique_id]:
566 num_xls_per_res[str(xl.r1)]+=1
567 num_xls_per_res[str(xl.r2)]+=1
569 # Will setup two sigmas based on promiscuity of the residue
571 self.sig_low = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
572 max_val=100.0,is_opt=self.nuis_opt)
573 self.sig_high = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
574 max_val=100.0,is_opt=self.nuis_opt)
576 self._create_sigma(
'sigma',sigma_init)
578 self._create_psi(
'psi',psi_init)
580 for xlid
in self.xldb.xlid_iterator():
581 self._create_psi(xlid,psi_init)
585 for xlid
in self.xldb.xlid_iterator():
588 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
590 psip = self.psi_dictionary[unique_id][0].get_particle_index()
599 for nstate
in self.nstates:
600 for xl
in self.xldb[xlid]:
601 r1 = xl[self.xldb.residue1_key]
602 c1 = xl[self.xldb.protein1_key].strip()
603 r2 = xl[self.xldb.residue2_key]
604 c2 = xl[self.xldb.protein2_key].strip()
611 residue_index=r1).get_selected_particles()
616 residue_index=r2).get_selected_particles()
618 print(
"AtomicXLRestraint: WARNING> residue %d of chain %s is not there" % (r1, c1))
619 if filelabel
is not None:
620 midb.write(str(xl) +
"\n")
624 print(
"AtomicXLRestraint: WARNING> residue %d of chain %s is not there" % (r2, c2))
625 if filelabel
is not None:
626 midb.write(str(xl) +
"\n")
632 num1=num_xls_per_res[str(xl.r1)]
633 num2=num_xls_per_res[str(xl.r2)]
634 if num1<sig_threshold:
638 if num2<sig_threshold:
643 sig1 = self.sigma_dictionary[
'sigma'][0]
644 sig2 = self.sigma_dictionary[
'sigma'][0]
647 for p1,p2
in itertools.product(ps1,ps2):
650 self.particles[nstate]|=set((p1,p2))
651 r.add_contribution([p1.get_index(),p2.get_index()],
652 [sig1.get_particle_index(),sig2.get_particle_index()])
655 if num_contributions>0:
656 print(
'XL:',xlid,
'num contributions:',num_contributions)
659 raise Exception(
"You didn't create any XL restraints")
660 print(
'created',len(xlrs),
'XL restraints')
661 rname = self.rs.get_name()
663 self.rs.set_name(rname)
664 self.rs.set_weight(self.weight)
665 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
667 def get_hierarchy(self):
670 def _create_sigma(self, name,sigmainit):
671 """ This is called internally. Creates a nuisance
672 on the structural uncertainty """
673 if name
in self.sigma_dictionary:
674 return self.sigma_dictionary[name][0]
676 sigmaminnuis = 0.0000001
677 sigmamaxnuis = 1000.0
681 sigma = IMP.pmi.tools.SetupNuisance(self.m,
685 self.sigma_is_sampled).get_particle()
686 self.sigma_dictionary[name] = (
689 self.sigma_is_sampled)
690 self.rs_sig.add_restraint(
699 def _create_psi(self, name,psiinit):
700 """ This is called internally. Creates a nuisance
701 on the data uncertainty """
702 if name
in self.psi_dictionary:
703 return self.psi_dictionary[name][0]
705 psiminnuis = 0.0000001
706 psimaxnuis = 0.4999999
710 psi = IMP.pmi.tools.SetupNuisance(self.m,
714 self.psi_is_sampled).get_particle()
715 self.psi_dictionary[name] = (
720 self.rs_psi.add_restraint(
732 """ create dummy harmonic restraints for each XL but don't add to model
733 Makes it easy to see each contribution to each XL in RMF
735 class MyGetRestraint(object):
744 for nxl
in range(self.get_number_of_restraints()):
745 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
747 for ncontr
in range(xl.get_number_of_contributions()):
748 ps=xl.get_contribution(ncontr)
750 'xl%i_contr%i'%(nxl,ncontr))
752 dummy_rs.append(MyGetRestraint(rs))
757 """ Get the particles to be sampled by the IMP.pmi.sampler object """
759 if self.sigma_is_sampled:
760 for sigmaname
in self.sigma_dictionary:
761 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
762 str(sigmaname) + self._label_suffix] = \
763 ([self.sigma_dictionary[sigmaname][0]],
764 self.sigma_dictionary[sigmaname][1])
765 if self.psi_is_sampled:
766 for psiname
in self.psi_dictionary:
767 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
768 str(psiname) + self._label_suffix] =\
769 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
772 def get_bonded_pairs(self):
773 return self.bonded_pairs
775 def get_number_of_restraints(self):
776 return self.rs.get_number_of_restraints()
779 return 'XL restraint with '+str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
783 """Read a stat file and load all the sigmas.
784 This is potentially quite stupid.
785 It's also a hack since the sigmas should be stored in the RMF file.
786 Also, requires one sigma and one psi for ALL XLs.
789 sig_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
790 "-s",
"AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
791 psi_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
792 "-s",
"AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
793 for nxl
in range(self.rs.get_number_of_restraints()):
794 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
797 for contr
in range(xl.get_number_of_contributions()):
798 sig1,sig2=xl.get_contribution_sigmas(contr)
801 print(
'loaded nuisances from file')
804 max_prob_for_violation=0.1,
805 min_dist_for_violation=1e9,
807 limit_to_chains=
None,
809 """Create CMM files, one for each state, of all crosslinks.
810 will draw in GREEN if non-violated in all states (or if only one state)
811 will draw in PURPLE if non-violated only in a subset of states (draws nothing elsewhere)
812 will draw in RED in ALL states if all violated
813 (if only one state, you'll only see green and red)
815 @param out_prefix Output xlink files prefix
816 @param max_prob_for_violation It's a violation if the probability is below this
817 @param min_dist_for_violation It's a violation if the min dist is above this
818 @param coarsen Use CA positions
819 @param limit_to_chains Try to visualize just these chains
820 @param exclude_to_chains Try to NOT visualize these chains
822 print(
'going to calculate violations and plot CMM files')
824 all_dists = [s[
"low_dist"]
for s
in all_stats]
830 cmds = defaultdict(set)
831 for nstate
in range(self.nstates):
832 outf=open(out_prefix+str(nstate)+
'.cmm',
'w')
833 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
836 print(
'will limit to',limit_to_chains)
837 print(
'will exclude',exclude_chains)
842 for nxl
in range(self.rs.get_number_of_restraints()):
846 for nstate
in range(self.nstates):
847 prob = state_info[nstate][nxl][
"prob"]
848 low_dist = state_info[nstate][nxl][
"low_dist"]
849 if prob<max_prob_for_violation
or low_dist>min_dist_for_violation:
857 if len(npass)==self.nstates:
859 elif len(nviol)==self.nstates:
863 print(nxl,
'state dists:',[state_info[nstate][nxl][
"low_dist"]
for nstate
in range(self.nstates)],
864 'viol states:',nviol,
'all viol?',all_viol)
865 for nstate
in range(self.nstates):
867 r=0.365; g=0.933; b=0.365;
870 r=0.980; g=0.302; b=0.247;
877 r=0.365; g=0.933; b=0.365;
879 pp = state_info[nstate][nxl][
"low_pp"]
888 cmds[nstate].add((ch1,r1))
889 cmds[nstate].add((ch2,r2))
891 outf = out_fns[nstate]
893 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
894 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,c1[0],c1[1],c1[2],r,g,b))
895 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
896 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv+1,c2[0],c2[1],c2[2],r,g,b))
897 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
898 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,nv+1,r,g,b))
901 for nstate
in range(self.nstates):
902 out_fns[nstate].write(
'</marker_set>\n')
903 out_fns[nstate].close()
905 for ch,r
in cmds[nstate]:
906 cmd+=
'#%i:%i.%s '%(nstate,r,ch)
910 def _get_contribution_info(self,xl,ncontr,use_CA=False):
911 """Return the particles at that contribution. If requested will return CA's instead"""
912 idx1=xl.get_contribution(ncontr)[0]
913 idx2=xl.get_contribution(ncontr)[1]
921 return idx1,idx2,dist
923 def get_best_stats(self,limit_to_state=None,limit_to_chains=None,exclude_chains='',use_CA=False):
924 ''' return the probability, best distance, two coords, and possibly the psi for each xl
925 @param limit_to_state Only examine contributions from one state
926 @param limit_to_chains Returns the particles for certain "easy to visualize" chains
927 @param exclude_chains Even if you limit, don't let one end be in this list.
928 Only works if you also limit chains
929 @param use_CA Limit to CA particles
932 for nxl
in range(self.rs.get_number_of_restraints()):
934 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
941 for contr
in range(xl.get_number_of_contributions()):
942 pp = xl.get_contribution(contr)
949 if limit_to_state
is not None:
951 if nstate!=limit_to_state:
953 state_contrs.append(contr)
956 if limit_to_chains
is not None:
959 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
960 c1
not in exclude_chains
and c2
not in exclude_chains):
961 if dist<low_dist_lim:
968 if limit_to_state
is not None:
969 this_info[
"prob"] = xl.evaluate_for_contributions(state_contrs,
None)
971 this_info[
"prob"] = xl.unprotected_evaluate(
None)
972 if limit_to_chains
is not None:
973 this_info[
"low_pp"] = low_pp_lim
975 this_info[
"low_pp"] = low_pp
977 this_info[
"low_dist"] = low_dist
980 this_info[
"psi"] = pval
981 ret.append(this_info)
984 def print_stats(self):
987 for nxl,s
in enumerate(stats):
993 output = super(AtomicCrossLinkMSRestraint, self).
get_output()
1004 for nxl,s
in enumerate(stats):
1005 if s[
'low_dist']>20.0:
1007 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"Prob")]=str(s[
'prob'])
1008 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"BestDist")]=str(s[
'low_dist'])
1009 if not self.one_psi:
1010 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"psi")]=str(s[
'psi'])
1011 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1017 """This restraint allows ambiguous crosslinking between multiple copies
1018 it is a variant of the SimplifiedCrossLinkMS
1028 self.m = representation.prot.get_model()
1034 self.outputlevel =
"low"
1035 self.expdistance = expdistance
1036 self.strength = strength
1038 fl = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1042 tokens = line.split()
1044 if (tokens[0] ==
"#"):
1053 resolution=resolution,
1055 name_is_ambiguous=
True,
1057 hrc1 = [representation.hier_db.particle_to_name[p]
for p
in ps1]
1059 print(
"ConnectivityCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1064 resolution=resolution,
1066 name_is_ambiguous=
True,
1068 hrc2 = [representation.hier_db.particle_to_name[p]
for p
in ps2]
1070 print(
"ConnectivityCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1077 if self.strength
is None:
1086 rad1 = rad1 / len(ps1)
1087 rad2 = rad2 / len(ps2)
1089 self.strength = 1 / (rad1 ** 2 + rad2 ** 2)
1097 self.rs.add_restraint(cr)
1098 self.pairs.append((ps1, hrc1, c1, r1, ps2, hrc2, c2, r2, cr))
1111 d1.set_radius(uncertainty1)
1112 d2.set_radius(uncertainty2)
1116 strength = 1 / (uncertainty1 ** 2 + uncertainty2 ** 2)
1123 for i
in range(npoints):
1127 scores.append(cr.unprotected_evaluate(
None))
1128 IMP.pmi.output.plot_xy_data(dists, scores)
1130 def set_label(self, label):
1132 self.rs.set_name(label)
1133 for r
in self.rs.get_restraints():
1136 def add_to_model(self):
1139 def get_restraint_sets(self):
1142 def get_restraint(self):
1145 def set_output_level(self, level="low"):
1147 self.outputlevel = level
1149 def set_weight(self, weight):
1150 self.weight = weight
1151 self.rs.set_weight(weight)
1153 def get_output(self):
1159 score = self.weight * self.rs.unprotected_evaluate(
None)
1160 output[
"_TotalScore"] = str(score)
1161 output[
"ConnectivityCrossLinkMS_Score_" + self.label] = str(score)
1162 for n, p
in enumerate(self.pairs):
1173 for n1, p1
in enumerate(ps1):
1176 for n2, p2
in enumerate(ps2):
1180 label = str(r1) +
":" + name1 +
"_" + str(r2) +
":" + name2
1181 output[
"ConnectivityCrossLinkMS_Distance_" +
1184 label = str(r1) +
":" + c1 +
"_" + str(r2) +
":" + c2
1185 output[
"ConnectivityCrossLinkMS_Score_" +
1186 label] = str(self.weight * cr.unprotected_evaluate(
None))
1190 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1191 class SimplifiedCrossLinkMS(object):
1201 spheredistancepairscore=
True):
1206 if columnmapping
is None:
1208 columnmapping[
"Protein1"] = 0
1209 columnmapping[
"Protein2"] = 1
1210 columnmapping[
"Residue1"] = 2
1211 columnmapping[
"Residue2"] = 3
1213 self.m = representation.prot.get_model()
1218 self.already_added_pairs = {}
1220 self.outputlevel =
"low"
1221 self.expdistance = expdistance
1222 self.strength = strength
1223 self.truncated = truncated
1224 self.spheredistancepairscore = spheredistancepairscore
1229 protein1 = columnmapping[
"Protein1"]
1230 protein2 = columnmapping[
"Protein2"]
1231 residue1 = columnmapping[
"Residue1"]
1232 residue2 = columnmapping[
"Residue2"]
1234 fl = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1238 tokens = line.split()
1240 if (tokens[0] ==
"#"):
1242 r1 = int(tokens[residue1])
1243 c1 = tokens[protein1]
1244 r2 = int(tokens[residue2])
1245 c2 = tokens[protein2]
1249 resolution=resolution,
1251 name_is_ambiguous=
False,
1255 resolution=resolution,
1257 name_is_ambiguous=
False,
1262 "residue %d of chain %s selects multiple particles; "
1264 % (r1, c1,
"".join(p.get_name()
for p
in ps1)))
1266 print(
"SimplifiedCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1271 "residue %d of chain %s selects multiple particles; "
1273 % (r2, c2,
"".join(p.get_name()
for p
in ps2)))
1275 print(
"SimplifiedCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1281 if (p1, p2)
in self.already_added_pairs:
1282 dr = self.already_added_pairs[(p1, p2)]
1283 weight = dr.get_weight()
1284 dr.set_weight(weight + 1.0)
1285 print(
"SimplifiedCrossLinkMS> crosslink %d %s %d %s was already found, adding 1.0 to the weight, weight is now %d" % (r1, c1, r2, c2, weight + 1.0))
1291 limit = self.strength * (self.expdistance + 15) ** 2 + 10.0
1302 if self.spheredistancepairscore:
1307 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2))
1309 self.rs.add_restraint(dr)
1310 self.pairs.append((p1, p2, dr, r1, c1, r2, c2))
1311 self.already_added_pairs[(p1, p2)] = dr
1312 self.already_added_pairs[(p2, p1)] = dr
1314 def set_label(self, label):
1317 def add_to_model(self):
1320 def get_restraint_sets(self):
1323 def get_restraint(self):
1326 def get_restraints(self):
1328 for r
in self.rs.get_restraints():
1329 rlist.append(IMP.core.PairRestraint.get_from(r))
1332 def get_particle_pairs(self):
1334 for i
in range(len(self.pairs)):
1335 p0 = self.pairs[i][0]
1336 p1 = self.pairs[i][1]
1337 ppairs.append((p0, p1))
1340 def set_output_level(self, level="low"):
1342 self.outputlevel = level
1344 def set_weight(self, weight):
1345 self.weight = weight
1346 self.rs.set_weight(weight)
1348 def plot_restraint(self, radius1, radius2, maxdist=50, npoints=10):
1354 d1.set_radius(radius1)
1355 d2.set_radius(radius2)
1357 limit = self.strength * (self.expdistance + 15) ** 2 + 10.0
1372 for i
in range(npoints):
1376 scores.append(dr.unprotected_evaluate(
None))
1377 IMP.pmi.output.plot_xy_data(dists, scores)
1379 def get_output(self):
1385 score = self.weight * self.rs.unprotected_evaluate(
None)
1386 output[
"_TotalScore"] = str(score)
1387 output[
"SimplifiedCrossLinkMS_Score_" + self.label] = str(score)
1388 for i
in range(len(self.pairs)):
1390 p0 = self.pairs[i][0]
1391 p1 = self.pairs[i][1]
1392 crosslinker =
'standard'
1393 ln = self.pairs[i][2]
1394 resid1 = self.pairs[i][3]
1395 chain1 = self.pairs[i][4]
1396 resid2 = self.pairs[i][5]
1397 chain2 = self.pairs[i][6]
1399 label = str(resid1) +
":" + chain1 +
"_" + \
1400 str(resid2) +
":" + chain2
1401 output[
"SimplifiedCrossLinkMS_Score_" + crosslinker +
"_" +
1402 label] = str(self.weight * ln.unprotected_evaluate(
None))
1404 if self.spheredistancepairscore:
1410 output[
"SimplifiedCrossLinkMS_Distance_" +
1417 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1418 class SigmoidalCrossLinkMS(object):
1420 self, representation, restraints_file, inflection, slope, amplitude,
1421 linear_slope, resolution=
None, columnmapping=
None, csvfile=
False,
1422 filters=
None, filelabel=
"None"):
1428 if columnmapping
is None:
1430 columnmapping[
"Protein1"] = 0
1431 columnmapping[
"Protein2"] = 1
1432 columnmapping[
"Residue1"] = 2
1433 columnmapping[
"Residue2"] = 3
1439 db = tools.get_db_from_csv(restraints_file)
1441 db = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1443 self.m = representation.prot.get_model()
1449 self.already_added_pairs = {}
1450 self.inflection = inflection
1452 self.amplitude = amplitude
1453 self.linear_slope = linear_slope
1455 self.outputlevel =
"low"
1460 protein1 = columnmapping[
"Protein1"]
1461 protein2 = columnmapping[
"Protein2"]
1462 residue1 = columnmapping[
"Residue1"]
1463 residue2 = columnmapping[
"Residue2"]
1465 indb = open(
"included." + filelabel +
".xl.db",
"w")
1466 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
1467 midb = open(
"missing." + filelabel +
".xl.db",
"w")
1471 tokens = entry.split()
1473 if (tokens[0] ==
"#"):
1475 r1 = int(tokens[residue1])
1476 c1 = tokens[protein1]
1477 r2 = int(tokens[residue2])
1478 c2 = tokens[protein2]
1480 if filters
is not None:
1481 if eval(tools.cross_link_db_filter_parser(filters)) ==
False:
1482 exdb.write(str(entry) +
"\n")
1484 indb.write(str(entry) +
"\n")
1485 r1 = int(entry[residue1])
1486 c1 = entry[protein1]
1487 r2 = int(entry[residue2])
1488 c2 = entry[protein2]
1492 resolution=resolution,
1494 name_is_ambiguous=
False,
1498 resolution=resolution,
1500 name_is_ambiguous=
False,
1504 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
1506 print(
"SigmoidalCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1507 midb.write(str(entry) +
"\n")
1511 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
1513 print(
"SigmoidalCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1514 midb.write(str(entry) +
"\n")
1520 if (p1, p2)
in self.already_added_pairs:
1521 dr = self.already_added_pairs[(p1, p2)]
1522 weight = dr.get_weight()
1523 dr.increment_amplitude(amplitude)
1524 print(
"SigmoidalCrossLinkMS> crosslink %d %s %d %s was already found, adding %d to the amplitude, amplitude is now %d" % (r1, c1, r2, c2, amplitude, dr.get_amplitude()))
1525 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2)
1526 +
"-ampl:" + str(dr.get_amplitude()))
1539 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2)
1540 +
"-ampl:" + str(dr.get_amplitude()))
1542 self.rs.add_restraint(dr)
1544 self.pairs.append((p1, p2, dr, r1, c1, r2, c2))
1545 self.already_added_pairs[(p1, p2)] = dr
1546 self.already_added_pairs[(p2, p1)] = dr
1548 def set_label(self, label):
1551 def add_to_model(self):
1554 def get_restraint_sets(self):
1557 def get_restraint(self):
1560 def get_restraints(self):
1562 for r
in self.rs.get_restraints():
1563 rlist.append(IMP.core.PairRestraint.get_from(r))
1566 def get_particle_pairs(self):
1568 for i
in range(len(self.pairs)):
1569 p0 = self.pairs[i][0]
1570 p1 = self.pairs[i][1]
1571 ppairs.append((p0, p1))
1574 def set_output_level(self, level="low"):
1576 self.outputlevel = level
1578 def set_weight(self, weight):
1579 self.weight = weight
1580 self.rs.set_weight(weight)
1582 def plot_restraint(self, radius1, radius2, maxdist=50, npoints=10):
1587 d1.set_radius(radius1)
1588 d2.set_radius(radius2)
1599 for i
in range(npoints):
1603 scores.append(dr.unprotected_evaluate(
None))
1604 IMP.pmi.output.plot_xy_data(dists, scores)
1606 def get_output(self):
1612 score = self.weight * self.rs.unprotected_evaluate(
None)
1613 output[
"_TotalScore"] = str(score)
1614 output[
"SigmoidalCrossLinkMS_Score_" + self.label] = str(score)
1615 for i
in range(len(self.pairs)):
1617 p0 = self.pairs[i][0]
1618 p1 = self.pairs[i][1]
1619 crosslinker =
'standard'
1620 ln = self.pairs[i][2]
1621 resid1 = self.pairs[i][3]
1622 chain1 = self.pairs[i][4]
1623 resid2 = self.pairs[i][5]
1624 chain2 = self.pairs[i][6]
1626 label = str(resid1) +
":" + chain1 +
"_" + \
1627 str(resid2) +
":" + chain2
1628 output[
"SigmoidalCrossLinkMS_Score_" + crosslinker +
"_" +
1629 label +
"_" + self.label] = str(self.weight * ln.unprotected_evaluate(
None))
1632 output[
"SigmoidalCrossLinkMS_Distance_" +
1637 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1638 class ISDCrossLinkMS(IMP.pmi.restraints._NuisancesBase):
1639 def __init__(self, representation,
1659 automatic_sigma_classification=
False,
1660 attributes_for_label=
None):
1670 if type(representation) != list:
1671 representations = [representation]
1673 representations = representation
1675 if columnmapping
is None:
1677 columnmapping[
"Protein1"] = 0
1678 columnmapping[
"Protein2"] = 1
1679 columnmapping[
"Residue1"] = 2
1680 columnmapping[
"Residue2"] = 3
1681 columnmapping[
"IDScore"] = 4
1682 columnmapping[
"XLUniqueID"] = 5
1688 db = IMP.pmi.tools.get_db_from_csv(restraints_file)
1690 db = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1692 indb = open(
"included." + filelabel +
".xl.db",
"w")
1693 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
1694 midb = open(
"missing." + filelabel +
".xl.db",
"w")
1696 self.m = representations[0].prot.get_model()
1706 self.sigma_dictionary = {}
1707 self.psi_dictionary = {}
1708 self.psi_is_sampled =
True
1709 self.sigma_is_sampled =
True
1711 self.dataset = representations[0].get_file_dataset(restraints_file)
1712 if not self.dataset
and os.path.exists(restraints_file):
1716 xl_groups = [p.get_cross_link_group(self)
1717 for p
in representations[0]._protocol_output]
1722 self.ids_map = IMP.pmi.tools.map()
1723 self.ids_map.set_map_element(20.0, 0.05)
1724 self.ids_map.set_map_element(65.0, 0.01)
1726 self.ids_map = ids_map
1728 if radius_map
is None:
1729 self.radius_map = IMP.pmi.tools.map()
1730 if automatic_sigma_classification:
1731 self.radius_map.set_map_element(10, 10)
1732 self.radius_map.set_map_element(1, 1)
1734 self.radius_map = radius_map
1736 self.outputlevel =
"low"
1740 self.linear.set_slope(slope)
1743 protein1 = columnmapping[
"Protein1"]
1744 protein2 = columnmapping[
"Protein2"]
1745 residue1 = columnmapping[
"Residue1"]
1746 residue2 = columnmapping[
"Residue2"]
1747 idscore = columnmapping[
"IDScore"]
1749 xluniqueid = columnmapping[
"XLUniqueID"]
1759 uniqueid_restraints_map = {}
1761 for nxl, entry
in enumerate(db):
1763 if not jackknifing
is None:
1770 raise NotImplementedError(
"jackknifing not yet implemented")
1773 tokens = entry.split()
1778 if (tokens[0] ==
"#"):
1781 r1 = int(tokens[residue1])
1782 c1 = tokens[protein1]
1783 r2 = int(tokens[residue2])
1784 c2 = tokens[protein2]
1786 if offset_dict
is not None:
1787 if c1
in offset_dict: r1+=offset_dict[c1]
1788 if c2
in offset_dict: r2+=offset_dict[c2]
1790 if rename_dict
is not None:
1791 if c1
in rename_dict: c1=rename_dict[c1]
1792 if c2
in rename_dict: c2=rename_dict[c2]
1797 ids = float(tokens[idscore])
1798 if xluniqueid
is None:
1801 xlid = tokens[xluniqueid]
1803 print(
"this line was not accessible " + str(entry))
1804 if residue1
not in entry: print(str(residue1)+
" keyword not in database")
1805 if residue2
not in entry: print(str(residue2)+
" keyword not in database")
1806 if protein1
not in entry: print(str(protein1)+
" keyword not in database")
1807 if protein2
not in entry: print(str(protein2)+
" keyword not in database")
1808 if idscore
not in entry: print(str(idscore)+
" keyword not in database")
1809 if xluniqueid
not in entry: print(str(xluniqueid)+
" keyword not in database")
1813 if filters
is not None:
1815 exdb.write(str(entry) +
"\n")
1819 r1 = int(entry[residue1])
1820 c1 = entry[protein1]
1821 r2 = int(entry[residue2])
1822 c2 = entry[protein2]
1824 if offset_dict
is not None:
1825 if c1
in offset_dict: r1+=offset_dict[c1]
1826 if c2
in offset_dict: r2+=offset_dict[c2]
1828 if rename_dict
is not None:
1829 if c1
in rename_dict: c1=rename_dict[c1]
1830 if c2
in rename_dict: c2=rename_dict[c2]
1836 ids = float(entry[idscore])
1838 ids = entry[idscore]
1839 if xluniqueid
is None:
1842 xlid = entry[xluniqueid]
1845 print(
"this line was not accessible " + str(entry))
1846 if residue1
not in entry: print(str(residue1)+
" keyword not in database")
1847 if residue2
not in entry: print(str(residue2)+
" keyword not in database")
1848 if protein1
not in entry: print(str(protein1)+
" keyword not in database")
1849 if protein2
not in entry: print(str(protein2)+
" keyword not in database")
1850 if idscore
not in entry: print(str(idscore)+
" keyword not in database")
1851 if xluniqueid
not in entry: print(str(xluniqueid)+
" keyword not in database")
1855 ex_xls = [p.add_experimental_cross_link(r1, c1, r2, c2,
1857 for p, group
in zip(representations[0]._protocol_output,
1860 for nstate, r
in enumerate(representations):
1865 resolution=resolution,
1867 name_is_ambiguous=
False,
1871 resolution=resolution,
1873 name_is_ambiguous=
False,
1877 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
1879 print(
"ISDCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1880 midb.write(str(entry) +
"\n")
1884 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
1886 print(
"ISDCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1887 midb.write(str(entry) +
"\n")
1893 if (p1 == p2)
and (r1 == r2) :
1894 print(
"ISDCrossLinkMS Restraint: WARNING> on the identical bead particles and the identical residues, thus skipping this cross-link.")
1897 if xlid
in uniqueid_restraints_map:
1898 print(
"Appending a crosslink restraint into the uniqueID %s" % str(xlid))
1899 dr = uniqueid_restraints_map[xlid]
1901 print(
"Generating a NEW crosslink restraint with a uniqueID %s" % str(xlid))
1906 restraints.append(dr)
1907 uniqueid_restraints_map[xlid] = dr
1909 mappedr1 = self.radius_map.get_map_element(
1911 sigma1 = self.get_sigma(mappedr1)[0]
1912 mappedr2 = self.radius_map.get_map_element(
1914 sigma2 = self.get_sigma(mappedr2)[0]
1916 psival = self.ids_map.get_map_element(ids)
1917 psi = self.get_psi(psival)[0]
1920 p1i = p1.get_particle_index()
1921 p2i = p2.get_particle_index()
1929 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
1930 print(
"--------------")
1931 print(
"ISDCrossLinkMS: generating cross-link restraint between")
1932 print(
"ISDCrossLinkMS: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
1933 print(
"ISDCrossLinkMS: with sigma1 %f sigma2 %f psi %s" % (mappedr1, mappedr2, psival))
1934 print(
"ISDCrossLinkMS: between particles %s and %s" % (p1.get_name(), p2.get_name()))
1935 print(
"==========================================\n")
1936 indb.write(str(entry) +
"\n")
1937 for p, ex_xl
in zip(representations[0]._protocol_output,
1939 p.add_cross_link(ex_xl, p1, p2, sigma1, sigma2, psi)
1946 xlattribute =
"intrarb"
1948 xlattribute =
"interrb"
1951 if not attributes_for_label
is None:
1952 for a
in attributes_for_label:
1953 xlattribute = xlattribute +
"_" + str(entry[a])
1955 xlattribute = xlattribute +
"-State:" + str(nstate)
1958 xlattribute +
"-" + c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2) +
"_" + self.label)
1963 xlattribute +
"-" + c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2) +
"_" + self.label)
1964 self.rslin.add_restraint(pr)
1984 self.rs.add_restraint(lw)
1986 def set_weight(self, weight):
1987 self.weight = weight
1988 self.rs.set_weight(weight)
1990 def set_slope_linear_term(self, slope):
1991 self.linear.set_slope(slope)
1993 def set_label(self, label):
1996 def add_to_model(self):
2003 def get_hierarchies(self):
2006 def get_restraint_sets(self):
2009 def get_restraint(self):
2012 def get_restraint_for_rmf(self):
2015 def get_restraints(self):
2017 for r
in self.rs.get_restraints():
2018 rlist.append(IMP.core.PairRestraint.get_from(r))
2021 def get_particle_pairs(self):
2023 for i
in range(len(self.pairs)):
2024 p0 = self.pairs[i][0]
2025 p1 = self.pairs[i][1]
2026 ppairs.append((p0, p1))
2029 def set_output_level(self, level="low"):
2031 self.outputlevel = level
2033 def set_psi_is_sampled(self, is_sampled=True):
2034 self.psi_is_sampled = is_sampled
2036 def set_sigma_is_sampled(self, is_sampled=True):
2037 self.sigma_is_sampled = is_sampled
2039 def get_label(self,pairs_index):
2040 resid1 = self.pairs[pairs_index][3]
2041 chain1 = self.pairs[pairs_index][4]
2042 resid2 = self.pairs[pairs_index][5]
2043 chain2 = self.pairs[pairs_index][6]
2044 attribute = self.pairs[pairs_index][7]
2045 rad1 = self.pairs[pairs_index][8]
2046 rad2 = self.pairs[pairs_index][9]
2047 psi = self.pairs[pairs_index][10]
2048 xlid= self.pairs[pairs_index][11]
2049 label = attribute +
"-" + \
2050 str(resid1) +
":" + chain1 +
"_" + str(resid2) +
":" + \
2051 chain2 +
"-" + str(rad1) +
"-" + str(rad2) +
"-" + str(psi)
2054 def write_db(self,filename):
2055 cldb=IMP.pmi.output.CrossLinkIdentifierDatabase()
2057 for pairs_index
in range(len(self.pairs)):
2059 resid1 = self.pairs[pairs_index][3]
2060 chain1 = self.pairs[pairs_index][4]
2061 resid2 = self.pairs[pairs_index][5]
2062 chain2 = self.pairs[pairs_index][6]
2063 attribute = self.pairs[pairs_index][7]
2064 rad1 = self.pairs[pairs_index][8]
2065 rad2 = self.pairs[pairs_index][9]
2066 psi = self.pairs[pairs_index][10]
2067 xlid= self.pairs[pairs_index][11]
2068 nstate=self.pairs[pairs_index][12]
2069 ids=self.pairs[pairs_index][13]
2071 label=self.get_label(pairs_index)
2072 cldb.set_unique_id(label,xlid)
2073 cldb.set_protein1(label,chain1)
2074 cldb.set_protein2(label,chain2)
2075 cldb.set_residue1(label,resid1)
2076 cldb.set_residue2(label,resid2)
2077 cldb.set_idscore(label,ids)
2078 cldb.set_state(label,nstate)
2079 cldb.set_sigma1(label,rad1)
2080 cldb.set_sigma2(label,rad2)
2081 cldb.set_psi(label,psi)
2082 cldb.write(filename)
2085 def get_output(self):
2091 score = self.weight * self.rs.unprotected_evaluate(
None)
2092 output[
"_TotalScore"] = str(score)
2093 output[
"ISDCrossLinkMS_Data_Score_" + self.label] = str(score)
2094 output[
"ISDCrossLinkMS_PriorSig_Score_" +
2095 self.label] = self.rssig.unprotected_evaluate(
None)
2096 output[
"ISDCrossLinkMS_PriorPsi_Score_" +
2097 self.label] = self.rspsi.unprotected_evaluate(
None)
2098 output[
"ISDCrossLinkMS_Linear_Score_" +
2099 self.label] = self.rslin.unprotected_evaluate(
None)
2100 for i
in range(len(self.pairs)):
2102 label=self.get_label(i)
2103 ln = self.pairs[i][2]
2104 p0 = self.pairs[i][0]
2105 p1 = self.pairs[i][1]
2106 output[
"ISDCrossLinkMS_Score_" +
2107 label +
"_" + self.label] = str(self.weight * -log(ln.unprotected_evaluate(
None)))
2111 output[
"ISDCrossLinkMS_Distance_" +
2115 for psiindex
in self.psi_dictionary:
2116 output[
"ISDCrossLinkMS_Psi_" +
2117 str(psiindex) +
"_" + self.label] = str(self.psi_dictionary[psiindex][0].get_scale())
2119 for resolution
in self.sigma_dictionary:
2120 output[
"ISDCrossLinkMS_Sigma_" +
2121 str(resolution) +
"_" + self.label] = str(self.sigma_dictionary[resolution][0].get_scale())
2126 def get_particles_to_sample(self):
2128 for resolution
in self.sigma_dictionary:
2129 if self.sigma_dictionary[resolution][2]
and self.sigma_is_sampled:
2130 ps[
"Nuisances_ISDCrossLinkMS_Sigma_" + str(resolution) +
"_" + self.label] =\
2131 ([self.sigma_dictionary[resolution][0]],
2132 self.sigma_dictionary[resolution][1])
2133 if self.psi_is_sampled:
2134 for psiindex
in self.psi_dictionary:
2135 if self.psi_dictionary[psiindex][2]:
2136 ps[
"Nuisances_ISDCrossLinkMS_Psi_" +
2137 str(psiindex) +
"_" + self.label] = ([self.psi_dictionary[psiindex][0]], self.psi_dictionary[psiindex][1])
2141 class CysteineCrossLinkRestraint(object):
2142 def __init__(self, representations, filename, cbeta=False,
2143 betatuple=(0.03, 0.1),
2144 disttuple=(0.0, 25.0, 1000),
2145 omegatuple=(1.0, 1000.0, 50),
2146 sigmatuple=(0.3, 0.3, 1),
2148 sigmaissampled=
False,
2149 weightissampled=
True,
2150 epsilonissampled=
True
2157 self.representations = representations
2158 self.m = self.representations[0].prot.get_model()
2161 self.epsilonmaxtrans = 0.01
2162 self.sigmamaxtrans = 0.1
2163 self.betamaxtrans = 0.01
2164 self.weightmaxtrans = 0.1
2166 self.outputlevel =
"low"
2167 self.betaissampled = betaissampled
2168 self.sigmaissampled = sigmaissampled
2169 self.weightissampled = weightissampled
2170 self.epsilonissampled = epsilonissampled
2172 betalower = betatuple[0]
2173 betaupper = betatuple[1]
2180 self.beta = IMP.pmi.tools.SetupNuisance(
2185 betaissampled).get_particle(
2188 self.sigma = IMP.pmi.tools.SetupNuisance(
2193 sigmaissampled).get_particle()
2195 self.weight = IMP.pmi.tools.SetupWeight(
2197 weightissampled).get_particle(
2201 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
2210 if t[5]
in self.epsilons:
2211 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
2212 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
2214 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(self.m,
2215 0.01, 0.01, 1.0 - float(t[4]), epsilonissampled).get_particle()
2216 up = self.epsilons[t[5]].get_upper()
2217 low = self.epsilons[t[5]].get_lower()
2219 self.epsilons[t[5]].set_upper(low)
2221 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
2225 crossdata = IMP.pmi.tools.get_cross_link_data(
2226 "cysteine",
"cysteine_CA_FES.txt.standard",
2227 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
2229 crossdata = IMP.pmi.tools.get_cross_link_data(
2230 "cysteine",
"cysteine_CB_FES.txt.standard",
2231 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
2234 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
2235 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
2236 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
2239 print(
"--------------")
2240 print(
"CysteineCrossLink: attempting to create a restraint " + str(d))
2260 self.epsilons[epslabel],
2266 for i, representation
in enumerate(self.representations):
2273 resolution=1, name=chain1,
2274 name_is_ambiguous=
False, residue=resid1)[0]
2280 resolution=1, name=chain2,
2281 name_is_ambiguous=
False, residue=resid2)[0]
2290 for t
in range(-1, 2):
2292 resolution=1, name=chain1,
2293 name_is_ambiguous=
False, residue=resid1 + t)
2299 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid1 + t, chain1))
2302 resolution=1, name=chain2,
2303 name_is_ambiguous=
False, residue=resid2 + t)
2309 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid2 + t, chain2))
2312 if (p1
is not None and p2
is not None):
2313 ccl.add_contribution(p1, p2)
2317 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2,
IMP.core.get_distance(d1, d2))
2320 if (len(p1) == 3
and len(p2) == 3):
2321 p11n = p1[0].get_name()
2322 p12n = p1[1].get_name()
2323 p13n = p1[2].get_name()
2324 p21n = p2[0].get_name()
2325 p22n = p2[1].get_name()
2326 p23n = p2[2].get_name()
2328 print(
"CysteineCrossLink: generating CB cysteine cross-link restraint between")
2329 print(
"CysteineCrossLink: residue %d of chain %s and residue %d of chain %s" % (resid1, chain1, resid2, chain2))
2330 print(
"CysteineCrossLink: between particles %s %s %s and %s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
2332 ccl.add_contribution(p1, p2)
2335 self.rs.add_restraint(ccl)
2336 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
2337 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
2339 def set_label(self, label):
2342 def add_to_model(self):
2345 def get_particles_to_sample(self):
2347 if self.epsilonissampled:
2348 for eps
in self.epsilons.keys():
2349 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
2350 self.label] = ([self.epsilons[eps]], self.epsilonmaxtrans)
2351 if self.betaissampled:
2352 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
2353 self.label] = ([self.beta], self.betamaxtrans)
2354 if self.weightissampled:
2355 ps[
"Weights_CysteineCrossLinkRestraint_" +
2356 self.label] = ([self.weight], self.weightmaxtrans)
2357 if self.sigmaissampled:
2358 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
2359 self.label] = ([self.sigma], self.sigmamaxtrans)
2362 def set_output_level(self, level="low"):
2364 self.outputlevel = level
2366 def get_restraint(self):
2369 def get_sigma(self):
2372 def get_output(self):
2375 score = self.rs.unprotected_evaluate(
None)
2376 output[
"_TotalScore"] = str(score)
2377 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
2378 output[
"CysteineCrossLinkRestraint_sigma_" +
2379 self.label] = str(self.sigma.get_scale())
2380 for eps
in self.epsilons.keys():
2381 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
2382 self.label] = str(self.epsilons[eps].get_scale())
2383 output[
"CysteineCrossLinkRestraint_beta_" +
2384 self.label] = str(self.beta.get_scale())
2385 for n
in range(self.weight.get_number_of_states()):
2386 output[
"CysteineCrossLinkRestraint_weights_" +
2387 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
2389 if self.outputlevel ==
"high":
2390 for rst
in self.rs.get_restraints():
2391 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
2392 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
2393 "_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_model_frequency()
2394 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
2395 IMP.isd.CysteineCrossLinkRestraint.get_from(
2398 + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_standard_error()
2399 if len(self.representations) > 1:
2400 for i
in range(len(self.prots)):
2401 output[
"CysteineCrossLinkRestraint_Frequency_Contribution_" +
2402 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
2403 "_State_" + str(i) +
"_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_frequencies()[i]
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
A function that is harmonic over an interval.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
Upper bound harmonic function (non-zero when feature > mean)
static XYZR setup_particle(Model *m, ParticleIndex pi)
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
Add uncertainty to a particle.
A score on the distance between the surfaces of two spheres.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
def get_restraint_for_rmf
Get the restraint for visualization in an RMF file.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
Simple sigmoidal score calculated between sphere surfaces.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
This restraint allows ambiguous crosslinking between multiple copies it is a variant of the Simplifie...
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all crosslinks.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the part...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
Class to handle individual particles of a Model object.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Restraint * create_connectivity_restraint(const Selections &s, double x0, double k, std::string name="Connectivity%1%")
Create a restraint connecting the selections.
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
def get_output
Get outputs to write to stat files.
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Base class for PMI restraints, which wrap IMP.Restraint(s).
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.
A decorator for a particle with x,y,z coordinates and a radius.