1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling crosslinking data.
5 from __future__
import print_function
18 from collections
import defaultdict
24 """Setup cross-link distance restraints from mass spectrometry data.
25 The noise in the data and the structural uncertainty of cross-linked amino-acids
26 is inferred using Bayes theory of probability
27 \note Wraps an IMP::isd::CrossLinkMSRestraint
29 def __init__(self, representation=None,
31 CrossLinkDataBase=
None,
37 attributes_for_label=
None,
40 @param representation DEPRECATED The IMP.pmi.representation.Representation
41 object that contain the molecular system
42 @param root_hier The canonical hierarchy containing all the states
43 @param CrossLinkDataBase The IMP.pmi.io.crosslink.CrossLinkDataBase
44 object that contains the cross-link dataset
45 @param length maximal cross-linker length (including the residue sidechains)
46 @param resolution what representation resolution should the cross-link
47 restraint be applied to.
48 @param slope The slope of a distance-linear scoring function that
49 funnels the score when the particles are
50 too far away. Suggested value 0.02.
51 @param label the extra text to label the restraint so that it is
52 searchable in the output
53 @param filelabel automatically generated file containing missing/included/excluded
54 cross-links will be labeled using this text
55 @param attributes_for_label
56 @param weight Weight of restraint
60 if representation
is not None:
62 if type(representation) != list:
63 representations = [representation]
65 representations = representation
66 m = representations[0].prot.get_model()
67 elif root_hier
is not None:
68 m = root_hier.get_model()
70 raise Exception(
"You must pass either representation or root_hier")
72 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
73 m, weight=weight, label=label)
75 if CrossLinkDataBase
is None:
76 raise Exception(
"You must pass a CrossLinkDataBase")
77 if not isinstance(CrossLinkDataBase,IMP.pmi.io.crosslink.CrossLinkDataBase):
78 raise TypeError(
"CrossLinkingMassSpectrometryRestraint: CrossLinkDataBase should be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
79 self.CrossLinkDataBase = CrossLinkDataBase
81 if resolution==0
or resolution
is None:
82 raise Exception(
"You must pass a resolution and it can't be zero")
84 indb = open(
"included." + filelabel +
".xl.db",
"w")
85 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
86 midb = open(
"missing." + filelabel +
".xl.db",
"w")
88 self.rs.set_name(self.rs.get_name() +
"_Data")
89 self.rspsi = self._create_restraint_set(
"PriorPsi")
90 self.rssig = self._create_restraint_set(
"PriorSig")
91 self.rslin = self._create_restraint_set(
"Linear")
95 self.linear.set_slope(0.0)
98 self.psi_is_sampled =
True
99 self.sigma_is_sampled =
True
100 self.psi_dictionary={}
101 self.sigma_dictionary={}
103 self.outputlevel =
"low"
109 copies_to_add = defaultdict(int)
110 print(
'gathering copies')
111 for xlid
in self.CrossLinkDataBase.xlid_iterator():
112 for xl
in self.CrossLinkDataBase[xlid]:
113 r1 = xl[self.CrossLinkDataBase.residue1_key]
114 c1 = xl[self.CrossLinkDataBase.protein1_key]
115 r2 = xl[self.CrossLinkDataBase.residue2_key]
116 c2 = xl[self.CrossLinkDataBase.protein2_key]
117 for c,r
in ((c1,r1),(c2,r2)):
118 if c
in copies_to_add:
124 resolution=resolution).get_selected_particles()
126 copies_to_add[c] = len(sel)-1
128 for molname
in copies_to_add:
129 if copies_to_add[molname]==0:
132 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein1_key,molname+
'.0',fo1)
134 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein2_key,molname+
'.0',fo2)
135 for ncopy
in range(copies_to_add[molname]):
136 self.CrossLinkDataBase.clone_protein(
'%s.0'%molname,
'%s.%i'%(molname,ncopy+1))
137 print(
'done pmi2 prelims')
139 for xlid
in self.CrossLinkDataBase.xlid_iterator():
140 new_contribution=
True
141 for xl
in self.CrossLinkDataBase[xlid]:
143 r1 = xl[self.CrossLinkDataBase.residue1_key]
144 c1 = xl[self.CrossLinkDataBase.protein1_key]
145 r2 = xl[self.CrossLinkDataBase.residue2_key]
146 c2 = xl[self.CrossLinkDataBase.protein2_key]
149 iterlist = range(len(IMP.atom.get_by_type(root_hier,IMP.atom.STATE_TYPE)))
151 iterlist = representations
152 for nstate, r
in enumerate(iterlist):
154 xl[self.CrossLinkDataBase.state_key]=nstate
155 xl[self.CrossLinkDataBase.data_set_name_key]=self.label
163 name1,copy1 = c1.split(
'.')
165 name2,copy2 = c2.split(
'.')
169 copy_index=int(copy1),
171 resolution=resolution).get_selected_particles()
175 copy_index=int(copy2),
177 resolution=resolution).get_selected_particles()
184 resolution=resolution,
186 name_is_ambiguous=
False,
190 resolution=resolution,
192 name_is_ambiguous=
False,
196 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
198 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> residue %d of chain %s is not there" % (r1, c1))
199 midb.write(str(xl) +
"\n")
203 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
205 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> residue %d of chain %s is not there" % (r2, c2))
206 midb.write(str(xl) +
"\n")
212 if p1 == p2
and r1 == r2:
213 print(
"CrossLinkingMassSpectrometryRestraint: WARNING> same particle and same residue, skipping cross-link")
217 print(
"generating a new crosslink restraint")
218 new_contribution=
False
223 restraints.append(dr)
226 if self.CrossLinkDataBase.sigma1_key
not in xl.keys():
228 xl[self.CrossLinkDataBase.sigma1_key]=sigma1name
230 sigma1name=xl[self.CrossLinkDataBase.sigma1_key]
233 if self.CrossLinkDataBase.sigma2_key
not in xl.keys():
235 xl[self.CrossLinkDataBase.sigma2_key]=sigma2name
237 sigma2name=xl[self.CrossLinkDataBase.sigma2_key]
240 if self.CrossLinkDataBase.psi_key
not in xl.keys():
242 xl[self.CrossLinkDataBase.psi_key]=psiname
244 psiname=xl[self.CrossLinkDataBase.psi_key]
249 p1i = p1.get_particle_index()
251 p2i = p2.get_particle_index()
254 xl[
"Particle_sigma1"]=sigma1
256 xl[
"Particle_sigma2"]=sigma2
258 xl[
"Particle_psi"]=psi
260 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
263 print(
"--------------")
264 print(
"CrossLinkingMassSpectrometryRestraint: generating cross-link restraint between")
265 print(
"CrossLinkingMassSpectrometryRestraint: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
266 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 %s sigma2 %s psi %s" % (sigma1name, sigma2name, psiname))
267 print(
"CrossLinkingMassSpectrometryRestraint: between particles %s and %s" % (p1.get_name(), p2.get_name()))
268 print(
"==========================================\n")
275 xl[
"IntraRigidBody"]=
True
277 xl[
"IntraRigidBody"]=
False
279 xl_label=self.CrossLinkDataBase.get_short_cross_link_string(xl)
280 xl[
"ShortLabel"]=xl_label
281 dr.set_name(xl_label)
285 pr.set_name(xl_label)
286 self.rslin.add_restraint(pr)
288 self.xl_list.append(xl)
290 indb.write(str(xl) +
"\n")
292 if len(self.xl_list) == 0:
293 raise SystemError(
"CrossLinkingMassSpectrometryRestraint: no crosslink was constructed")
296 self.rs.add_restraint(lw)
299 """ get the hierarchy """
303 """ get the restraint set """
307 """ get the restraints in a list """
309 for r
in self.rs.get_restraints():
310 rlist.append(IMP.core.PairRestraint.get_from(r))
314 """ get the dummy restraints to be displayed in the rmf file """
318 """ Get a list of tuples containing the particle pairs """
320 for i
in range(len(self.pairs)):
321 p0 = self.pairs[i][0]
322 p1 = self.pairs[i][1]
323 ppairs.append((p0, p1))
327 """ Set the output level of the output """
328 self.outputlevel = level
331 """ Switch on/off the sampling of psi particles """
332 self.psi_is_sampled = is_sampled
335 """ Switch on/off the sampling of sigma particles """
336 self.sigma_is_sampled = is_sampled
340 """ This is called internally. Creates a nuisance
341 on the structural uncertainty """
342 if name
in self.sigma_dictionary:
343 return self.sigma_dictionary[name][0]
346 sigmaminnuis = 0.0000001
347 sigmamaxnuis = 1000.0
351 sigma = IMP.pmi.tools.SetupNuisance(self.m, sigmainit,
352 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
353 self.sigma_dictionary[name] = (
356 self.sigma_is_sampled)
357 self.rssig.add_restraint(
367 """ This is called internally. Creates a nuisance
368 on the data uncertainty """
369 if name
in self.psi_dictionary:
370 return self.psi_dictionary[name][0]
373 psiminnuis = 0.0000001
374 psimaxnuis = 0.4999999
378 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
379 psiminnuis, psimaxnuis,
380 self.psi_is_sampled).get_particle()
381 self.psi_dictionary[name] = (
386 self.rspsi.add_restraint(
398 """ Get the output of the restraint to be used by the IMP.pmi.output object"""
399 output = super(CrossLinkingMassSpectrometryRestraint, self).
get_output()
401 for xl
in self.xl_list:
403 xl_label=xl[
"ShortLabel"]
407 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
408 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
412 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
416 for psiname
in self.psi_dictionary:
417 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
418 str(psiname) + self._label_suffix] = str(
419 self.psi_dictionary[psiname][0].get_scale())
421 for sigmaname
in self.sigma_dictionary:
422 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
423 str(sigmaname) + self._label_suffix] = str(
424 self.sigma_dictionary[sigmaname][0].get_scale())
430 """ Get all need data to construct a mover in IMP.pmi.dof class"""
432 if self.sigma_is_sampled:
433 for sigmaname
in self.sigma_dictionary:
434 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" + str(sigmaname) +
"_" + self.label
435 particle=self.sigma_dictionary[sigmaname][0]
436 maxstep=(self.sigma_dictionary[sigmaname][1])
439 mv.set_name(mover_name)
442 if self.psi_is_sampled:
443 for psiname
in self.psi_dictionary:
444 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + str(psiname) +
"_" + self.label
445 particle=self.psi_dictionary[psiname][0]
446 maxstep=(self.psi_dictionary[psiname][1])
449 mv.set_name(mover_name)
456 """ Get the particles to be sampled by the IMP.pmi.sampler object """
458 if self.sigma_is_sampled:
459 for sigmaname
in self.sigma_dictionary:
460 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
461 str(sigmaname) + self._label_suffix] =\
462 ([self.sigma_dictionary[sigmaname][0]],
463 self.sigma_dictionary[sigmaname][1])
465 if self.psi_is_sampled:
466 for psiname
in self.psi_dictionary:
467 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
468 str(psiname) + self._label_suffix] =\
469 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
475 """Setup cross-link distance restraints at atomic level
476 The "atomic" aspect is that it models the particle uncertainty with a Gaussian
477 The noise in the data and the structural uncertainty of cross-linked amino-acids
478 is inferred using Bayes' theory of probability
479 \note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
489 nuisances_are_optimized=
True,
496 Automatically creates one "sigma" per crosslinked residue and one "psis" per pair.
497 Other nuisance options are available.
498 \note Will return an error if the data+extra_sel don't specify two particles per XL pair.
499 @param root_hier The root hierarchy on which you'll do selection
500 @param xldb CrossLinkDataBase object
501 @param atom_type Can either be "NZ" or "CA"
502 @param length The XL linker length
503 @param slope Linear term to add to the restraint function to help when far away
504 @param nstates The number of states to model. Defaults to the number of states in root.
505 @param label The output label for the restraint
506 @param nuisances_are_optimized Whether to optimize nuisances
507 @param sigma_init The initial value for all the sigmas
508 @param psi_init The initial value for all the psis
509 @param one_psi Use a single psi for all restraints (if False, creates one per XL)
510 @param filelabel automatically generated file containing missing/included/excluded
511 cross-links will be labeled using this text
512 @param weight Weight of restraint
517 self.root = root_hier
518 rname =
"AtomicXLRestraint"
519 super(AtomicCrossLinkMSRestraint, self).
__init__(
520 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
524 self.sigma_is_sampled = nuisances_are_optimized
525 self.psi_is_sampled = nuisances_are_optimized
527 if atom_type
not in(
"NZ",
"CA"):
528 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
529 self.atom_type = atom_type
530 self.nstates = nstates
532 self.nstates = len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE))
533 elif nstates!=len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE)):
534 print(
"Warning: nstates is not the same as the number of states in root")
536 self.rs_psi = self._create_restraint_set(
"psi")
537 self.rs_sig = self._create_restraint_set(
"sigma")
538 self.rs_lin = self._create_restraint_set(
"linear")
540 self.psi_dictionary = {}
541 self.sigma_dictionary = {}
543 self.particles=defaultdict(set)
544 self.one_psi = one_psi
545 self.bonded_pairs = []
547 print(
'creating a single psi for all XLs')
549 print(
'creating one psi for each XL')
552 if filelabel
is not None:
553 indb = open(
"included." + filelabel +
".xl.db",
"w")
554 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
555 midb = open(
"missing." + filelabel +
".xl.db",
"w")
561 # read ahead to get the number of XL's per residue
562 num_xls_per_res=defaultdict(int)
563 for unique_id in data:
564 for nstate in range(self.nstates):
565 for xl in data[unique_id]:
566 num_xls_per_res[str(xl.r1)]+=1
567 num_xls_per_res[str(xl.r2)]+=1
569 # Will setup two sigmas based on promiscuity of the residue
571 self.sig_low = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
572 max_val=100.0,is_opt=self.nuis_opt)
573 self.sig_high = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
574 max_val=100.0,is_opt=self.nuis_opt)
576 self._create_sigma(
'sigma',sigma_init)
578 self._create_psi(
'psi',psi_init)
580 for xlid
in self.xldb.xlid_iterator():
581 self._create_psi(xlid,psi_init)
585 for xlid
in self.xldb.xlid_iterator():
588 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
590 psip = self.psi_dictionary[unique_id][0].get_particle_index()
599 for nstate
in self.nstates:
600 for xl
in self.xldb[xlid]:
601 r1 = xl[self.xldb.residue1_key]
602 c1 = xl[self.xldb.protein1_key].strip()
603 r2 = xl[self.xldb.residue2_key]
604 c2 = xl[self.xldb.protein2_key].strip()
611 residue_index=r1).get_selected_particles()
616 residue_index=r2).get_selected_particles()
618 print(
"AtomicXLRestraint: WARNING> residue %d of chain %s is not there" % (r1, c1))
619 if filelabel
is not None:
620 midb.write(str(xl) +
"\n")
624 print(
"AtomicXLRestraint: WARNING> residue %d of chain %s is not there" % (r2, c2))
625 if filelabel
is not None:
626 midb.write(str(xl) +
"\n")
632 num1=num_xls_per_res[str(xl.r1)]
633 num2=num_xls_per_res[str(xl.r2)]
634 if num1<sig_threshold:
638 if num2<sig_threshold:
643 sig1 = self.sigma_dictionary[
'sigma'][0]
644 sig2 = self.sigma_dictionary[
'sigma'][0]
647 for p1,p2
in itertools.product(ps1,ps2):
650 self.particles[nstate]|=set((p1,p2))
651 r.add_contribution([p1.get_index(),p2.get_index()],
652 [sig1.get_particle_index(),sig2.get_particle_index()])
655 if num_contributions>0:
656 print(
'XL:',xlid,
'num contributions:',num_contributions)
659 raise Exception(
"You didn't create any XL restraints")
660 print(
'created',len(xlrs),
'XL restraints')
661 rname = self.rs.get_name()
663 self.rs.set_name(rname)
664 self.rs.set_weight(self.weight)
665 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
667 def get_hierarchy(self):
670 def _create_sigma(self, name,sigmainit):
671 """ This is called internally. Creates a nuisance
672 on the structural uncertainty """
673 if name
in self.sigma_dictionary:
674 return self.sigma_dictionary[name][0]
676 sigmaminnuis = 0.0000001
677 sigmamaxnuis = 1000.0
681 sigma = IMP.pmi.tools.SetupNuisance(self.m,
685 self.sigma_is_sampled).get_particle()
686 self.sigma_dictionary[name] = (
689 self.sigma_is_sampled)
690 self.rs_sig.add_restraint(
699 def _create_psi(self, name,psiinit):
700 """ This is called internally. Creates a nuisance
701 on the data uncertainty """
702 if name
in self.psi_dictionary:
703 return self.psi_dictionary[name][0]
705 psiminnuis = 0.0000001
706 psimaxnuis = 0.4999999
710 psi = IMP.pmi.tools.SetupNuisance(self.m,
714 self.psi_is_sampled).get_particle()
715 self.psi_dictionary[name] = (
720 self.rs_psi.add_restraint(
732 """ create dummy harmonic restraints for each XL but don't add to model
733 Makes it easy to see each contribution to each XL in RMF
735 class MyGetRestraint(object):
744 for nxl
in range(self.get_number_of_restraints()):
745 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
747 for ncontr
in range(xl.get_number_of_contributions()):
748 ps=xl.get_contribution(ncontr)
750 'xl%i_contr%i'%(nxl,ncontr))
752 dummy_rs.append(MyGetRestraint(rs))
757 """ Get the particles to be sampled by the IMP.pmi.sampler object """
759 if self.sigma_is_sampled:
760 for sigmaname
in self.sigma_dictionary:
761 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
762 str(sigmaname) + self._label_suffix] = \
763 ([self.sigma_dictionary[sigmaname][0]],
764 self.sigma_dictionary[sigmaname][1])
765 if self.psi_is_sampled:
766 for psiname
in self.psi_dictionary:
767 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
768 str(psiname) + self._label_suffix] =\
769 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
772 def get_bonded_pairs(self):
773 return self.bonded_pairs
775 def get_number_of_restraints(self):
776 return self.rs.get_number_of_restraints()
779 return 'XL restraint with '+str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
783 """Read a stat file and load all the sigmas.
784 This is potentially quite stupid.
785 It's also a hack since the sigmas should be stored in the RMF file.
786 Also, requires one sigma and one psi for ALL XLs.
789 sig_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
790 "-s",
"AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
791 psi_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
792 "-s",
"AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
793 for nxl
in range(self.rs.get_number_of_restraints()):
794 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
797 for contr
in range(xl.get_number_of_contributions()):
798 sig1,sig2=xl.get_contribution_sigmas(contr)
801 print(
'loaded nuisances from file')
804 max_prob_for_violation=0.1,
805 min_dist_for_violation=1e9,
807 limit_to_chains=
None,
809 """Create CMM files, one for each state, of all crosslinks.
810 will draw in GREEN if non-violated in all states (or if only one state)
811 will draw in PURPLE if non-violated only in a subset of states (draws nothing elsewhere)
812 will draw in RED in ALL states if all violated
813 (if only one state, you'll only see green and red)
815 @param out_prefix Output xlink files prefix
816 @param max_prob_for_violation It's a violation if the probability is below this
817 @param min_dist_for_violation It's a violation if the min dist is above this
818 @param coarsen Use CA positions
819 @param limit_to_chains Try to visualize just these chains
820 @param exclude_to_chains Try to NOT visualize these chains
822 print(
'going to calculate violations and plot CMM files')
824 all_dists = [s[
"low_dist"]
for s
in all_stats]
830 cmds = defaultdict(set)
831 for nstate
in range(self.nstates):
832 outf=open(out_prefix+str(nstate)+
'.cmm',
'w')
833 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
836 print(
'will limit to',limit_to_chains)
837 print(
'will exclude',exclude_chains)
842 for nxl
in range(self.rs.get_number_of_restraints()):
846 for nstate
in range(self.nstates):
847 prob = state_info[nstate][nxl][
"prob"]
848 low_dist = state_info[nstate][nxl][
"low_dist"]
849 if prob<max_prob_for_violation
or low_dist>min_dist_for_violation:
857 if len(npass)==self.nstates:
859 elif len(nviol)==self.nstates:
863 print(nxl,
'state dists:',[state_info[nstate][nxl][
"low_dist"]
for nstate
in range(self.nstates)],
864 'viol states:',nviol,
'all viol?',all_viol)
865 for nstate
in range(self.nstates):
867 r=0.365; g=0.933; b=0.365;
870 r=0.980; g=0.302; b=0.247;
877 r=0.365; g=0.933; b=0.365;
879 pp = state_info[nstate][nxl][
"low_pp"]
888 cmds[nstate].add((ch1,r1))
889 cmds[nstate].add((ch2,r2))
891 outf = out_fns[nstate]
893 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
894 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,c1[0],c1[1],c1[2],r,g,b))
895 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
896 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv+1,c2[0],c2[1],c2[2],r,g,b))
897 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
898 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,nv+1,r,g,b))
901 for nstate
in range(self.nstates):
902 out_fns[nstate].write(
'</marker_set>\n')
903 out_fns[nstate].close()
905 for ch,r
in cmds[nstate]:
906 cmd+=
'#%i:%i.%s '%(nstate,r,ch)
910 def _get_contribution_info(self,xl,ncontr,use_CA=False):
911 """Return the particles at that contribution. If requested will return CA's instead"""
912 idx1=xl.get_contribution(ncontr)[0]
913 idx2=xl.get_contribution(ncontr)[1]
921 return idx1,idx2,dist
923 def get_best_stats(self,limit_to_state=None,limit_to_chains=None,exclude_chains='',use_CA=False):
924 ''' return the probability, best distance, two coords, and possibly the psi for each xl
925 @param limit_to_state Only examine contributions from one state
926 @param limit_to_chains Returns the particles for certain "easy to visualize" chains
927 @param exclude_chains Even if you limit, don't let one end be in this list.
928 Only works if you also limit chains
929 @param use_CA Limit to CA particles
932 for nxl
in range(self.rs.get_number_of_restraints()):
934 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
941 for contr
in range(xl.get_number_of_contributions()):
942 pp = xl.get_contribution(contr)
949 if limit_to_state
is not None:
951 if nstate!=limit_to_state:
953 state_contrs.append(contr)
956 if limit_to_chains
is not None:
959 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
960 c1
not in exclude_chains
and c2
not in exclude_chains):
961 if dist<low_dist_lim:
968 if limit_to_state
is not None:
969 this_info[
"prob"] = xl.evaluate_for_contributions(state_contrs,
None)
971 this_info[
"prob"] = xl.unprotected_evaluate(
None)
972 if limit_to_chains
is not None:
973 this_info[
"low_pp"] = low_pp_lim
975 this_info[
"low_pp"] = low_pp
977 this_info[
"low_dist"] = low_dist
980 this_info[
"psi"] = pval
981 ret.append(this_info)
984 def print_stats(self):
987 for nxl,s
in enumerate(stats):
993 output = super(AtomicCrossLinkMSRestraint, self).
get_output()
1004 for nxl,s
in enumerate(stats):
1005 if s[
'low_dist']>20.0:
1007 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"Prob")]=str(s[
'prob'])
1008 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"BestDist")]=str(s[
'low_dist'])
1009 if not self.one_psi:
1010 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"psi")]=str(s[
'psi'])
1011 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1017 """This restraint allows ambiguous crosslinking between multiple copies
1018 it is a variant of the SimplifiedCrossLinkMS
1028 self.m = representation.prot.get_model()
1034 self.outputlevel =
"low"
1035 self.expdistance = expdistance
1036 self.strength = strength
1038 fl = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1042 tokens = line.split()
1044 if (tokens[0] ==
"#"):
1053 resolution=resolution,
1055 name_is_ambiguous=
True,
1057 hrc1 = [representation.hier_db.particle_to_name[p]
for p
in ps1]
1059 print(
"ConnectivityCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1064 resolution=resolution,
1066 name_is_ambiguous=
True,
1068 hrc2 = [representation.hier_db.particle_to_name[p]
for p
in ps2]
1070 print(
"ConnectivityCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1075 s1_inds = s1.get_selected_particle_indexes()
1076 s2_inds = s2.get_selected_particle_indexes()
1077 if not set(s1_inds).isdisjoint(set(s2_inds)):
1078 print(
"ConnectivityCrossLinkMS: WARNING> %s %s and %s %s "
1079 "select the same bead(s) - ignoring" % (c1, r1, c2, r2))
1083 if self.strength
is None:
1092 rad1 = rad1 / len(ps1)
1093 rad2 = rad2 / len(ps2)
1095 self.strength = 1 / (rad1 ** 2 + rad2 ** 2)
1103 self.rs.add_restraint(cr)
1104 self.pairs.append((ps1, hrc1, c1, r1, ps2, hrc2, c2, r2, cr))
1117 d1.set_radius(uncertainty1)
1118 d2.set_radius(uncertainty2)
1122 strength = 1 / (uncertainty1 ** 2 + uncertainty2 ** 2)
1129 for i
in range(npoints):
1133 scores.append(cr.unprotected_evaluate(
None))
1134 IMP.pmi.output.plot_xy_data(dists, scores)
1136 def set_label(self, label):
1138 self.rs.set_name(label)
1139 for r
in self.rs.get_restraints():
1142 def add_to_model(self):
1145 def get_restraint_sets(self):
1148 def get_restraint(self):
1151 def set_output_level(self, level="low"):
1153 self.outputlevel = level
1155 def set_weight(self, weight):
1156 self.weight = weight
1157 self.rs.set_weight(weight)
1159 def get_output(self):
1165 score = self.weight * self.rs.unprotected_evaluate(
None)
1166 output[
"_TotalScore"] = str(score)
1167 output[
"ConnectivityCrossLinkMS_Score_" + self.label] = str(score)
1168 for n, p
in enumerate(self.pairs):
1179 for n1, p1
in enumerate(ps1):
1182 for n2, p2
in enumerate(ps2):
1186 label = str(r1) +
":" + name1 +
"_" + str(r2) +
":" + name2
1187 output[
"ConnectivityCrossLinkMS_Distance_" +
1190 label = str(r1) +
":" + c1 +
"_" + str(r2) +
":" + c2
1191 output[
"ConnectivityCrossLinkMS_Score_" +
1192 label] = str(self.weight * cr.unprotected_evaluate(
None))
1196 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1197 class SimplifiedCrossLinkMS(object):
1207 spheredistancepairscore=
True):
1212 if columnmapping
is None:
1214 columnmapping[
"Protein1"] = 0
1215 columnmapping[
"Protein2"] = 1
1216 columnmapping[
"Residue1"] = 2
1217 columnmapping[
"Residue2"] = 3
1219 self.m = representation.prot.get_model()
1224 self.already_added_pairs = {}
1226 self.outputlevel =
"low"
1227 self.expdistance = expdistance
1228 self.strength = strength
1229 self.truncated = truncated
1230 self.spheredistancepairscore = spheredistancepairscore
1235 protein1 = columnmapping[
"Protein1"]
1236 protein2 = columnmapping[
"Protein2"]
1237 residue1 = columnmapping[
"Residue1"]
1238 residue2 = columnmapping[
"Residue2"]
1240 fl = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1244 tokens = line.split()
1246 if (tokens[0] ==
"#"):
1248 r1 = int(tokens[residue1])
1249 c1 = tokens[protein1]
1250 r2 = int(tokens[residue2])
1251 c2 = tokens[protein2]
1255 resolution=resolution,
1257 name_is_ambiguous=
False,
1261 resolution=resolution,
1263 name_is_ambiguous=
False,
1268 "residue %d of chain %s selects multiple particles; "
1270 % (r1, c1,
"".join(p.get_name()
for p
in ps1)))
1272 print(
"SimplifiedCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1277 "residue %d of chain %s selects multiple particles; "
1279 % (r2, c2,
"".join(p.get_name()
for p
in ps2)))
1281 print(
"SimplifiedCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1287 if (p1, p2)
in self.already_added_pairs:
1288 dr = self.already_added_pairs[(p1, p2)]
1289 weight = dr.get_weight()
1290 dr.set_weight(weight + 1.0)
1291 print(
"SimplifiedCrossLinkMS> crosslink %d %s %d %s was already found, adding 1.0 to the weight, weight is now %d" % (r1, c1, r2, c2, weight + 1.0))
1297 limit = self.strength * (self.expdistance + 15) ** 2 + 10.0
1308 if self.spheredistancepairscore:
1313 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2))
1315 self.rs.add_restraint(dr)
1316 self.pairs.append((p1, p2, dr, r1, c1, r2, c2))
1317 self.already_added_pairs[(p1, p2)] = dr
1318 self.already_added_pairs[(p2, p1)] = dr
1320 def set_label(self, label):
1323 def add_to_model(self):
1326 def get_restraint_sets(self):
1329 def get_restraint(self):
1332 def get_restraints(self):
1334 for r
in self.rs.get_restraints():
1335 rlist.append(IMP.core.PairRestraint.get_from(r))
1338 def get_particle_pairs(self):
1340 for i
in range(len(self.pairs)):
1341 p0 = self.pairs[i][0]
1342 p1 = self.pairs[i][1]
1343 ppairs.append((p0, p1))
1346 def set_output_level(self, level="low"):
1348 self.outputlevel = level
1350 def set_weight(self, weight):
1351 self.weight = weight
1352 self.rs.set_weight(weight)
1354 def plot_restraint(self, radius1, radius2, maxdist=50, npoints=10):
1360 d1.set_radius(radius1)
1361 d2.set_radius(radius2)
1363 limit = self.strength * (self.expdistance + 15) ** 2 + 10.0
1378 for i
in range(npoints):
1382 scores.append(dr.unprotected_evaluate(
None))
1383 IMP.pmi.output.plot_xy_data(dists, scores)
1385 def get_output(self):
1391 score = self.weight * self.rs.unprotected_evaluate(
None)
1392 output[
"_TotalScore"] = str(score)
1393 output[
"SimplifiedCrossLinkMS_Score_" + self.label] = str(score)
1394 for i
in range(len(self.pairs)):
1396 p0 = self.pairs[i][0]
1397 p1 = self.pairs[i][1]
1398 crosslinker =
'standard'
1399 ln = self.pairs[i][2]
1400 resid1 = self.pairs[i][3]
1401 chain1 = self.pairs[i][4]
1402 resid2 = self.pairs[i][5]
1403 chain2 = self.pairs[i][6]
1405 label = str(resid1) +
":" + chain1 +
"_" + \
1406 str(resid2) +
":" + chain2
1407 output[
"SimplifiedCrossLinkMS_Score_" + crosslinker +
"_" +
1408 label] = str(self.weight * ln.unprotected_evaluate(
None))
1410 if self.spheredistancepairscore:
1416 output[
"SimplifiedCrossLinkMS_Distance_" +
1423 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1424 class SigmoidalCrossLinkMS(object):
1426 self, representation, restraints_file, inflection, slope, amplitude,
1427 linear_slope, resolution=
None, columnmapping=
None, csvfile=
False,
1428 filters=
None, filelabel=
"None"):
1434 if columnmapping
is None:
1436 columnmapping[
"Protein1"] = 0
1437 columnmapping[
"Protein2"] = 1
1438 columnmapping[
"Residue1"] = 2
1439 columnmapping[
"Residue2"] = 3
1445 db = tools.get_db_from_csv(restraints_file)
1447 db = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1449 self.m = representation.prot.get_model()
1455 self.already_added_pairs = {}
1456 self.inflection = inflection
1458 self.amplitude = amplitude
1459 self.linear_slope = linear_slope
1461 self.outputlevel =
"low"
1466 protein1 = columnmapping[
"Protein1"]
1467 protein2 = columnmapping[
"Protein2"]
1468 residue1 = columnmapping[
"Residue1"]
1469 residue2 = columnmapping[
"Residue2"]
1471 indb = open(
"included." + filelabel +
".xl.db",
"w")
1472 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
1473 midb = open(
"missing." + filelabel +
".xl.db",
"w")
1477 tokens = entry.split()
1479 if (tokens[0] ==
"#"):
1481 r1 = int(tokens[residue1])
1482 c1 = tokens[protein1]
1483 r2 = int(tokens[residue2])
1484 c2 = tokens[protein2]
1486 if filters
is not None:
1487 if eval(tools.cross_link_db_filter_parser(filters)) ==
False:
1488 exdb.write(str(entry) +
"\n")
1490 indb.write(str(entry) +
"\n")
1491 r1 = int(entry[residue1])
1492 c1 = entry[protein1]
1493 r2 = int(entry[residue2])
1494 c2 = entry[protein2]
1498 resolution=resolution,
1500 name_is_ambiguous=
False,
1504 resolution=resolution,
1506 name_is_ambiguous=
False,
1510 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
1512 print(
"SigmoidalCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1513 midb.write(str(entry) +
"\n")
1517 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
1519 print(
"SigmoidalCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1520 midb.write(str(entry) +
"\n")
1526 if (p1, p2)
in self.already_added_pairs:
1527 dr = self.already_added_pairs[(p1, p2)]
1528 weight = dr.get_weight()
1529 dr.increment_amplitude(amplitude)
1530 print(
"SigmoidalCrossLinkMS> crosslink %d %s %d %s was already found, adding %d to the amplitude, amplitude is now %d" % (r1, c1, r2, c2, amplitude, dr.get_amplitude()))
1531 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2)
1532 +
"-ampl:" + str(dr.get_amplitude()))
1545 dr.set_name(c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2)
1546 +
"-ampl:" + str(dr.get_amplitude()))
1548 self.rs.add_restraint(dr)
1550 self.pairs.append((p1, p2, dr, r1, c1, r2, c2))
1551 self.already_added_pairs[(p1, p2)] = dr
1552 self.already_added_pairs[(p2, p1)] = dr
1554 def set_label(self, label):
1557 def add_to_model(self):
1560 def get_restraint_sets(self):
1563 def get_restraint(self):
1566 def get_restraints(self):
1568 for r
in self.rs.get_restraints():
1569 rlist.append(IMP.core.PairRestraint.get_from(r))
1572 def get_particle_pairs(self):
1574 for i
in range(len(self.pairs)):
1575 p0 = self.pairs[i][0]
1576 p1 = self.pairs[i][1]
1577 ppairs.append((p0, p1))
1580 def set_output_level(self, level="low"):
1582 self.outputlevel = level
1584 def set_weight(self, weight):
1585 self.weight = weight
1586 self.rs.set_weight(weight)
1588 def plot_restraint(self, radius1, radius2, maxdist=50, npoints=10):
1593 d1.set_radius(radius1)
1594 d2.set_radius(radius2)
1605 for i
in range(npoints):
1609 scores.append(dr.unprotected_evaluate(
None))
1610 IMP.pmi.output.plot_xy_data(dists, scores)
1612 def get_output(self):
1618 score = self.weight * self.rs.unprotected_evaluate(
None)
1619 output[
"_TotalScore"] = str(score)
1620 output[
"SigmoidalCrossLinkMS_Score_" + self.label] = str(score)
1621 for i
in range(len(self.pairs)):
1623 p0 = self.pairs[i][0]
1624 p1 = self.pairs[i][1]
1625 crosslinker =
'standard'
1626 ln = self.pairs[i][2]
1627 resid1 = self.pairs[i][3]
1628 chain1 = self.pairs[i][4]
1629 resid2 = self.pairs[i][5]
1630 chain2 = self.pairs[i][6]
1632 label = str(resid1) +
":" + chain1 +
"_" + \
1633 str(resid2) +
":" + chain2
1634 output[
"SigmoidalCrossLinkMS_Score_" + crosslinker +
"_" +
1635 label +
"_" + self.label] = str(self.weight * ln.unprotected_evaluate(
None))
1638 output[
"SigmoidalCrossLinkMS_Distance_" +
1643 @
IMP.deprecated_object(
"2.5",
"Use IMP.pmi.restraints.crosslinking.CrossLinkingMassSpectrometryRestraint instead.")
1644 class ISDCrossLinkMS(IMP.pmi.restraints._NuisancesBase):
1645 def __init__(self, representation,
1665 automatic_sigma_classification=
False,
1666 attributes_for_label=
None):
1676 if type(representation) != list:
1677 representations = [representation]
1679 representations = representation
1681 if columnmapping
is None:
1683 columnmapping[
"Protein1"] = 0
1684 columnmapping[
"Protein2"] = 1
1685 columnmapping[
"Residue1"] = 2
1686 columnmapping[
"Residue2"] = 3
1687 columnmapping[
"IDScore"] = 4
1688 columnmapping[
"XLUniqueID"] = 5
1694 db = IMP.pmi.tools.get_db_from_csv(restraints_file)
1696 db = IMP.pmi.tools.open_file_or_inline_text(restraints_file)
1698 indb = open(
"included." + filelabel +
".xl.db",
"w")
1699 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
1700 midb = open(
"missing." + filelabel +
".xl.db",
"w")
1702 self.m = representations[0].prot.get_model()
1712 self.sigma_dictionary = {}
1713 self.psi_dictionary = {}
1714 self.psi_is_sampled =
True
1715 self.sigma_is_sampled =
True
1717 self.dataset = representations[0].get_file_dataset(restraints_file)
1718 if not self.dataset
and os.path.exists(restraints_file):
1720 details=
"Crosslinks")
1723 xl_groups = [p.get_cross_link_group(self)
1724 for p, state
in representations[0]._protocol_output]
1729 self.ids_map = IMP.pmi.tools.map()
1730 self.ids_map.set_map_element(20.0, 0.05)
1731 self.ids_map.set_map_element(65.0, 0.01)
1733 self.ids_map = ids_map
1735 if radius_map
is None:
1736 self.radius_map = IMP.pmi.tools.map()
1737 if automatic_sigma_classification:
1738 self.radius_map.set_map_element(10, 10)
1739 self.radius_map.set_map_element(1, 1)
1741 self.radius_map = radius_map
1743 self.outputlevel =
"low"
1747 self.linear.set_slope(slope)
1750 protein1 = columnmapping[
"Protein1"]
1751 protein2 = columnmapping[
"Protein2"]
1752 residue1 = columnmapping[
"Residue1"]
1753 residue2 = columnmapping[
"Residue2"]
1754 idscore = columnmapping[
"IDScore"]
1756 xluniqueid = columnmapping[
"XLUniqueID"]
1766 uniqueid_restraints_map = {}
1768 for nxl, entry
in enumerate(db):
1770 if not jackknifing
is None:
1777 raise NotImplementedError(
"jackknifing not yet implemented")
1780 tokens = entry.split()
1785 if (tokens[0] ==
"#"):
1788 r1 = int(tokens[residue1])
1789 c1 = tokens[protein1]
1790 r2 = int(tokens[residue2])
1791 c2 = tokens[protein2]
1793 if offset_dict
is not None:
1794 if c1
in offset_dict: r1+=offset_dict[c1]
1795 if c2
in offset_dict: r2+=offset_dict[c2]
1797 if rename_dict
is not None:
1798 if c1
in rename_dict: c1=rename_dict[c1]
1799 if c2
in rename_dict: c2=rename_dict[c2]
1804 ids = float(tokens[idscore])
1805 if xluniqueid
is None:
1808 xlid = tokens[xluniqueid]
1810 print(
"this line was not accessible " + str(entry))
1811 if residue1
not in entry: print(str(residue1)+
" keyword not in database")
1812 if residue2
not in entry: print(str(residue2)+
" keyword not in database")
1813 if protein1
not in entry: print(str(protein1)+
" keyword not in database")
1814 if protein2
not in entry: print(str(protein2)+
" keyword not in database")
1815 if idscore
not in entry: print(str(idscore)+
" keyword not in database")
1816 if xluniqueid
not in entry: print(str(xluniqueid)+
" keyword not in database")
1820 if filters
is not None:
1822 exdb.write(str(entry) +
"\n")
1826 r1 = int(entry[residue1])
1827 c1 = entry[protein1]
1828 r2 = int(entry[residue2])
1829 c2 = entry[protein2]
1831 if offset_dict
is not None:
1832 if c1
in offset_dict: r1+=offset_dict[c1]
1833 if c2
in offset_dict: r2+=offset_dict[c2]
1835 if rename_dict
is not None:
1836 if c1
in rename_dict: c1=rename_dict[c1]
1837 if c2
in rename_dict: c2=rename_dict[c2]
1843 ids = float(entry[idscore])
1845 ids = entry[idscore]
1846 if xluniqueid
is None:
1849 xlid = entry[xluniqueid]
1852 print(
"this line was not accessible " + str(entry))
1853 if residue1
not in entry: print(str(residue1)+
" keyword not in database")
1854 if residue2
not in entry: print(str(residue2)+
" keyword not in database")
1855 if protein1
not in entry: print(str(protein1)+
" keyword not in database")
1856 if protein2
not in entry: print(str(protein2)+
" keyword not in database")
1857 if idscore
not in entry: print(str(idscore)+
" keyword not in database")
1858 if xluniqueid
not in entry: print(str(xluniqueid)+
" keyword not in database")
1862 ex_xls = [p[0].add_experimental_cross_link(r1, c1, r2, c2,
1864 for p, group
in zip(representations[0]._protocol_output,
1867 for nstate, r
in enumerate(representations):
1872 resolution=resolution,
1874 name_is_ambiguous=
False,
1878 resolution=resolution,
1880 name_is_ambiguous=
False,
1884 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
1886 print(
"ISDCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r1, c1))
1887 midb.write(str(entry) +
"\n")
1891 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
1893 print(
"ISDCrossLinkMS: WARNING> residue %d of chain %s is not there" % (r2, c2))
1894 midb.write(str(entry) +
"\n")
1900 if (p1 == p2)
and (r1 == r2) :
1901 print(
"ISDCrossLinkMS Restraint: WARNING> on the identical bead particles and the identical residues, thus skipping this cross-link.")
1904 if xlid
in uniqueid_restraints_map:
1905 print(
"Appending a crosslink restraint into the uniqueID %s" % str(xlid))
1906 dr = uniqueid_restraints_map[xlid]
1908 print(
"Generating a NEW crosslink restraint with a uniqueID %s" % str(xlid))
1913 restraints.append(dr)
1914 uniqueid_restraints_map[xlid] = dr
1916 mappedr1 = self.radius_map.get_map_element(
1918 sigma1 = self.get_sigma(mappedr1)[0]
1919 mappedr2 = self.radius_map.get_map_element(
1921 sigma2 = self.get_sigma(mappedr2)[0]
1923 psival = self.ids_map.get_map_element(ids)
1924 psi = self.get_psi(psival)[0]
1927 p1i = p1.get_particle_index()
1928 p2i = p2.get_particle_index()
1936 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
1937 print(
"--------------")
1938 print(
"ISDCrossLinkMS: generating cross-link restraint between")
1939 print(
"ISDCrossLinkMS: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
1940 print(
"ISDCrossLinkMS: with sigma1 %f sigma2 %f psi %s" % (mappedr1, mappedr2, psival))
1941 print(
"ISDCrossLinkMS: between particles %s and %s" % (p1.get_name(), p2.get_name()))
1942 print(
"==========================================\n")
1943 indb.write(str(entry) +
"\n")
1944 for p, ex_xl
in zip(representations[0]._protocol_output,
1946 p[0].add_cross_link(p[1], ex_xl, p1, p2, sigma1, sigma2,
1954 xlattribute =
"intrarb"
1956 xlattribute =
"interrb"
1959 if not attributes_for_label
is None:
1960 for a
in attributes_for_label:
1961 xlattribute = xlattribute +
"_" + str(entry[a])
1963 xlattribute = xlattribute +
"-State:" + str(nstate)
1966 xlattribute +
"-" + c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2) +
"_" + self.label)
1971 xlattribute +
"-" + c1 +
":" + str(r1) +
"-" + c2 +
":" + str(r2) +
"_" + self.label)
1972 self.rslin.add_restraint(pr)
1992 self.rs.add_restraint(lw)
1994 def set_weight(self, weight):
1995 self.weight = weight
1996 self.rs.set_weight(weight)
1998 def set_slope_linear_term(self, slope):
1999 self.linear.set_slope(slope)
2001 def set_label(self, label):
2004 def add_to_model(self):
2011 def get_hierarchies(self):
2014 def get_restraint_sets(self):
2017 def get_restraint(self):
2020 def get_restraint_for_rmf(self):
2023 def get_restraints(self):
2025 for r
in self.rs.get_restraints():
2026 rlist.append(IMP.core.PairRestraint.get_from(r))
2029 def get_particle_pairs(self):
2031 for i
in range(len(self.pairs)):
2032 p0 = self.pairs[i][0]
2033 p1 = self.pairs[i][1]
2034 ppairs.append((p0, p1))
2037 def set_output_level(self, level="low"):
2039 self.outputlevel = level
2041 def set_psi_is_sampled(self, is_sampled=True):
2042 self.psi_is_sampled = is_sampled
2044 def set_sigma_is_sampled(self, is_sampled=True):
2045 self.sigma_is_sampled = is_sampled
2047 def get_label(self,pairs_index):
2048 resid1 = self.pairs[pairs_index][3]
2049 chain1 = self.pairs[pairs_index][4]
2050 resid2 = self.pairs[pairs_index][5]
2051 chain2 = self.pairs[pairs_index][6]
2052 attribute = self.pairs[pairs_index][7]
2053 rad1 = self.pairs[pairs_index][8]
2054 rad2 = self.pairs[pairs_index][9]
2055 psi = self.pairs[pairs_index][10]
2056 xlid= self.pairs[pairs_index][11]
2057 label = attribute +
"-" + \
2058 str(resid1) +
":" + chain1 +
"_" + str(resid2) +
":" + \
2059 chain2 +
"-" + str(rad1) +
"-" + str(rad2) +
"-" + str(psi)
2062 def write_db(self,filename):
2063 cldb=IMP.pmi.output.CrossLinkIdentifierDatabase()
2065 for pairs_index
in range(len(self.pairs)):
2067 resid1 = self.pairs[pairs_index][3]
2068 chain1 = self.pairs[pairs_index][4]
2069 resid2 = self.pairs[pairs_index][5]
2070 chain2 = self.pairs[pairs_index][6]
2071 attribute = self.pairs[pairs_index][7]
2072 rad1 = self.pairs[pairs_index][8]
2073 rad2 = self.pairs[pairs_index][9]
2074 psi = self.pairs[pairs_index][10]
2075 xlid= self.pairs[pairs_index][11]
2076 nstate=self.pairs[pairs_index][12]
2077 ids=self.pairs[pairs_index][13]
2079 label=self.get_label(pairs_index)
2080 cldb.set_unique_id(label,xlid)
2081 cldb.set_protein1(label,chain1)
2082 cldb.set_protein2(label,chain2)
2083 cldb.set_residue1(label,resid1)
2084 cldb.set_residue2(label,resid2)
2085 cldb.set_idscore(label,ids)
2086 cldb.set_state(label,nstate)
2087 cldb.set_sigma1(label,rad1)
2088 cldb.set_sigma2(label,rad2)
2089 cldb.set_psi(label,psi)
2090 cldb.write(filename)
2093 def get_output(self):
2099 score = self.weight * self.rs.unprotected_evaluate(
None)
2100 output[
"_TotalScore"] = str(score)
2101 output[
"ISDCrossLinkMS_Data_Score_" + self.label] = str(score)
2102 output[
"ISDCrossLinkMS_PriorSig_Score_" +
2103 self.label] = self.rssig.unprotected_evaluate(
None)
2104 output[
"ISDCrossLinkMS_PriorPsi_Score_" +
2105 self.label] = self.rspsi.unprotected_evaluate(
None)
2106 output[
"ISDCrossLinkMS_Linear_Score_" +
2107 self.label] = self.rslin.unprotected_evaluate(
None)
2108 for i
in range(len(self.pairs)):
2110 label=self.get_label(i)
2111 ln = self.pairs[i][2]
2112 p0 = self.pairs[i][0]
2113 p1 = self.pairs[i][1]
2114 output[
"ISDCrossLinkMS_Score_" +
2115 label +
"_" + self.label] = str(self.weight * -log(ln.unprotected_evaluate(
None)))
2119 output[
"ISDCrossLinkMS_Distance_" +
2123 for psiindex
in self.psi_dictionary:
2124 output[
"ISDCrossLinkMS_Psi_" +
2125 str(psiindex) +
"_" + self.label] = str(self.psi_dictionary[psiindex][0].get_scale())
2127 for resolution
in self.sigma_dictionary:
2128 output[
"ISDCrossLinkMS_Sigma_" +
2129 str(resolution) +
"_" + self.label] = str(self.sigma_dictionary[resolution][0].get_scale())
2134 def get_particles_to_sample(self):
2136 for resolution
in self.sigma_dictionary:
2137 if self.sigma_dictionary[resolution][2]
and self.sigma_is_sampled:
2138 ps[
"Nuisances_ISDCrossLinkMS_Sigma_" + str(resolution) +
"_" + self.label] =\
2139 ([self.sigma_dictionary[resolution][0]],
2140 self.sigma_dictionary[resolution][1])
2141 if self.psi_is_sampled:
2142 for psiindex
in self.psi_dictionary:
2143 if self.psi_dictionary[psiindex][2]:
2144 ps[
"Nuisances_ISDCrossLinkMS_Psi_" +
2145 str(psiindex) +
"_" + self.label] = ([self.psi_dictionary[psiindex][0]], self.psi_dictionary[psiindex][1])
2149 class CysteineCrossLinkRestraint(object):
2150 def __init__(self, representations, filename, cbeta=False,
2151 betatuple=(0.03, 0.1),
2152 disttuple=(0.0, 25.0, 1000),
2153 omegatuple=(1.0, 1000.0, 50),
2154 sigmatuple=(0.3, 0.3, 1),
2156 sigmaissampled=
False,
2157 weightissampled=
True,
2158 epsilonissampled=
True
2165 self.representations = representations
2166 self.m = self.representations[0].prot.get_model()
2169 self.epsilonmaxtrans = 0.01
2170 self.sigmamaxtrans = 0.1
2171 self.betamaxtrans = 0.01
2172 self.weightmaxtrans = 0.1
2174 self.outputlevel =
"low"
2175 self.betaissampled = betaissampled
2176 self.sigmaissampled = sigmaissampled
2177 self.weightissampled = weightissampled
2178 self.epsilonissampled = epsilonissampled
2180 betalower = betatuple[0]
2181 betaupper = betatuple[1]
2188 self.beta = IMP.pmi.tools.SetupNuisance(
2193 betaissampled).get_particle(
2196 self.sigma = IMP.pmi.tools.SetupNuisance(
2201 sigmaissampled).get_particle()
2203 self.weight = IMP.pmi.tools.SetupWeight(
2205 weightissampled).get_particle(
2209 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
2218 if t[5]
in self.epsilons:
2219 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
2220 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
2222 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(self.m,
2223 0.01, 0.01, 1.0 - float(t[4]), epsilonissampled).get_particle()
2224 up = self.epsilons[t[5]].get_upper()
2225 low = self.epsilons[t[5]].get_lower()
2227 self.epsilons[t[5]].set_upper(low)
2229 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
2233 crossdata = IMP.pmi.tools.get_cross_link_data(
2234 "cysteine",
"cysteine_CA_FES.txt.standard",
2235 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
2237 crossdata = IMP.pmi.tools.get_cross_link_data(
2238 "cysteine",
"cysteine_CB_FES.txt.standard",
2239 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
2242 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
2243 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
2244 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
2247 print(
"--------------")
2248 print(
"CysteineCrossLink: attempting to create a restraint " + str(d))
2268 self.epsilons[epslabel],
2274 for i, representation
in enumerate(self.representations):
2281 resolution=1, name=chain1,
2282 name_is_ambiguous=
False, residue=resid1)[0]
2288 resolution=1, name=chain2,
2289 name_is_ambiguous=
False, residue=resid2)[0]
2298 for t
in range(-1, 2):
2300 resolution=1, name=chain1,
2301 name_is_ambiguous=
False, residue=resid1 + t)
2307 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid1 + t, chain1))
2310 resolution=1, name=chain2,
2311 name_is_ambiguous=
False, residue=resid2 + t)
2317 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid2 + t, chain2))
2320 if (p1
is not None and p2
is not None):
2321 ccl.add_contribution(p1, p2)
2325 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2,
IMP.core.get_distance(d1, d2))
2328 if (len(p1) == 3
and len(p2) == 3):
2329 p11n = p1[0].get_name()
2330 p12n = p1[1].get_name()
2331 p13n = p1[2].get_name()
2332 p21n = p2[0].get_name()
2333 p22n = p2[1].get_name()
2334 p23n = p2[2].get_name()
2336 print(
"CysteineCrossLink: generating CB cysteine cross-link restraint between")
2337 print(
"CysteineCrossLink: residue %d of chain %s and residue %d of chain %s" % (resid1, chain1, resid2, chain2))
2338 print(
"CysteineCrossLink: between particles %s %s %s and %s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
2340 ccl.add_contribution(p1, p2)
2343 self.rs.add_restraint(ccl)
2344 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
2345 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
2347 def set_label(self, label):
2350 def add_to_model(self):
2353 def get_particles_to_sample(self):
2355 if self.epsilonissampled:
2356 for eps
in self.epsilons.keys():
2357 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
2358 self.label] = ([self.epsilons[eps]], self.epsilonmaxtrans)
2359 if self.betaissampled:
2360 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
2361 self.label] = ([self.beta], self.betamaxtrans)
2362 if self.weightissampled:
2363 ps[
"Weights_CysteineCrossLinkRestraint_" +
2364 self.label] = ([self.weight], self.weightmaxtrans)
2365 if self.sigmaissampled:
2366 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
2367 self.label] = ([self.sigma], self.sigmamaxtrans)
2370 def set_output_level(self, level="low"):
2372 self.outputlevel = level
2374 def get_restraint(self):
2377 def get_sigma(self):
2380 def get_output(self):
2383 score = self.rs.unprotected_evaluate(
None)
2384 output[
"_TotalScore"] = str(score)
2385 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
2386 output[
"CysteineCrossLinkRestraint_sigma_" +
2387 self.label] = str(self.sigma.get_scale())
2388 for eps
in self.epsilons.keys():
2389 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
2390 self.label] = str(self.epsilons[eps].get_scale())
2391 output[
"CysteineCrossLinkRestraint_beta_" +
2392 self.label] = str(self.beta.get_scale())
2393 for n
in range(self.weight.get_number_of_states()):
2394 output[
"CysteineCrossLinkRestraint_weights_" +
2395 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
2397 if self.outputlevel ==
"high":
2398 for rst
in self.rs.get_restraints():
2399 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
2400 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
2401 "_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_model_frequency()
2402 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
2403 IMP.isd.CysteineCrossLinkRestraint.get_from(
2406 + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_standard_error()
2407 if len(self.representations) > 1:
2408 for i
in range(len(self.prots)):
2409 output[
"CysteineCrossLinkRestraint_Frequency_Contribution_" +
2410 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
2411 "_State_" + str(i) +
"_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_frequencies()[i]
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
A function that is harmonic over an interval.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
Upper bound harmonic function (non-zero when feature > mean)
static XYZR setup_particle(Model *m, ParticleIndex pi)
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
Add uncertainty to a particle.
A score on the distance between the surfaces of two spheres.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
def get_restraint_for_rmf
Get the restraint for visualization in an RMF file.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
Simple sigmoidal score calculated between sphere surfaces.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
This restraint allows ambiguous crosslinking between multiple copies it is a variant of the Simplifie...
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all crosslinks.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
def deprecated_object
Python decorator to mark a class as deprecated.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the part...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
Class to handle individual particles of a Model object.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Restraint * create_connectivity_restraint(const Selections &s, double x0, double k, std::string name="Connectivity%1%")
Create a restraint connecting the selections.
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
def get_output
Get outputs to write to stat files.
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Base class for PMI restraints, which wrap IMP.Restraint(s).
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.
A decorator for a particle with x,y,z coordinates and a radius.