1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling crosslinking data.
5 from __future__
import print_function
17 from collections
import defaultdict
26 """Setup cross-link distance restraints from mass spectrometry data.
27 The noise in the data and the structural uncertainty of cross-linked amino-acids
28 is inferred using Bayes theory of probability
29 @note Wraps an IMP::isd::CrossLinkMSRestraint
32 CrossLinkDataBase=
None,
38 attributes_for_label=
None,
41 @param root_hier The canonical hierarchy containing all the states
42 @param CrossLinkDataBase The IMP.pmi.io.crosslink.CrossLinkDataBase
43 object that contains the cross-link dataset
44 @param length maximal cross-linker length (including the residue sidechains)
45 @param resolution what representation resolution should the cross-link
46 restraint be applied to.
47 @param slope The slope of a distance-linear scoring function that
48 funnels the score when the particles are
49 too far away. Suggested value 0.02.
50 @param label the extra text to label the restraint so that it is
51 searchable in the output
52 @param filelabel automatically generated file containing missing/included/excluded
53 cross-links will be labeled using this text
54 @param attributes_for_label
55 @param weight Weight of restraint
58 model = root_hier.get_model()
60 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
61 model, weight=weight, label=label)
63 if CrossLinkDataBase
is None:
64 raise Exception(
"You must pass a CrossLinkDataBase")
66 raise TypeError(
"CrossLinkingMassSpectrometryRestraint: CrossLinkDataBase should be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
67 self.CrossLinkDataBase = CrossLinkDataBase
69 if resolution==0
or resolution
is None:
70 raise Exception(
"You must pass a resolution and it can't be zero")
72 indb = open(
"included." + filelabel +
".xl.db",
"w")
73 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
74 midb = open(
"missing." + filelabel +
".xl.db",
"w")
76 self.rs.set_name(self.rs.get_name() +
"_Data")
77 self.rspsi = self._create_restraint_set(
"PriorPsi")
78 self.rssig = self._create_restraint_set(
"PriorSig")
79 self.rslin = self._create_restraint_set(
"Linear")
83 self.linear.set_slope(0.0)
86 self.psi_is_sampled =
True
87 self.sigma_is_sampled =
True
88 self.psi_dictionary={}
89 self.sigma_dictionary={}
91 self.outputlevel =
"low"
95 xl_groups = [p.get_cross_link_group(self)
96 for p, state
in IMP.pmi.tools._all_protocol_outputs(
100 copies_to_add = defaultdict(int)
101 print(
'gathering copies')
102 for xlid
in self.CrossLinkDataBase.xlid_iterator():
103 for xl
in self.CrossLinkDataBase[xlid]:
104 r1 = xl[self.CrossLinkDataBase.residue1_key]
105 c1 = xl[self.CrossLinkDataBase.protein1_key]
106 r2 = xl[self.CrossLinkDataBase.residue2_key]
107 c2 = xl[self.CrossLinkDataBase.protein2_key]
108 for c,r
in ((c1,r1),(c2,r2)):
109 if c
in copies_to_add:
115 resolution=resolution).get_selected_particles()
117 copies_to_add[c] = len(sel)-1
119 for molname
in copies_to_add:
120 if copies_to_add[molname]==0:
123 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein1_key,molname+
'.0',fo1)
125 self.CrossLinkDataBase.set_value(self.CrossLinkDataBase.protein2_key,molname+
'.0',fo2)
126 for ncopy
in range(copies_to_add[molname]):
127 self.CrossLinkDataBase.clone_protein(
'%s.0'%molname,
'%s.%i'%(molname,ncopy+1))
128 print(
'done pmi2 prelims')
130 for xlid
in self.CrossLinkDataBase.xlid_iterator():
131 new_contribution=
True
132 for xl
in self.CrossLinkDataBase[xlid]:
134 r1 = xl[self.CrossLinkDataBase.residue1_key]
135 c1 = xl[self.CrossLinkDataBase.protein1_key]
136 r2 = xl[self.CrossLinkDataBase.residue2_key]
137 c2 = xl[self.CrossLinkDataBase.protein2_key]
140 ex_xls = [(p[0].add_experimental_cross_link(r1, c1, r2, c2,
143 zip(IMP.pmi.tools._all_protocol_outputs(
147 iterlist = range(len(IMP.atom.get_by_type(root_hier,
148 IMP.atom.STATE_TYPE)))
149 for nstate, r
in enumerate(iterlist):
151 xl[self.CrossLinkDataBase.state_key]=nstate
152 xl[self.CrossLinkDataBase.data_set_name_key]=self.label
159 name1,copy1 = c1.split(
'.')
161 name2,copy2 = c2.split(
'.')
165 copy_index=int(copy1),
167 resolution=resolution).get_selected_particles()
171 copy_index=int(copy2),
173 resolution=resolution).get_selected_particles()
179 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
182 "CrossLinkingMassSpectrometryRestraint: "
183 "residue %d of chain %s is not there" % (r1, c1),
185 midb.write(str(xl) +
"\n")
189 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
192 "CrossLinkingMassSpectrometryRestraint: "
193 "residue %d of chain %s is not there" % (r2, c2),
195 midb.write(str(xl) +
"\n")
201 if p1 == p2
and r1 == r2:
203 "CrossLinkingMassSpectrometryRestraint: "
204 "same particle and same residue, skipping "
209 print(
"generating a new crosslink restraint")
210 new_contribution=
False
215 restraints.append(dr)
218 if self.CrossLinkDataBase.sigma1_key
not in xl.keys():
220 xl[self.CrossLinkDataBase.sigma1_key]=sigma1name
222 sigma1name=xl[self.CrossLinkDataBase.sigma1_key]
225 if self.CrossLinkDataBase.sigma2_key
not in xl.keys():
227 xl[self.CrossLinkDataBase.sigma2_key]=sigma2name
229 sigma2name=xl[self.CrossLinkDataBase.sigma2_key]
232 if self.CrossLinkDataBase.psi_key
not in xl.keys():
234 xl[self.CrossLinkDataBase.psi_key]=psiname
236 psiname=xl[self.CrossLinkDataBase.psi_key]
241 p1i = p1.get_particle_index()
243 p2i = p2.get_particle_index()
246 xl[
"Particle_sigma1"]=sigma1
248 xl[
"Particle_sigma2"]=sigma2
250 xl[
"Particle_psi"]=psi
252 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
255 print(
"--------------")
256 print(
"CrossLinkingMassSpectrometryRestraint: generating cross-link restraint between")
257 print(
"CrossLinkingMassSpectrometryRestraint: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
258 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 %s sigma2 %s psi %s" % (sigma1name, sigma2name, psiname))
259 print(
"CrossLinkingMassSpectrometryRestraint: between particles %s and %s" % (p1.get_name(), p2.get_name()))
260 print(
"==========================================\n")
261 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
264 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
265 sigma1, sigma2, psi, ex_xl[1])
272 xl[
"IntraRigidBody"]=
True
274 xl[
"IntraRigidBody"]=
False
276 xl_label=self.CrossLinkDataBase.get_short_cross_link_string(xl)
277 xl[
"ShortLabel"]=xl_label
278 dr.set_name(xl_label)
282 pr.set_name(xl_label)
283 self.rslin.add_restraint(pr)
285 self.xl_list.append(xl)
287 indb.write(str(xl) +
"\n")
289 if len(self.xl_list) == 0:
290 raise SystemError(
"CrossLinkingMassSpectrometryRestraint: no crosslink was constructed")
291 self.xl_restraints = restraints
293 self.rs.add_restraint(lw)
295 def __set_dataset(self, ds):
296 self.CrossLinkDataBase.dataset = ds
297 dataset = property(
lambda self: self.CrossLinkDataBase.dataset,
301 """ get the hierarchy """
305 """ get the restraint set """
309 """ get the restraints in a list """
310 return self.xl_restraints
313 """ get the dummy restraints to be displayed in the rmf file """
317 """ Get a list of tuples containing the particle pairs """
319 for i
in range(len(self.pairs)):
320 p0 = self.pairs[i][0]
321 p1 = self.pairs[i][1]
322 ppairs.append((p0, p1))
326 """ Set the output level of the output """
327 self.outputlevel = level
330 """ Switch on/off the sampling of psi particles """
331 self.psi_is_sampled = is_sampled
334 """ Switch on/off the sampling of sigma particles """
335 self.sigma_is_sampled = is_sampled
339 """ This is called internally. Creates a nuisance
340 on the structural uncertainty """
341 if name
in self.sigma_dictionary:
342 return self.sigma_dictionary[name][0]
345 sigmaminnuis = 0.0000001
346 sigmamaxnuis = 1000.0
350 sigma = IMP.pmi.tools.SetupNuisance(self.model, sigmainit,
351 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
352 self.sigma_dictionary[name] = (
355 self.sigma_is_sampled)
356 self.rssig.add_restraint(
366 """ This is called internally. Creates a nuisance
367 on the data uncertainty """
368 if name
in self.psi_dictionary:
369 return self.psi_dictionary[name][0]
372 psiminnuis = 0.0000001
373 psimaxnuis = 0.4999999
377 psi = IMP.pmi.tools.SetupNuisance(self.model, psiinit,
378 psiminnuis, psimaxnuis,
379 self.psi_is_sampled).get_particle()
380 self.psi_dictionary[name] = (
385 self.rspsi.add_restraint(
397 """ Get the output of the restraint to be used by the IMP.pmi.output object"""
398 output = super(CrossLinkingMassSpectrometryRestraint, self).
get_output()
400 for xl
in self.xl_list:
402 xl_label=xl[
"ShortLabel"]
406 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
407 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
411 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
415 for psiname
in self.psi_dictionary:
416 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
417 str(psiname) + self._label_suffix] = str(
418 self.psi_dictionary[psiname][0].get_scale())
420 for sigmaname
in self.sigma_dictionary:
421 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
422 str(sigmaname) + self._label_suffix] = str(
423 self.sigma_dictionary[sigmaname][0].get_scale())
429 """ Get all need data to construct a mover in IMP.pmi.dof class"""
431 if self.sigma_is_sampled:
432 for sigmaname
in self.sigma_dictionary:
433 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" + str(sigmaname) +
"_" + self.label
434 particle=self.sigma_dictionary[sigmaname][0]
435 maxstep=(self.sigma_dictionary[sigmaname][1])
438 mv.set_name(mover_name)
441 if self.psi_is_sampled:
442 for psiname
in self.psi_dictionary:
443 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + str(psiname) +
"_" + self.label
444 particle=self.psi_dictionary[psiname][0]
445 maxstep=(self.psi_dictionary[psiname][1])
448 mv.set_name(mover_name)
455 """ Get the particles to be sampled by the IMP.pmi.sampler object """
457 if self.sigma_is_sampled:
458 for sigmaname
in self.sigma_dictionary:
459 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
460 str(sigmaname) + self._label_suffix] =\
461 ([self.sigma_dictionary[sigmaname][0]],
462 self.sigma_dictionary[sigmaname][1])
464 if self.psi_is_sampled:
465 for psiname
in self.psi_dictionary:
466 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
467 str(psiname) + self._label_suffix] =\
468 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
474 """Setup cross-link distance restraints at atomic level
475 The "atomic" aspect is that it models the particle uncertainty with a Gaussian
476 The noise in the data and the structural uncertainty of cross-linked amino-acids
477 is inferred using Bayes' theory of probability
478 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
488 nuisances_are_optimized=
True,
495 Automatically creates one "sigma" per crosslinked residue and one "psis" per pair.
496 Other nuisance options are available.
497 @note Will return an error if the data+extra_sel don't specify two particles per XL pair.
498 @param root_hier The root hierarchy on which you'll do selection
499 @param xldb CrossLinkDataBase object
500 @param atom_type Can either be "NZ" or "CA"
501 @param length The XL linker length
502 @param slope Linear term to add to the restraint function to help when far away
503 @param nstates The number of states to model. Defaults to the number of states in root.
504 @param label The output label for the restraint
505 @param nuisances_are_optimized Whether to optimize nuisances
506 @param sigma_init The initial value for all the sigmas
507 @param psi_init The initial value for all the psis
508 @param one_psi Use a single psi for all restraints (if False, creates one per XL)
509 @param filelabel automatically generated file containing missing/included/excluded
510 cross-links will be labeled using this text
511 @param weight Weight of restraint
516 self.root = root_hier
517 rname =
"AtomicXLRestraint"
518 super(AtomicCrossLinkMSRestraint, self).
__init__(
519 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
523 self.sigma_is_sampled = nuisances_are_optimized
524 self.psi_is_sampled = nuisances_are_optimized
526 if atom_type
not in(
"NZ",
"CA"):
527 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
528 self.atom_type = atom_type
529 self.nstates = nstates
531 self.nstates = len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE))
532 elif nstates!=len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE)):
533 print(
"Warning: nstates is not the same as the number of states in root")
535 self.rs_psi = self._create_restraint_set(
"psi")
536 self.rs_sig = self._create_restraint_set(
"sigma")
537 self.rs_lin = self._create_restraint_set(
"linear")
539 self.psi_dictionary = {}
540 self.sigma_dictionary = {}
542 self.particles=defaultdict(set)
543 self.one_psi = one_psi
544 self.bonded_pairs = []
546 print(
'creating a single psi for all XLs')
548 print(
'creating one psi for each XL')
551 if filelabel
is not None:
552 indb = open(
"included." + filelabel +
".xl.db",
"w")
553 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
554 midb = open(
"missing." + filelabel +
".xl.db",
"w")
560 # read ahead to get the number of XL's per residue
561 num_xls_per_res=defaultdict(int)
562 for unique_id in data:
563 for nstate in range(self.nstates):
564 for xl in data[unique_id]:
565 num_xls_per_res[str(xl.r1)]+=1
566 num_xls_per_res[str(xl.r2)]+=1
568 # Will setup two sigmas based on promiscuity of the residue
570 self.sig_low = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
571 max_val=100.0,is_opt=self.nuis_opt)
572 self.sig_high = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
573 max_val=100.0,is_opt=self.nuis_opt)
575 self._create_sigma(
'sigma',sigma_init)
577 self._create_psi(
'psi',psi_init)
579 for xlid
in self.xldb.xlid_iterator():
580 self._create_psi(xlid,psi_init)
584 for xlid
in self.xldb.xlid_iterator():
587 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
589 psip = self.psi_dictionary[unique_id][0].get_particle_index()
598 for nstate
in self.nstates:
599 for xl
in self.xldb[xlid]:
600 r1 = xl[self.xldb.residue1_key]
601 c1 = xl[self.xldb.protein1_key].strip()
602 r2 = xl[self.xldb.residue2_key]
603 c2 = xl[self.xldb.protein2_key].strip()
610 residue_index=r1).get_selected_particles()
615 residue_index=r2).get_selected_particles()
617 warnings.warn(
"AtomicXLRestraint: residue %d of "
618 "chain %s is not there" % (r1, c1),
620 if filelabel
is not None:
621 midb.write(str(xl) +
"\n")
625 warnings.warn(
"AtomicXLRestraint: residue %d of "
626 "chain %s is not there" % (r2, c2),
628 if filelabel
is not None:
629 midb.write(str(xl) +
"\n")
635 num1=num_xls_per_res[str(xl.r1)]
636 num2=num_xls_per_res[str(xl.r2)]
637 if num1<sig_threshold:
641 if num2<sig_threshold:
646 sig1 = self.sigma_dictionary[
'sigma'][0]
647 sig2 = self.sigma_dictionary[
'sigma'][0]
650 for p1,p2
in itertools.product(ps1,ps2):
653 self.particles[nstate]|=set((p1,p2))
654 r.add_contribution([p1.get_index(),p2.get_index()],
655 [sig1.get_particle_index(),sig2.get_particle_index()])
658 if num_contributions>0:
659 print(
'XL:',xlid,
'num contributions:',num_contributions)
662 raise Exception(
"You didn't create any XL restraints")
663 print(
'created',len(xlrs),
'XL restraints')
664 rname = self.rs.get_name()
666 self.rs.set_name(rname)
667 self.rs.set_weight(self.weight)
668 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
670 def get_hierarchy(self):
673 def _create_sigma(self, name,sigmainit):
674 """ This is called internally. Creates a nuisance
675 on the structural uncertainty """
676 if name
in self.sigma_dictionary:
677 return self.sigma_dictionary[name][0]
679 sigmaminnuis = 0.0000001
680 sigmamaxnuis = 1000.0
684 sigma = IMP.pmi.tools.SetupNuisance(self.model,
688 self.sigma_is_sampled).get_particle()
689 self.sigma_dictionary[name] = (
692 self.sigma_is_sampled)
693 self.rs_sig.add_restraint(
702 def _create_psi(self, name,psiinit):
703 """ This is called internally. Creates a nuisance
704 on the data uncertainty """
705 if name
in self.psi_dictionary:
706 return self.psi_dictionary[name][0]
708 psiminnuis = 0.0000001
709 psimaxnuis = 0.4999999
713 psi = IMP.pmi.tools.SetupNuisance(self.model,
717 self.psi_is_sampled).get_particle()
718 self.psi_dictionary[name] = (
723 self.rs_psi.add_restraint(
735 """ create dummy harmonic restraints for each XL but don't add to model
736 Makes it easy to see each contribution to each XL in RMF
738 class MyGetRestraint(object):
747 for nxl
in range(self.get_number_of_restraints()):
748 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
750 for ncontr
in range(xl.get_number_of_contributions()):
751 ps=xl.get_contribution(ncontr)
753 'xl%i_contr%i'%(nxl,ncontr))
755 dummy_rs.append(MyGetRestraint(rs))
760 """ Get the particles to be sampled by the IMP.pmi.sampler object """
762 if self.sigma_is_sampled:
763 for sigmaname
in self.sigma_dictionary:
764 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
765 str(sigmaname) + self._label_suffix] = \
766 ([self.sigma_dictionary[sigmaname][0]],
767 self.sigma_dictionary[sigmaname][1])
768 if self.psi_is_sampled:
769 for psiname
in self.psi_dictionary:
770 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
771 str(psiname) + self._label_suffix] =\
772 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
775 def get_bonded_pairs(self):
776 return self.bonded_pairs
778 def get_number_of_restraints(self):
779 return self.rs.get_number_of_restraints()
782 return 'XL restraint with '+str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
786 """Read a stat file and load all the sigmas.
787 This is potentially quite stupid.
788 It's also a hack since the sigmas should be stored in the RMF file.
789 Also, requires one sigma and one psi for ALL XLs.
792 sig_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
793 "-s",
"AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
794 psi_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
795 "-s",
"AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
796 for nxl
in range(self.rs.get_number_of_restraints()):
797 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
800 for contr
in range(xl.get_number_of_contributions()):
801 sig1,sig2=xl.get_contribution_sigmas(contr)
804 print(
'loaded nuisances from file')
807 max_prob_for_violation=0.1,
808 min_dist_for_violation=1e9,
810 limit_to_chains=
None,
812 """Create CMM files, one for each state, of all crosslinks.
813 will draw in GREEN if non-violated in all states (or if only one state)
814 will draw in PURPLE if non-violated only in a subset of states (draws nothing elsewhere)
815 will draw in RED in ALL states if all violated
816 (if only one state, you'll only see green and red)
818 @param out_prefix Output xlink files prefix
819 @param max_prob_for_violation It's a violation if the probability is below this
820 @param min_dist_for_violation It's a violation if the min dist is above this
821 @param coarsen Use CA positions
822 @param limit_to_chains Try to visualize just these chains
823 @param exclude_to_chains Try to NOT visualize these chains
825 print(
'going to calculate violations and plot CMM files')
827 all_dists = [s[
"low_dist"]
for s
in all_stats]
833 cmds = defaultdict(set)
834 for nstate
in range(self.nstates):
835 outf=open(out_prefix+str(nstate)+
'.cmm',
'w')
836 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
839 print(
'will limit to',limit_to_chains)
840 print(
'will exclude',exclude_chains)
845 for nxl
in range(self.rs.get_number_of_restraints()):
849 for nstate
in range(self.nstates):
850 prob = state_info[nstate][nxl][
"prob"]
851 low_dist = state_info[nstate][nxl][
"low_dist"]
852 if prob<max_prob_for_violation
or low_dist>min_dist_for_violation:
860 if len(npass)==self.nstates:
862 elif len(nviol)==self.nstates:
866 print(nxl,
'state dists:',[state_info[nstate][nxl][
"low_dist"]
for nstate
in range(self.nstates)],
867 'viol states:',nviol,
'all viol?',all_viol)
868 for nstate
in range(self.nstates):
870 r=0.365; g=0.933; b=0.365;
873 r=0.980; g=0.302; b=0.247;
880 r=0.365; g=0.933; b=0.365;
882 pp = state_info[nstate][nxl][
"low_pp"]
891 cmds[nstate].add((ch1,r1))
892 cmds[nstate].add((ch2,r2))
894 outf = out_fns[nstate]
896 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
897 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,c1[0],c1[1],c1[2],r,g,b))
898 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
899 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv+1,c2[0],c2[1],c2[2],r,g,b))
900 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
901 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,nv+1,r,g,b))
904 for nstate
in range(self.nstates):
905 out_fns[nstate].write(
'</marker_set>\n')
906 out_fns[nstate].close()
908 for ch,r
in cmds[nstate]:
909 cmd+=
'#%i:%i.%s '%(nstate,r,ch)
913 def _get_contribution_info(self,xl,ncontr,use_CA=False):
914 """Return the particles at that contribution. If requested will return CA's instead"""
915 idx1=xl.get_contribution(ncontr)[0]
916 idx2=xl.get_contribution(ncontr)[1]
924 return idx1,idx2,dist
926 def get_best_stats(self,limit_to_state=None,limit_to_chains=None,exclude_chains='',use_CA=False):
927 ''' return the probability, best distance, two coords, and possibly the psi for each xl
928 @param limit_to_state Only examine contributions from one state
929 @param limit_to_chains Returns the particles for certain "easy to visualize" chains
930 @param exclude_chains Even if you limit, don't let one end be in this list.
931 Only works if you also limit chains
932 @param use_CA Limit to CA particles
935 for nxl
in range(self.rs.get_number_of_restraints()):
937 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
944 for contr
in range(xl.get_number_of_contributions()):
945 pp = xl.get_contribution(contr)
952 if limit_to_state
is not None:
954 if nstate!=limit_to_state:
956 state_contrs.append(contr)
959 if limit_to_chains
is not None:
962 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
963 c1
not in exclude_chains
and c2
not in exclude_chains):
964 if dist<low_dist_lim:
971 if limit_to_state
is not None:
972 this_info[
"prob"] = xl.evaluate_for_contributions(state_contrs,
None)
974 this_info[
"prob"] = xl.unprotected_evaluate(
None)
975 if limit_to_chains
is not None:
976 this_info[
"low_pp"] = low_pp_lim
978 this_info[
"low_pp"] = low_pp
980 this_info[
"low_dist"] = low_dist
983 this_info[
"psi"] = pval
984 ret.append(this_info)
987 def print_stats(self):
990 for nxl,s
in enumerate(stats):
996 output = super(AtomicCrossLinkMSRestraint, self).
get_output()
1007 for nxl,s
in enumerate(stats):
1008 if s[
'low_dist']>20.0:
1010 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"Prob")]=str(s[
'prob'])
1011 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"BestDist")]=str(s[
'low_dist'])
1012 if not self.one_psi:
1013 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"psi")]=str(s[
'psi'])
1014 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1018 class CysteineCrossLinkRestraint(object):
1019 def __init__(self, root_hier, filename, cbeta=False,
1020 betatuple=(0.03, 0.1),
1021 disttuple=(0.0, 25.0, 1000),
1022 omegatuple=(1.0, 1000.0, 50),
1023 sigmatuple=(0.3, 0.3, 1),
1025 sigmaissampled=
False,
1026 weightissampled=
True,
1027 epsilonissampled=
True
1034 self.m = root_hier.get_model()
1037 self.epsilonmaxtrans = 0.01
1038 self.sigmamaxtrans = 0.1
1039 self.betamaxtrans = 0.01
1040 self.weightmaxtrans = 0.1
1042 self.outputlevel =
"low"
1043 self.betaissampled = betaissampled
1044 self.sigmaissampled = sigmaissampled
1045 self.weightissampled = weightissampled
1046 self.epsilonissampled = epsilonissampled
1048 betalower = betatuple[0]
1049 betaupper = betatuple[1]
1056 self.beta = IMP.pmi.tools.SetupNuisance(
1061 betaissampled).get_particle(
1064 self.sigma = IMP.pmi.tools.SetupNuisance(
1069 sigmaissampled).get_particle()
1071 self.weight = IMP.pmi.tools.SetupWeight(
1073 isoptimized=
False).get_particle(
1077 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1086 if t[5]
in self.epsilons:
1087 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1088 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1090 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(self.m,
1091 0.01, 0.01, 1.0 - float(t[4]), epsilonissampled).get_particle()
1092 up = self.epsilons[t[5]].get_upper()
1093 low = self.epsilons[t[5]].get_lower()
1095 self.epsilons[t[5]].set_upper(low)
1097 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1101 crossdata = IMP.pmi.tools.get_cross_link_data(
1102 "cysteine",
"cysteine_CA_FES.txt.standard",
1103 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1105 crossdata = IMP.pmi.tools.get_cross_link_data(
1106 "cysteine",
"cysteine_CB_FES.txt.standard",
1107 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1110 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1111 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1112 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1115 print(
"--------------")
1116 print(
"CysteineCrossLink: attempting to create a restraint " + str(d))
1136 self.epsilons[epslabel],
1147 molecule=chain1, residue_index=resid1,
1149 p1 = p1.get_selected_particles()
1156 molecule=chain2, residue_index=resid2,
1158 p2 = p2.get_selected_particles()
1168 for t
in range(-1, 2):
1170 molecule=chain1, copy_index=0,
1171 residue_index=resid1 + t)
1172 p = p.get_selected_particles()
1177 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid1 + t, chain1))
1180 molecule=chain2, copy_index=0,
1181 residue_index=resid2 + t)
1182 p = p.get_selected_particles()
1187 print(
"\033[93m CysteineCrossLink: missing representation for residue %d of chain %s \033[0m" % (resid2 + t, chain2))
1190 if (p1
is not None and p2
is not None):
1191 ccl.add_contribution(p1, p2)
1195 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2,
IMP.core.get_distance(d1, d2))
1198 if (len(p1) == 3
and len(p2) == 3):
1199 p11n = p1[0].get_name()
1200 p12n = p1[1].get_name()
1201 p13n = p1[2].get_name()
1202 p21n = p2[0].get_name()
1203 p22n = p2[1].get_name()
1204 p23n = p2[2].get_name()
1206 print(
"CysteineCrossLink: generating CB cysteine cross-link restraint between")
1207 print(
"CysteineCrossLink: residue %d of chain %s and residue %d of chain %s" % (resid1, chain1, resid2, chain2))
1208 print(
"CysteineCrossLink: between particles %s %s %s and %s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1210 ccl.add_contribution(p1, p2)
1213 self.rs.add_restraint(ccl)
1214 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1215 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1218 self.weight.get_particle()
1219 ).set_weights_are_optimized(weightissampled)
1229 if self.epsilonissampled:
1230 for eps
in self.epsilons.keys():
1231 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1232 self.label] = ([self.epsilons[eps]], self.epsilonmaxtrans)
1233 if self.betaissampled:
1234 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1235 self.label] = ([self.beta], self.betamaxtrans)
1236 if self.weightissampled:
1237 ps[
"Weights_CysteineCrossLinkRestraint_" +
1238 self.label] = ([self.weight], self.weightmaxtrans)
1239 if self.sigmaissampled:
1240 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1241 self.label] = ([self.sigma], self.sigmamaxtrans)
1244 def set_output_level(self, level="low"):
1246 self.outputlevel = level
1251 def get_sigma(self):
1256 score = self.rs.unprotected_evaluate(
None)
1257 output[
"_TotalScore"] = str(score)
1258 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1259 output[
"CysteineCrossLinkRestraint_sigma_" +
1260 self.label] = str(self.sigma.get_scale())
1261 for eps
in self.epsilons.keys():
1262 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1263 self.label] = str(self.epsilons[eps].get_scale())
1264 output[
"CysteineCrossLinkRestraint_beta_" +
1265 self.label] = str(self.beta.get_scale())
1266 for n
in range(self.weight.get_number_of_states()):
1267 output[
"CysteineCrossLinkRestraint_weights_" +
1268 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1270 if self.outputlevel ==
"high":
1271 for rst
in self.rs.get_restraints():
1272 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1273 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
1274 "_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_model_frequency()
1275 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1276 IMP.isd.CysteineCrossLinkRestraint.get_from(
1279 + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_standard_error()
1283 class DisulfideCrossLinkRestraint(object):
1284 def __init__(self, representation_or_hier,
1292 self.m = representation_or_hier.get_model()
1295 resolution=resolution)
1298 resolution=resolution)
1305 self.linear.set_slope(0.0)
1309 self.psi_dictionary={}
1310 self.sigma_dictionary={}
1311 self.psi_is_sampled =
False
1312 self.sigma_is_sampled =
False
1315 if len(ps1) > 1
or len(ps1) == 0:
1316 raise ValueError(
"DisulfideBondRestraint: ERROR> first selection pattern selects multiple particles or sero particles")
1318 if len(ps2) > 1
or len(ps2) == 0:
1319 raise ValueError(
"DisulfideBondRestraint: ERROR> second selection pattern selects multiple particles or sero particles")
1324 sigma=self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1325 psi=self.create_psi(
"PSI_DISULFIDE_BOND")
1327 p1i = p1.get_index()
1328 p2i = p2.get_index()
1337 dr.add_contribution((p1i, p2i), (si, si), psii)
1341 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1342 self.rslin.add_restraint(pr)
1345 self.rs.add_restraint(lw)
1347 self.xl[
"Particle1"]=p1
1348 self.xl[
"Particle2"]=p2
1349 self.xl[
"Sigma"]=sigma
1356 def get_hierarchies(self):
1359 def get_restraint_sets(self):
1368 def get_restraints(self):
1370 for r
in self.rs.get_restraints():
1371 rlist.append(IMP.core.PairRestraint.get_from(r))
1374 def set_psi_is_sampled(self, is_sampled=True):
1375 self.psi_is_sampled = is_sampled
1377 def set_sigma_is_sampled(self, is_sampled=True):
1378 self.sigma_is_sampled = is_sampled
1381 def create_sigma(self, name):
1382 ''' a nuisance on the structural uncertainty '''
1383 if name
in self.sigma_dictionary:
1384 return self.sigma_dictionary[name][0]
1387 sigmaminnuis = 0.0000001
1388 sigmamaxnuis = 1000.0
1392 sigma = IMP.pmi.tools.SetupNuisance(self.m, sigmainit,
1393 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
1394 self.sigma_dictionary[name] = (
1397 self.sigma_is_sampled)
1401 def create_psi(self, name):
1402 ''' a nuisance on the inconsistency '''
1403 if name
in self.psi_dictionary:
1404 return self.psi_dictionary[name][0]
1407 psiminnuis = 0.0000001
1408 psimaxnuis = 0.4999999
1412 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1413 psiminnuis, psimaxnuis,
1414 self.psi_is_sampled).get_particle()
1415 self.psi_dictionary[name] = (
1418 self.psi_is_sampled)
1425 score = self.rs.unprotected_evaluate(
None)
1426 output[
"_TotalScore"] = str(score)
1427 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1428 output[
"DisulfideBondRestraint_Linear_Score_" +
1429 self.label] = self.rslin.unprotected_evaluate(
None)
1433 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
def add_to_model
Add the restraint to the model.
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
def get_restraint_for_rmf
Get the restraint for visualization in an RMF file.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
def set_label
Set the unique label used in outputs and particle/restraint names.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
def plot_violations
Create CMM files, one for each state, of all crosslinks.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the part...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_restraint
Get the primary restraint set.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def get_output
Get outputs to write to stat files.
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Base class for PMI restraints, which wrap IMP.Restraint(s).
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.