1 """@namespace IMP.pmi.restraints.crosslinking
2 Restraints for handling cross-linking data.
5 from __future__
import print_function
17 from collections
import defaultdict
26 """Container for restraints shown in the RMF file and in Chimera"""
28 def get_static_info(self):
31 ri.add_string(
"type",
"IMP.pmi.CrossLinkingMassSpectrometryRestraint")
32 ri.add_float(
"linker_length", self.length)
33 ri.add_float(
"slope", self.slope)
34 ri.add_filename(
"filename", self.filename
or "")
36 ri.add_string(
"linker_auth_name", self.linker.auth_name)
37 if self.linker.smiles:
38 ri.add_string(
"linker_smiles", self.linker.smiles)
43 """Setup cross-link distance restraints from mass spectrometry data.
44 The noise in the data and the structural uncertainty of cross-linked amino-acids
45 is inferred using Bayes theory of probability
46 @note Wraps an IMP::isd::CrossLinkMSRestraint
48 _include_in_rmf =
True
50 def __init__(self, root_hier, database=None,
56 attributes_for_label=
None,
58 CrossLinkDataBase=
None,
61 @param root_hier The canonical hierarchy containing all the states
62 @param database The IMP.pmi.io.crosslink.CrossLinkDataBase
63 object that contains the cross-link dataset
64 @param length maximal cross-linker length (including the residue sidechains)
65 @param resolution what representation resolution should the cross-link
66 restraint be applied to.
67 @param slope The slope of a distance-linear scoring function that
68 funnels the score when the particles are
69 too far away. Suggested value 0.02.
70 @param label the extra text to label the restraint so that it is
71 searchable in the output
72 @param filelabel automatically generated file containing missing/included/excluded
73 cross-links will be labeled using this text
74 @param attributes_for_label
75 @param weight Weight of restraint
76 @param linker description of the chemistry of the linker itself, as
77 an ihm.ChemDescriptor object
78 (see https://python-ihm.readthedocs.io/en/latest/main.html#ihm.ChemDescriptor).
79 Common cross-linkers can be found in the `ihm.cross_linkers`
83 model = root_hier.get_model()
85 super(CrossLinkingMassSpectrometryRestraint, self).
__init__(
86 model, weight=weight, label=label,
87 restraint_set_class=_DataRestraintSet)
89 if database
is None and CrossLinkDataBase
is not None:
91 "CrossLinkDataBase is deprecated; use 'database' instead")
92 database = CrossLinkDataBase
95 raise Exception(
"You must pass a database")
98 "CrossLinkingMassSpectrometryRestraint: database should "
99 "be an IMP.pmi.io.crosslink.CrossLinkDataBase object")
100 self.database = database
102 if resolution==0
or resolution
is None:
103 raise Exception(
"You must pass a resolution and it can't be zero")
105 indb = open(
"included." + filelabel +
".xl.db",
"w")
106 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
107 midb = open(
"missing." + filelabel +
".xl.db",
"w")
112 "No linker chemistry specified; this will be guessed from the "
113 "label (%s). It is recommended to specify a linker as an "
114 "ihm.ChemDescriptor object (see the "
115 "CrossLinkingMassSpectrometryRestraint documentation)." % label,
117 self.rs.set_name(self.rs.get_name() +
"_Data")
118 self.rspsi = self._create_restraint_set(
"PriorPsi")
119 self.rssig = self._create_restraint_set(
"PriorSig")
120 self.rslin = self._create_restraint_set(
"Linear")
122 self.rs.filename = self.database.name
123 self.rs.length = length
124 self.rs.slope = slope
125 self.rs.linker = linker
129 self.linear.set_slope(0.0)
132 self.psi_is_sampled =
True
133 self.sigma_is_sampled =
True
134 self.psi_dictionary={}
135 self.sigma_dictionary={}
137 self.outputlevel =
"low"
141 xl_groups = [p.get_cross_link_group(self)
142 for p, state
in IMP.pmi.tools._all_protocol_outputs(
146 copies_to_add = defaultdict(int)
147 print(
'gathering copies')
148 for xlid
in self.database.xlid_iterator():
149 for xl
in self.database[xlid]:
150 r1 = xl[self.database.residue1_key]
151 c1 = xl[self.database.protein1_key]
152 r2 = xl[self.database.residue2_key]
153 c2 = xl[self.database.protein2_key]
154 for c,r
in ((c1,r1),(c2,r2)):
155 if c
in copies_to_add:
161 resolution=resolution).get_selected_particles()
163 copies_to_add[c] = len(sel)-1
165 for molname
in copies_to_add:
166 if copies_to_add[molname]==0:
169 self.database.protein1_key, operator.eq, molname)
170 self.database.set_value(self.database.protein1_key,
173 self.database.protein2_key, operator.eq, molname)
174 self.database.set_value(self.database.protein2_key,
176 for ncopy
in range(copies_to_add[molname]):
177 self.database.clone_protein(
'%s.0' % molname,
178 '%s.%i' % (molname, ncopy + 1))
179 print(
'done pmi2 prelims')
181 for xlid
in self.database.xlid_iterator():
182 new_contribution=
True
183 for xl
in self.database[xlid]:
185 r1 = xl[self.database.residue1_key]
186 c1 = xl[self.database.protein1_key]
187 r2 = xl[self.database.residue2_key]
188 c2 = xl[self.database.protein2_key]
195 name1,copy1 = c1.split(
'.')
197 name2,copy2 = c2.split(
'.')
200 ex_xls = [(p[0].add_experimental_cross_link(
201 r1, name1, r2, name2, group), group)
203 zip(IMP.pmi.tools._all_protocol_outputs(
207 iterlist = range(len(IMP.atom.get_by_type(root_hier,
208 IMP.atom.STATE_TYPE)))
209 for nstate, r
in enumerate(iterlist):
211 xl[self.database.state_key] = nstate
212 xl[self.database.data_set_name_key] = self.label
217 copy_index=int(copy1),
219 resolution=resolution).get_selected_particles()
223 copy_index=int(copy2),
225 resolution=resolution).get_selected_particles()
231 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r1, c1, str(ps1)))
234 "CrossLinkingMassSpectrometryRestraint: "
235 "residue %d of chain %s is not there" % (r1, c1),
237 midb.write(str(xl) +
"\n")
241 raise ValueError(
"residue %d of chain %s selects multiple particles %s" % (r2, c2, str(ps2)))
244 "CrossLinkingMassSpectrometryRestraint: "
245 "residue %d of chain %s is not there" % (r2, c2),
247 midb.write(str(xl) +
"\n")
253 if p1 == p2
and r1 == r2:
255 "CrossLinkingMassSpectrometryRestraint: "
256 "same particle and same residue, skipping "
261 print(
"generating a new cross-link restraint")
262 new_contribution=
False
267 restraints.append(dr)
270 if self.database.sigma1_key
not in xl.keys():
272 xl[self.database.sigma1_key] = sigma1name
274 sigma1name = xl[self.database.sigma1_key]
277 if self.database.sigma2_key
not in xl.keys():
279 xl[self.database.sigma2_key] = sigma2name
281 sigma2name = xl[self.database.sigma2_key]
284 if self.database.psi_key
not in xl.keys():
286 xl[self.database.psi_key] = psiname
288 psiname = xl[self.database.psi_key]
293 p1i = p1.get_particle_index()
295 p2i = p2.get_particle_index()
298 xl[
"Particle_sigma1"]=sigma1
300 xl[
"Particle_sigma2"]=sigma2
302 xl[
"Particle_psi"]=psi
304 dr.add_contribution((p1i, p2i), (s1i, s2i), psii)
307 print(
"--------------")
308 print(
"CrossLinkingMassSpectrometryRestraint: generating cross-link restraint between")
309 print(
"CrossLinkingMassSpectrometryRestraint: residue %d of chain %s and residue %d of chain %s" % (r1, c1, r2, c2))
310 print(
"CrossLinkingMassSpectrometryRestraint: with sigma1 %s sigma2 %s psi %s" % (sigma1name, sigma2name, psiname))
311 print(
"CrossLinkingMassSpectrometryRestraint: between particles %s and %s" % (p1.get_name(), p2.get_name()))
312 print(
"==========================================\n")
313 for p, ex_xl
in zip(IMP.pmi.tools._all_protocol_outputs(
316 p[0].add_cross_link(p[1], ex_xl[0], p1, p2, length,
317 sigma1, sigma2, psi, ex_xl[1])
324 xl[
"IntraRigidBody"]=
True
326 xl[
"IntraRigidBody"]=
False
328 xl_label = self.database.get_short_cross_link_string(xl)
329 xl[
"ShortLabel"]=xl_label
330 dr.set_name(xl_label)
334 pr.set_name(xl_label)
335 self.rslin.add_restraint(pr)
337 self.xl_list.append(xl)
339 indb.write(str(xl) +
"\n")
341 if len(self.xl_list) == 0:
342 raise SystemError(
"CrossLinkingMassSpectrometryRestraint: no cross-link was constructed")
343 self.xl_restraints = restraints
345 self.rs.add_restraint(lw)
347 def __set_dataset(self, ds):
348 self.database.dataset = ds
349 dataset = property(
lambda self: self.database.dataset, __set_dataset)
352 """ get the hierarchy """
356 """ get the restraint set """
360 """ get the restraints in a list """
361 return self.xl_restraints
364 """ get the dummy restraints to be displayed in the rmf file """
365 return self.rs, self.rssig, self.rspsi
368 """ Get a list of tuples containing the particle pairs """
370 for i
in range(len(self.pairs)):
371 p0 = self.pairs[i][0]
372 p1 = self.pairs[i][1]
373 ppairs.append((p0, p1))
377 """ Set the output level of the output """
378 self.outputlevel = level
381 """ Switch on/off the sampling of psi particles """
382 self.psi_is_sampled = is_sampled
385 """ Switch on/off the sampling of sigma particles """
386 self.sigma_is_sampled = is_sampled
390 """ This is called internally. Creates a nuisance
391 on the structural uncertainty """
392 if name
in self.sigma_dictionary:
393 return self.sigma_dictionary[name][0]
396 sigmaminnuis = 0.0000001
397 sigmamaxnuis = 1000.0
401 sigma = IMP.pmi.tools.SetupNuisance(
402 self.model, sigmainit, sigmaminnuis, sigmamaxnuis,
403 self.sigma_is_sampled, name=name).get_particle()
404 self.sigma_dictionary[name] = (
407 self.sigma_is_sampled)
408 self.rssig.add_restraint(
418 """ This is called internally. Creates a nuisance
419 on the data uncertainty """
420 if name
in self.psi_dictionary:
421 return self.psi_dictionary[name][0]
424 psiminnuis = 0.0000001
425 psimaxnuis = 0.4999999
429 psi = IMP.pmi.tools.SetupNuisance(
430 self.model, psiinit, psiminnuis, psimaxnuis,
431 self.psi_is_sampled, name=name).get_particle()
432 self.psi_dictionary[name] = (
437 self.rspsi.add_restraint(
449 """ Get the output of the restraint to be used by the IMP.pmi.output object"""
450 output = super(CrossLinkingMassSpectrometryRestraint, self).
get_output()
452 for xl
in self.xl_list:
454 xl_label=xl[
"ShortLabel"]
458 output[
"CrossLinkingMassSpectrometryRestraint_Score_" +
459 xl_label] = str(-log(ln.unprotected_evaluate(
None)))
463 output[
"CrossLinkingMassSpectrometryRestraint_Distance_" +
467 for psiname
in self.psi_dictionary:
468 output[
"CrossLinkingMassSpectrometryRestraint_Psi_" +
469 str(psiname) + self._label_suffix] = str(
470 self.psi_dictionary[psiname][0].get_scale())
472 for sigmaname
in self.sigma_dictionary:
473 output[
"CrossLinkingMassSpectrometryRestraint_Sigma_" +
474 str(sigmaname) + self._label_suffix] = str(
475 self.sigma_dictionary[sigmaname][0].get_scale())
481 """ Get all need data to construct a mover in IMP.pmi.dof class"""
483 if self.sigma_is_sampled:
484 for sigmaname
in self.sigma_dictionary:
485 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" + str(sigmaname) +
"_" + self.label
486 particle=self.sigma_dictionary[sigmaname][0]
487 maxstep=(self.sigma_dictionary[sigmaname][1])
490 mv.set_name(mover_name)
493 if self.psi_is_sampled:
494 for psiname
in self.psi_dictionary:
495 mover_name=
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" + str(psiname) +
"_" + self.label
496 particle=self.psi_dictionary[psiname][0]
497 maxstep=(self.psi_dictionary[psiname][1])
500 mv.set_name(mover_name)
507 """ Get the particles to be sampled by the IMP.pmi.sampler object """
509 if self.sigma_is_sampled:
510 for sigmaname
in self.sigma_dictionary:
511 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Sigma_" +
512 str(sigmaname) + self._label_suffix] =\
513 ([self.sigma_dictionary[sigmaname][0]],
514 self.sigma_dictionary[sigmaname][1])
516 if self.psi_is_sampled:
517 for psiname
in self.psi_dictionary:
518 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
519 str(psiname) + self._label_suffix] =\
520 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
526 """Setup cross-link distance restraints at atomic level
527 The "atomic" aspect is that it models the particle uncertainty with a Gaussian
528 The noise in the data and the structural uncertainty of cross-linked amino-acids
529 is inferred using Bayes' theory of probability
530 @note Wraps an IMP::isd::AtomicCrossLinkMSRestraint
533 _include_in_rmf =
True
543 nuisances_are_optimized=
True,
550 Automatically creates one "sigma" per cross-linked residue and one "psis" per pair.
551 Other nuisance options are available.
552 @note Will return an error if the data+extra_sel don't specify two particles per XL pair.
553 @param root_hier The root hierarchy on which you'll do selection
554 @param xldb CrossLinkDataBase object
555 @param atom_type Can either be "NZ" or "CA"
556 @param length The XL linker length
557 @param slope Linear term to add to the restraint function to help when far away
558 @param nstates The number of states to model. Defaults to the number of states in root.
559 @param label The output label for the restraint
560 @param nuisances_are_optimized Whether to optimize nuisances
561 @param sigma_init The initial value for all the sigmas
562 @param psi_init The initial value for all the psis
563 @param one_psi Use a single psi for all restraints (if False, creates one per XL)
564 @param filelabel automatically generated file containing missing/included/excluded
565 cross-links will be labeled using this text
566 @param weight Weight of restraint
571 self.root = root_hier
572 rname =
"AtomicXLRestraint"
573 super(AtomicCrossLinkMSRestraint, self).
__init__(
574 self.root.get_model(), name=
"AtomicXLRestraint", label=label,
578 self.sigma_is_sampled = nuisances_are_optimized
579 self.psi_is_sampled = nuisances_are_optimized
581 if atom_type
not in(
"NZ",
"CA"):
582 raise Exception(
"AtomicXLRestraint: atom_type must be NZ or CA")
583 self.atom_type = atom_type
584 self.nstates = nstates
586 self.nstates = len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE))
587 elif nstates!=len(IMP.atom.get_by_type(self.root,IMP.atom.STATE_TYPE)):
588 print(
"Warning: nstates is not the same as the number of states in root")
590 self.rs_psi = self._create_restraint_set(
"psi")
591 self.rs_sig = self._create_restraint_set(
"sigma")
592 self.rs_lin = self._create_restraint_set(
"linear")
594 self.psi_dictionary = {}
595 self.sigma_dictionary = {}
597 self.particles=defaultdict(set)
598 self.one_psi = one_psi
599 self.bonded_pairs = []
601 print(
'creating a single psi for all XLs')
603 print(
'creating one psi for each XL')
606 if filelabel
is not None:
607 indb = open(
"included." + filelabel +
".xl.db",
"w")
608 exdb = open(
"excluded." + filelabel +
".xl.db",
"w")
609 midb = open(
"missing." + filelabel +
".xl.db",
"w")
615 # read ahead to get the number of XL's per residue
616 num_xls_per_res=defaultdict(int)
617 for unique_id in data:
618 for nstate in range(self.nstates):
619 for xl in data[unique_id]:
620 num_xls_per_res[str(xl.r1)]+=1
621 num_xls_per_res[str(xl.r2)]+=1
623 # Will setup two sigmas based on promiscuity of the residue
625 self.sig_low = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
626 max_val=100.0,is_opt=self.nuis_opt)
627 self.sig_high = setup_nuisance(self.m,self.rs_nuis,init_val=sigma_init,min_val=1.0,
628 max_val=100.0,is_opt=self.nuis_opt)
630 self._create_sigma(
'sigma',sigma_init)
632 self._create_psi(
'psi',psi_init)
634 for xlid
in self.xldb.xlid_iterator():
635 self._create_psi(xlid,psi_init)
639 for xlid
in self.xldb.xlid_iterator():
642 psip = self.psi_dictionary[
'psi'][0].get_particle_index()
644 psip = self.psi_dictionary[unique_id][0].get_particle_index()
653 for nstate
in self.nstates:
654 for xl
in self.xldb[xlid]:
655 r1 = xl[self.xldb.residue1_key]
656 c1 = xl[self.xldb.protein1_key].strip()
657 r2 = xl[self.xldb.residue2_key]
658 c2 = xl[self.xldb.protein2_key].strip()
665 residue_index=r1).get_selected_particles()
670 residue_index=r2).get_selected_particles()
672 warnings.warn(
"AtomicXLRestraint: residue %d of "
673 "chain %s is not there" % (r1, c1),
675 if filelabel
is not None:
676 midb.write(str(xl) +
"\n")
680 warnings.warn(
"AtomicXLRestraint: residue %d of "
681 "chain %s is not there" % (r2, c2),
683 if filelabel
is not None:
684 midb.write(str(xl) +
"\n")
690 num1=num_xls_per_res[str(xl.r1)]
691 num2=num_xls_per_res[str(xl.r2)]
692 if num1<sig_threshold:
696 if num2<sig_threshold:
701 sig1 = self.sigma_dictionary[
'sigma'][0]
702 sig2 = self.sigma_dictionary[
'sigma'][0]
705 for p1,p2
in itertools.product(ps1,ps2):
708 self.particles[nstate]|=set((p1,p2))
709 r.add_contribution([p1.get_index(),p2.get_index()],
710 [sig1.get_particle_index(),sig2.get_particle_index()])
713 if num_contributions>0:
714 print(
'XL:',xlid,
'num contributions:',num_contributions)
717 raise Exception(
"You didn't create any XL restraints")
718 print(
'created',len(xlrs),
'XL restraints')
719 rname = self.rs.get_name()
721 self.rs.set_name(rname)
722 self.rs.set_weight(self.weight)
723 self.restraint_sets = [self.rs] + self.restraint_sets[1:]
725 def get_hierarchy(self):
728 def _create_sigma(self, name,sigmainit):
729 """ This is called internally. Creates a nuisance
730 on the structural uncertainty """
731 if name
in self.sigma_dictionary:
732 return self.sigma_dictionary[name][0]
734 sigmaminnuis = 0.0000001
735 sigmamaxnuis = 1000.0
739 sigma = IMP.pmi.tools.SetupNuisance(self.model,
743 self.sigma_is_sampled).get_particle()
744 self.sigma_dictionary[name] = (
747 self.sigma_is_sampled)
748 self.rs_sig.add_restraint(
757 def _create_psi(self, name,psiinit):
758 """ This is called internally. Creates a nuisance
759 on the data uncertainty """
760 if name
in self.psi_dictionary:
761 return self.psi_dictionary[name][0]
763 psiminnuis = 0.0000001
764 psimaxnuis = 0.4999999
768 psi = IMP.pmi.tools.SetupNuisance(self.model,
772 self.psi_is_sampled).get_particle()
773 self.psi_dictionary[name] = (
778 self.rs_psi.add_restraint(
790 """ create dummy harmonic restraints for each XL but don't add to model
791 Makes it easy to see each contribution to each XL in RMF
793 class MyGetRestraint(object):
796 def get_restraint_for_rmf(self):
802 for nxl
in range(self.get_number_of_restraints()):
803 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
805 for ncontr
in range(xl.get_number_of_contributions()):
806 ps=xl.get_contribution(ncontr)
808 'xl%i_contr%i'%(nxl,ncontr))
810 dummy_rs.append(MyGetRestraint(rs))
815 """ Get the particles to be sampled by the IMP.pmi.sampler object """
817 if self.sigma_is_sampled:
818 for sigmaname
in self.sigma_dictionary:
819 ps[
"Nuisances_AtomicCrossLinkingMSRestraint_Sigma_" +
820 str(sigmaname) + self._label_suffix] = \
821 ([self.sigma_dictionary[sigmaname][0]],
822 self.sigma_dictionary[sigmaname][1])
823 if self.psi_is_sampled:
824 for psiname
in self.psi_dictionary:
825 ps[
"Nuisances_CrossLinkingMassSpectrometryRestraint_Psi_" +
826 str(psiname) + self._label_suffix] =\
827 ([self.psi_dictionary[psiname][0]], self.psi_dictionary[psiname][1])
830 def get_bonded_pairs(self):
831 return self.bonded_pairs
833 def get_number_of_restraints(self):
834 return self.rs.get_number_of_restraints()
837 return 'XL restraint with '+str(len(self.rs.get_restraint(0).get_number_of_restraints())) \
841 """Read a stat file and load all the sigmas.
842 This is potentially quite stupid.
843 It's also a hack since the sigmas should be stored in the RMF file.
844 Also, requires one sigma and one psi for ALL XLs.
847 sig_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
848 "-s",
"AtomicXLRestraint_sigma"]).split(
'\n>')[1+nframe])
849 psi_val = float(subprocess.check_output([
"process_output.py",
"-f",in_fn,
850 "-s",
"AtomicXLRestraint_psi"]).split(
'\n>')[1+nframe])
851 for nxl
in range(self.rs.get_number_of_restraints()):
852 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
855 for contr
in range(xl.get_number_of_contributions()):
856 sig1,sig2=xl.get_contribution_sigmas(contr)
859 print(
'loaded nuisances from file')
862 max_prob_for_violation=0.1,
863 min_dist_for_violation=1e9,
865 limit_to_chains=
None,
867 """Create CMM files, one for each state, of all cross-links.
868 will draw in GREEN if non-violated in all states (or if only one state)
869 will draw in PURPLE if non-violated only in a subset of states (draws nothing elsewhere)
870 will draw in RED in ALL states if all violated
871 (if only one state, you'll only see green and red)
873 @param out_prefix Output xlink files prefix
874 @param max_prob_for_violation It's a violation if the probability is below this
875 @param min_dist_for_violation It's a violation if the min dist is above this
876 @param coarsen Use CA positions
877 @param limit_to_chains Try to visualize just these chains
878 @param exclude_to_chains Try to NOT visualize these chains
880 print(
'going to calculate violations and plot CMM files')
882 all_dists = [s[
"low_dist"]
for s
in all_stats]
888 cmds = defaultdict(set)
889 for nstate
in range(self.nstates):
890 outf=open(out_prefix+str(nstate)+
'.cmm',
'w')
891 outf.write(
'<marker_set name="xlinks_state%i"> \n' % nstate)
894 print(
'will limit to',limit_to_chains)
895 print(
'will exclude',exclude_chains)
900 for nxl
in range(self.rs.get_number_of_restraints()):
904 for nstate
in range(self.nstates):
905 prob = state_info[nstate][nxl][
"prob"]
906 low_dist = state_info[nstate][nxl][
"low_dist"]
907 if prob<max_prob_for_violation
or low_dist>min_dist_for_violation:
915 if len(npass)==self.nstates:
917 elif len(nviol)==self.nstates:
921 print(nxl,
'state dists:',[state_info[nstate][nxl][
"low_dist"]
for nstate
in range(self.nstates)],
922 'viol states:',nviol,
'all viol?',all_viol)
923 for nstate
in range(self.nstates):
925 r=0.365; g=0.933; b=0.365;
928 r=0.980; g=0.302; b=0.247;
935 r=0.365; g=0.933; b=0.365;
937 pp = state_info[nstate][nxl][
"low_pp"]
946 cmds[nstate].add((ch1,r1))
947 cmds[nstate].add((ch2,r2))
949 outf = out_fns[nstate]
951 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
952 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,c1[0],c1[1],c1[2],r,g,b))
953 outf.write(
'<marker id= "%d" x="%.3f" y="%.3f" z="%.3f" radius="0.8" '
954 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv+1,c2[0],c2[1],c2[2],r,g,b))
955 outf.write(
'<link id1= "%d" id2="%d" radius="0.8" '
956 'r="%.2f" g="%.2f" b="%.2f"/> \n' % (nv,nv+1,r,g,b))
959 for nstate
in range(self.nstates):
960 out_fns[nstate].write(
'</marker_set>\n')
961 out_fns[nstate].close()
963 for ch,r
in cmds[nstate]:
964 cmd+=
'#%i:%i.%s '%(nstate,r,ch)
968 def _get_contribution_info(self,xl,ncontr,use_CA=False):
969 """Return the particles at that contribution. If requested will return CA's instead"""
970 idx1=xl.get_contribution(ncontr)[0]
971 idx2=xl.get_contribution(ncontr)[1]
979 return idx1,idx2,dist
981 def get_best_stats(self,limit_to_state=None,limit_to_chains=None,exclude_chains='',use_CA=False):
982 ''' return the probability, best distance, two coords, and possibly the psi for each xl
983 @param limit_to_state Only examine contributions from one state
984 @param limit_to_chains Returns the particles for certain "easy to visualize" chains
985 @param exclude_chains Even if you limit, don't let one end be in this list.
986 Only works if you also limit chains
987 @param use_CA Limit to CA particles
990 for nxl
in range(self.rs.get_number_of_restraints()):
992 xl=IMP.isd.AtomicCrossLinkMSRestraint.get_from(self.rs.get_restraint(nxl))
999 for contr
in range(xl.get_number_of_contributions()):
1000 pp = xl.get_contribution(contr)
1007 if limit_to_state
is not None:
1009 if nstate!=limit_to_state:
1011 state_contrs.append(contr)
1014 if limit_to_chains
is not None:
1017 if (c1
in limit_to_chains
or c2
in limit_to_chains)
and (
1018 c1
not in exclude_chains
and c2
not in exclude_chains):
1019 if dist<low_dist_lim:
1026 if limit_to_state
is not None:
1027 this_info[
"prob"] = xl.evaluate_for_contributions(state_contrs,
None)
1029 this_info[
"prob"] = xl.unprotected_evaluate(
None)
1030 if limit_to_chains
is not None:
1031 this_info[
"low_pp"] = low_pp_lim
1033 this_info[
"low_pp"] = low_pp
1035 this_info[
"low_dist"] = low_dist
1036 if not self.one_psi:
1038 this_info[
"psi"] = pval
1039 ret.append(this_info)
1042 def print_stats(self):
1045 for nxl,s
in enumerate(stats):
1047 print(s[
"low_dist"])
1050 def get_output(self):
1051 output = super(AtomicCrossLinkMSRestraint, self).get_output()
1062 for nxl,s
in enumerate(stats):
1063 if s[
'low_dist']>20.0:
1065 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"Prob")]=str(s[
'prob'])
1066 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"BestDist")]=str(s[
'low_dist'])
1067 if not self.one_psi:
1068 output[
"AtomicXLRestraint_%i_%s"%(nxl,
"psi")]=str(s[
'psi'])
1069 output[
"AtomicXLRestraint_NumViol"] = str(bad_count)
1073 class CysteineCrossLinkRestraint(object):
1074 def __init__(self, root_hier, filename, cbeta=False,
1075 betatuple=(0.03, 0.1),
1076 disttuple=(0.0, 25.0, 1000),
1077 omegatuple=(1.0, 1000.0, 50),
1078 sigmatuple=(0.3, 0.3, 1),
1080 sigmaissampled=
False,
1081 weightissampled=
True,
1082 epsilonissampled=
True
1089 self.m = root_hier.get_model()
1092 self.epsilonmaxtrans = 0.01
1093 self.sigmamaxtrans = 0.1
1094 self.betamaxtrans = 0.01
1095 self.weightmaxtrans = 0.1
1097 self.outputlevel =
"low"
1098 self.betaissampled = betaissampled
1099 self.sigmaissampled = sigmaissampled
1100 self.weightissampled = weightissampled
1101 self.epsilonissampled = epsilonissampled
1103 betalower = betatuple[0]
1104 betaupper = betatuple[1]
1111 self.beta = IMP.pmi.tools.SetupNuisance(
1116 betaissampled).get_particle(
1119 self.sigma = IMP.pmi.tools.SetupNuisance(
1124 sigmaissampled).get_particle()
1126 self.weight = IMP.pmi.tools.SetupWeight(
1128 isoptimized=
False).get_particle(
1132 fl = IMP.pmi.tools.open_file_or_inline_text(filename)
1141 if t[5]
in self.epsilons:
1142 if 1.0 - float(t[4]) <= self.epsilons[t[5]].get_upper():
1143 self.epsilons[t[5]].set_upper(1.0 - float(t[4]))
1145 self.epsilons[t[5]] = IMP.pmi.tools.SetupNuisance(self.m,
1146 0.01, 0.01, 1.0 - float(t[4]), epsilonissampled).get_particle()
1147 up = self.epsilons[t[5]].get_upper()
1148 low = self.epsilons[t[5]].get_lower()
1150 self.epsilons[t[5]].set_upper(low)
1152 data.append((int(t[0]), t[1], int(t[2]), t[3], float(t[4]), t[5]))
1156 crossdata = IMP.pmi.tools.get_cross_link_data(
1157 "cysteine",
"cysteine_CA_FES.txt.standard",
1158 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1160 crossdata = IMP.pmi.tools.get_cross_link_data(
1161 "cysteine",
"cysteine_CB_FES.txt.standard",
1162 disttuple, omegatuple, sigmatuple, disttuple[1], disttuple[1], 1)
1165 fmod_grid = IMP.pmi.tools.get_grid(0.0, 1.0, 300,
True)
1166 omega2_grid = IMP.pmi.tools.get_log_grid(0.001, 10000.0, 100)
1167 beta_grid = IMP.pmi.tools.get_log_grid(betalower, betaupper, betangrid)
1170 print(
"--------------")
1171 print(
"CysteineCrossLink: attempting to create a restraint " + str(d))
1191 self.epsilons[epslabel],
1202 molecule=chain1, residue_index=resid1,
1204 p1 = p1.get_selected_particles()
1211 molecule=chain2, residue_index=resid2,
1213 p2 = p2.get_selected_particles()
1223 for t
in range(-1, 2):
1225 molecule=chain1, copy_index=0,
1226 residue_index=resid1 + t)
1227 p = p.get_selected_particles()
1233 "CysteineCrossLink: missing representation for "
1234 "residue %d of chain %s" % (resid1 + t, chain1),
1238 molecule=chain2, copy_index=0,
1239 residue_index=resid2 + t)
1240 p = p.get_selected_particles()
1246 "CysteineCrossLink: missing representation for "
1247 "residue %d of chain %s" % (resid2 + t, chain2),
1251 if (p1
is not None and p2
is not None):
1252 ccl.add_contribution(p1, p2)
1256 print(
"Distance_" + str(resid1) +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2,
IMP.core.get_distance(d1, d2))
1259 if (len(p1) == 3
and len(p2) == 3):
1260 p11n = p1[0].get_name()
1261 p12n = p1[1].get_name()
1262 p13n = p1[2].get_name()
1263 p21n = p2[0].get_name()
1264 p22n = p2[1].get_name()
1265 p23n = p2[2].get_name()
1267 print(
"CysteineCrossLink: generating CB cysteine cross-link restraint between")
1268 print(
"CysteineCrossLink: residue %d of chain %s and residue %d of chain %s" % (resid1, chain1, resid2, chain2))
1269 print(
"CysteineCrossLink: between particles %s %s %s and %s %s %s" % (p11n, p12n, p13n, p21n, p22n, p23n))
1271 ccl.add_contribution(p1, p2)
1274 self.rs.add_restraint(ccl)
1275 ccl.set_name(
"CysteineCrossLink_" + str(resid1)
1276 +
"_" + chain1 +
":" + str(resid2) +
"_" + chain2)
1279 self.weight.get_particle()
1280 ).set_weights_are_optimized(weightissampled)
1282 def set_label(self, label):
1285 def add_to_model(self):
1290 if self.epsilonissampled:
1291 for eps
in self.epsilons.keys():
1292 ps[
"Nuisances_CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1293 self.label] = ([self.epsilons[eps]], self.epsilonmaxtrans)
1294 if self.betaissampled:
1295 ps[
"Nuisances_CysteineCrossLinkRestraint_beta_" +
1296 self.label] = ([self.beta], self.betamaxtrans)
1297 if self.weightissampled:
1298 ps[
"Weights_CysteineCrossLinkRestraint_" +
1299 self.label] = ([self.weight], self.weightmaxtrans)
1300 if self.sigmaissampled:
1301 ps[
"Nuisances_CysteineCrossLinkRestraint_" +
1302 self.label] = ([self.sigma], self.sigmamaxtrans)
1305 def set_output_level(self, level="low"):
1307 self.outputlevel = level
1309 def get_restraint(self):
1312 def get_sigma(self):
1315 def get_output(self):
1317 score = self.rs.unprotected_evaluate(
None)
1318 output[
"_TotalScore"] = str(score)
1319 output[
"CysteineCrossLinkRestraint_Score_" + self.label] = str(score)
1320 output[
"CysteineCrossLinkRestraint_sigma_" +
1321 self.label] = str(self.sigma.get_scale())
1322 for eps
in self.epsilons.keys():
1323 output[
"CysteineCrossLinkRestraint_epsilon_" + eps +
"_" +
1324 self.label] = str(self.epsilons[eps].get_scale())
1325 output[
"CysteineCrossLinkRestraint_beta_" +
1326 self.label] = str(self.beta.get_scale())
1327 for n
in range(self.weight.get_number_of_states()):
1328 output[
"CysteineCrossLinkRestraint_weights_" +
1329 str(n) +
"_" + self.label] = str(self.weight.get_weight(n))
1331 if self.outputlevel ==
"high":
1332 for rst
in self.rs.get_restraints():
1333 output[
"CysteineCrossLinkRestraint_Total_Frequency_" +
1334 IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_name() +
1335 "_" + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_model_frequency()
1336 output[
"CysteineCrossLinkRestraint_Standard_Error_" +
1337 IMP.isd.CysteineCrossLinkRestraint.get_from(
1340 + self.label] = IMP.isd.CysteineCrossLinkRestraint.get_from(rst).get_standard_error()
1344 class DisulfideCrossLinkRestraint(object):
1345 def __init__(self, representation_or_hier,
1353 self.m = representation_or_hier.get_model()
1356 resolution=resolution)
1359 resolution=resolution)
1366 self.linear.set_slope(0.0)
1370 self.psi_dictionary={}
1371 self.sigma_dictionary={}
1372 self.psi_is_sampled =
False
1373 self.sigma_is_sampled =
False
1376 if len(ps1) > 1
or len(ps1) == 0:
1377 raise ValueError(
"DisulfideBondRestraint: ERROR> first selection pattern selects multiple particles or sero particles")
1379 if len(ps2) > 1
or len(ps2) == 0:
1380 raise ValueError(
"DisulfideBondRestraint: ERROR> second selection pattern selects multiple particles or sero particles")
1385 sigma=self.create_sigma(
"SIGMA_DISULFIDE_BOND")
1386 psi=self.create_psi(
"PSI_DISULFIDE_BOND")
1388 p1i = p1.get_index()
1389 p2i = p2.get_index()
1398 dr.add_contribution((p1i, p2i), (si, si), psii)
1402 pr.set_name(
"DISULFIDE_BOND_"+self.label)
1403 self.rslin.add_restraint(pr)
1406 self.rs.add_restraint(lw)
1408 self.xl[
"Particle1"]=p1
1409 self.xl[
"Particle2"]=p2
1410 self.xl[
"Sigma"]=sigma
1413 def add_to_model(self):
1418 def get_hierarchies(self):
1421 def get_restraint_sets(self):
1424 def get_restraint(self):
1427 def get_restraint_for_rmf(self):
1430 def get_restraints(self):
1432 for r
in self.rs.get_restraints():
1433 rlist.append(IMP.core.PairRestraint.get_from(r))
1436 def set_psi_is_sampled(self, is_sampled=True):
1437 self.psi_is_sampled = is_sampled
1439 def set_sigma_is_sampled(self, is_sampled=True):
1440 self.sigma_is_sampled = is_sampled
1443 def create_sigma(self, name):
1444 ''' a nuisance on the structural uncertainty '''
1445 if name
in self.sigma_dictionary:
1446 return self.sigma_dictionary[name][0]
1449 sigmaminnuis = 0.0000001
1450 sigmamaxnuis = 1000.0
1454 sigma = IMP.pmi.tools.SetupNuisance(self.m, sigmainit,
1455 sigmaminnuis, sigmamaxnuis, self.sigma_is_sampled).get_particle()
1456 self.sigma_dictionary[name] = (
1459 self.sigma_is_sampled)
1463 def create_psi(self, name):
1464 ''' a nuisance on the inconsistency '''
1465 if name
in self.psi_dictionary:
1466 return self.psi_dictionary[name][0]
1469 psiminnuis = 0.0000001
1470 psimaxnuis = 0.4999999
1474 psi = IMP.pmi.tools.SetupNuisance(self.m, psiinit,
1475 psiminnuis, psimaxnuis,
1476 self.psi_is_sampled).get_particle()
1477 self.psi_dictionary[name] = (
1480 self.psi_is_sampled)
1485 def get_output(self):
1487 score = self.rs.unprotected_evaluate(
None)
1488 output[
"_TotalScore"] = str(score)
1489 output[
"DisulfideBondRestraint_Data_Score_" + self.label] = str(score)
1490 output[
"DisulfideBondRestraint_Linear_Score_" +
1491 self.label] = self.rslin.unprotected_evaluate(
None)
1495 raise NotImplementedError(
" ")
def set_sigma_is_sampled
Switch on/off the sampling of sigma particles.
Setup cross-link distance restraints from mass spectrometry data.
Add weights to a particle.
A restraint for ambiguous cross-linking MS data and multiple state approach.
def get_best_stats
return the probability, best distance, two coords, and possibly the psi for each xl ...
def get_hierarchies
get the hierarchy
Various classes to hold sets of particles.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
void handle_use_deprecated(std::string message)
Restrain atom pairs based on a set of crosslinks.
Handles cross-link data sets.
Add scale parameter to particle.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
double get_distance(XYZR a, XYZR b)
Compute the sphere distance between a and b.
def get_particles_to_sample
Get the particles to be sampled by the IMP.pmi.sampler object.
Object used to hold a set of restraints.
Class for storing model, its restraints, constraints, and particles.
Classes to handle different kinds of restraints.
Warning related to handling of structures.
def set_output_level
Set the output level of the output.
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def get_restraints
get the restraints in a list
A decorator for a particle representing an atom.
This class allows to create filter functions that can be passed to the CrossLinkDataBase in this way:...
def get_movers
Get all need data to construct a mover in IMP.pmi.dof class.
def get_particle_pairs
Get a list of tuples containing the particle pairs.
def create_psi
This is called internally.
A decorator for a particle with x,y,z coordinates.
def create_sigma
This is called internally.
int get_state_index(Hierarchy h)
Walk up the hierarchy to find the current state.
Report key:value information on restraints.
def plot_violations
Create CMM files, one for each state, of all cross-links.
Modify a set of continuous variables using a normal distribution.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
def create_restraints_for_rmf
create dummy harmonic restraints for each XL but don't add to model Makes it easy to see each contrib...
The general base class for IMP exceptions.
Setup cross-link distance restraints at atomic level The "atomic" aspect is that it models the part...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Calculate the -Log of a list of restraints.
double get_distance(const VectorD< D > &v1, const VectorD< D > &v2)
Compute the distance between two vectors.
def get_restraint_sets
get the restraint set
Applies a PairScore to a Pair.
def get_output
Get the output of the restraint to be used by the IMP.pmi.output object.
def get_restraint_for_rmf
get the dummy restraints to be displayed in the rmf file
Functionality for loading, creating, manipulating and scoring atomic structures.
std::string get_chain_id(Hierarchy h)
Walk up the hierarchy to determine the chain id.
def load_nuisances_from_stat_file
Read a stat file and load all the sigmas.
Select hierarchy particles identified by the biological name.
this class handles a cross-link dataset and do filtering operations, adding cross-links, merge datasets...
def set_psi_is_sampled
Switch on/off the sampling of psi particles.
Warning for probably incorrect input parameters.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...
A restraint for cysteine cross-linking data.