1 """@namespace IMP.pmi.output
2 Classes for writing output files and processing them.
5 from __future__
import print_function, division
24 import cPickle
as pickle
28 class _ChainIDs(object):
29 """Map indices to multi-character chain IDs.
30 We label the first 26 chains A-Z, then we move to two-letter
31 chain IDs: AA through AZ, then BA through BZ, through to ZZ.
32 This continues with longer chain IDs."""
33 def __getitem__(self, ind):
34 chars = string.ascii_uppercase
38 ids.append(chars[ind % lc])
40 ids.append(chars[ind])
41 return "".join(reversed(ids))
45 """Base class for capturing a modeling protocol.
46 Unlike simple output of model coordinates, a complete
47 protocol includes the input data used, details on the restraints,
48 sampling, and clustering, as well as output models.
49 Use via IMP.pmi.topology.System.add_protocol_output().
51 @see IMP.pmi.mmcif.ProtocolOutput for a concrete subclass that outputs
58 if isinstance(elt, (tuple, list)):
59 for elt2
in _flatten(elt):
65 def _disambiguate_chain(chid, seen_chains):
66 """Make sure that the chain ID is unique; warn and correct if it isn't"""
71 if chid
in seen_chains:
72 warnings.warn(
"Duplicate chain ID '%s' encountered" % chid,
75 for suffix
in itertools.count(1):
76 new_chid = chid +
"%d" % suffix
77 if new_chid
not in seen_chains:
78 seen_chains.add(new_chid)
84 def _write_pdb_internal(flpdb, particle_infos_for_pdb, geometric_center,
85 write_all_residues_per_bead):
86 for n,tupl
in enumerate(particle_infos_for_pdb):
87 (xyz, atom_type, residue_type,
88 chain_id, residue_index, all_indexes, radius) = tupl
90 atom_type = IMP.atom.AT_CA
91 if write_all_residues_per_bead
and all_indexes
is not None:
92 for residue_number
in all_indexes:
94 IMP.atom.get_pdb_string((xyz[0] - geometric_center[0],
95 xyz[1] - geometric_center[1],
96 xyz[2] - geometric_center[2]),
97 n+1, atom_type, residue_type,
98 chain_id[:1], residue_number,
' ',
102 IMP.atom.get_pdb_string((xyz[0] - geometric_center[0],
103 xyz[1] - geometric_center[1],
104 xyz[2] - geometric_center[2]),
105 n+1, atom_type, residue_type,
106 chain_id[:1], residue_index,
' ',
108 flpdb.write(
"ENDMDL\n")
111 _Entity = collections.namedtuple(
'_Entity', (
'id',
'seq'))
112 _ChainInfo = collections.namedtuple(
'_ChainInfo', (
'entity',
'name'))
115 def _get_chain_info(chains, root_hier):
119 for mol
in IMP.atom.get_by_type(root_hier, IMP.atom.MOLECULE_TYPE):
121 chain_id = chains[molname]
123 seq = chain.get_sequence()
124 if seq
not in entities:
125 entities[seq] = e = _Entity(id=len(entities)+1, seq=seq)
126 all_entities.append(e)
127 entity = entities[seq]
128 info = _ChainInfo(entity=entity, name=molname)
129 chain_info[chain_id] = info
130 return chain_info, all_entities
133 def _write_mmcif_internal(flpdb, particle_infos_for_pdb, geometric_center,
134 write_all_residues_per_bead, chains, root_hier):
136 chain_info, entities = _get_chain_info(chains, root_hier)
138 writer = ihm.format.CifWriter(flpdb)
139 writer.start_block(
'model')
140 with writer.category(
"_entry")
as l:
143 with writer.loop(
"_entity", [
"id"])
as l:
147 with writer.loop(
"_entity_poly",
148 [
"entity_id",
"pdbx_seq_one_letter_code"])
as l:
150 l.write(entity_id=e.id, pdbx_seq_one_letter_code=e.seq)
152 with writer.loop(
"_struct_asym", [
"id",
"entity_id",
"details"])
as l:
153 for chid
in sorted(chains.values()):
154 ci = chain_info[chid]
155 l.write(id=chid, entity_id=ci.entity.id, details=ci.name)
157 with writer.loop(
"_atom_site",
158 [
"group_PDB",
"type_symbol",
"label_atom_id",
159 "label_comp_id",
"label_asym_id",
"label_seq_id",
161 "Cartn_x",
"Cartn_y",
"Cartn_z",
"label_entity_id",
162 "pdbx_pdb_model_num",
165 for n,tupl
in enumerate(particle_infos_for_pdb):
166 (xyz, atom_type, residue_type,
167 chain_id, residue_index, all_indexes, radius) = tupl
168 ci = chain_info[chain_id]
169 if atom_type
is None:
170 atom_type = IMP.atom.AT_CA
171 c = xyz - geometric_center
172 if write_all_residues_per_bead
and all_indexes
is not None:
173 for residue_number
in all_indexes:
174 l.write(group_PDB=
'ATOM',
176 label_atom_id=atom_type.get_string(),
177 label_comp_id=residue_type.get_string(),
178 label_asym_id=chain_id,
179 label_seq_id=residue_index,
180 auth_seq_id=residue_index, Cartn_x=c[0],
181 Cartn_y=c[1], Cartn_z=c[2], id=ordinal,
182 pdbx_pdb_model_num=1,
183 label_entity_id=ci.entity.id)
186 l.write(group_PDB=
'ATOM', type_symbol=
'C',
187 label_atom_id=atom_type.get_string(),
188 label_comp_id=residue_type.get_string(),
189 label_asym_id=chain_id,
190 label_seq_id=residue_index,
191 auth_seq_id=residue_index, Cartn_x=c[0],
192 Cartn_y=c[1], Cartn_z=c[2], id=ordinal,
193 pdbx_pdb_model_num=1,
194 label_entity_id=ci.entity.id)
199 """Class for easy writing of PDBs, RMFs, and stat files
201 @note Model should be updated prior to writing outputs.
203 def __init__(self, ascii=True,atomistic=False):
204 self.dictionary_pdbs = {}
206 self.dictionary_rmfs = {}
207 self.dictionary_stats = {}
208 self.dictionary_stats2 = {}
209 self.best_score_list =
None
210 self.nbestscoring =
None
212 self.replica_exchange =
False
217 self.chainids =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
219 self.multi_chainids = _ChainIDs()
221 self.particle_infos_for_pdb = {}
222 self.atomistic=atomistic
223 self.use_pmi2 =
False
226 """Get a list of all PDB files being output by this instance"""
227 return list(self.dictionary_pdbs.keys())
def get_rmf_names(self):
    """Return the names of all RMF files registered with this object."""
    return [rmf_name for rmf_name in self.dictionary_rmfs]
def get_stat_names(self):
    """Return the names of all stat files registered with this object."""
    return [stat_name for stat_name in self.dictionary_stats]
235 def _init_dictchain(self, name, prot, multichar_chain=False):
236 self.dictchain[name] = {}
237 self.use_pmi2 =
False
243 self.atomistic =
True
244 for n,mol
in enumerate(IMP.atom.get_by_type(prot,IMP.atom.MOLECULE_TYPE)):
249 chainids = self.multi_chainids
if multichar_chain
else self.chainids
250 for n, i
in enumerate(self.dictionary_pdbs[name].get_children()):
251 self.dictchain[name][i.get_name()] = chainids[n]
255 @param name The PDB filename
256 @param prot The hierarchy to write to this pdb file
257 @param mmcif If True, write PDBs in mmCIF format
258 @note if the PDB name is 'System' then will use Selection to get molecules
260 flpdb = open(name,
'w')
262 self.dictionary_pdbs[name] = prot
263 self._pdb_mmcif[name] = mmcif
264 self._init_dictchain(name, prot)
266 def write_psf(self,filename,name):
267 flpsf=open(filename,
'w')
268 flpsf.write(
"PSF CMAP CHEQ"+
"\n")
269 index_residue_pair_list={}
270 (particle_infos_for_pdb, geometric_center)=self.get_particle_infos_for_pdb_writing(name)
271 nparticles=len(particle_infos_for_pdb)
272 flpsf.write(str(nparticles)+
" !NATOM"+
"\n")
273 for n,p
in enumerate(particle_infos_for_pdb):
278 flpsf.write(
'{0:8d}{1:1s}{2:4s}{3:1s}{4:4s}{5:1s}{6:4s}{7:1s}{8:4s}{9:1s}{10:4s}{11:14.6f}{12:14.6f}{13:8d}{14:14.6f}{15:14.6f}'.format(atom_index,
" ",chain,
" ",str(resid),
" ",
'"'+residue_type.get_string()+
'"',
" ",
"C",
" ",
"C",1.0,0.0,0,0.0,0.0))
281 if chain
not in index_residue_pair_list:
282 index_residue_pair_list[chain]=[(atom_index,resid)]
284 index_residue_pair_list[chain].append((atom_index,resid))
289 for chain
in sorted(index_residue_pair_list.keys()):
291 ls=index_residue_pair_list[chain]
293 ls=sorted(ls, key=
lambda tup: tup[1])
295 indexes=[x[0]
for x
in ls]
298 nbonds=len(indexes_pairs)
299 flpsf.write(str(nbonds)+
" !NBOND: bonds"+
"\n")
302 for i
in range(0,len(indexes_pairs),4):
303 for bond
in indexes_pairs[i:i+4]:
304 flpsf.write(
'{0:8d}{1:8d}'.format(*bond))
307 del particle_infos_for_pdb
312 translate_to_geometric_center=
False,
313 write_all_residues_per_bead=
False):
315 (particle_infos_for_pdb,
316 geometric_center) = self.get_particle_infos_for_pdb_writing(name)
318 if not translate_to_geometric_center:
319 geometric_center = (0, 0, 0)
321 filemode =
'a' if appendmode
else 'w'
322 with open(name, filemode)
as flpdb:
323 if self._pdb_mmcif[name]:
324 _write_mmcif_internal(flpdb, particle_infos_for_pdb,
326 write_all_residues_per_bead,
327 self.dictchain[name],
328 self.dictionary_pdbs[name])
330 _write_pdb_internal(flpdb, particle_infos_for_pdb,
332 write_all_residues_per_bead)
335 """Get the protein name from the particle.
336 This is done by traversing the hierarchy."""
341 p, self.dictchain[name])
343 def get_particle_infos_for_pdb_writing(self, name):
353 particle_infos_for_pdb = []
355 geometric_center = [0, 0, 0]
361 ps =
IMP.atom.Selection(self.dictionary_pdbs[name],resolution=0).get_selected_particles()
365 for n, p
in enumerate(ps):
368 if protname
not in resindexes_dict:
369 resindexes_dict[protname] = []
373 rt = residue.get_residue_type()
374 resind = residue.get_index()
378 geometric_center[0] += xyz[0]
379 geometric_center[1] += xyz[1]
380 geometric_center[2] += xyz[2]
382 particle_infos_for_pdb.append((xyz,
383 atomtype, rt, self.dictchain[name][protname], resind,
None, radius))
384 resindexes_dict[protname].append(resind)
389 resind = residue.get_index()
392 if resind
in resindexes_dict[protname]:
395 resindexes_dict[protname].append(resind)
396 rt = residue.get_residue_type()
399 geometric_center[0] += xyz[0]
400 geometric_center[1] += xyz[1]
401 geometric_center[2] += xyz[2]
403 particle_infos_for_pdb.append((xyz,
None,
404 rt, self.dictchain[name][protname], resind,
None, radius))
408 resind = resindexes[len(resindexes) // 2]
409 if resind
in resindexes_dict[protname]:
412 resindexes_dict[protname].append(resind)
416 geometric_center[0] += xyz[0]
417 geometric_center[1] += xyz[1]
418 geometric_center[2] += xyz[2]
420 particle_infos_for_pdb.append((xyz,
None,
421 rt, self.dictchain[name][protname], resind, resindexes, radius))
427 if len(resindexes) > 0:
428 resind = resindexes[len(resindexes) // 2]
431 geometric_center[0] += xyz[0]
432 geometric_center[1] += xyz[1]
433 geometric_center[2] += xyz[2]
435 particle_infos_for_pdb.append((xyz,
None,
436 rt, self.dictchain[name][protname], resind, resindexes, radius))
439 geometric_center = (geometric_center[0] / atom_count,
440 geometric_center[1] / atom_count,
441 geometric_center[2] / atom_count)
445 particle_infos_for_pdb = sorted(particle_infos_for_pdb,
446 key=
lambda x: (len(x[3]), x[3], x[4]))
448 return (particle_infos_for_pdb, geometric_center)
def write_pdbs(self, appendmode=True, mmcif=False):
    """Write out every PDB file registered with this object.

    @param appendmode if True, add the new model to each existing file
    @param mmcif accepted for interface compatibility but unused here;
           the output format of each file was fixed when it was
           registered (see init_pdb)
    """
    for pdb_name in self.dictionary_pdbs:
        self.write_pdb(pdb_name, appendmode)
455 def init_pdb_best_scoring(self,
459 replica_exchange=
False, mmcif=
False):
463 self._pdb_best_scoring_mmcif = mmcif
464 fileext =
'.cif' if mmcif
else '.pdb'
465 self.suffixes.append(suffix)
466 self.replica_exchange = replica_exchange
467 if not self.replica_exchange:
471 self.best_score_list = []
475 self.best_score_file_name =
"best.scores.rex.py"
476 self.best_score_list = []
477 with open(self.best_score_file_name,
"w")
as best_score_file:
478 best_score_file.write(
479 "self.best_score_list=" + str(self.best_score_list) +
"\n")
481 self.nbestscoring = nbestscoring
482 for i
in range(self.nbestscoring):
483 name = suffix +
"." + str(i) + fileext
484 flpdb = open(name,
'w')
486 self.dictionary_pdbs[name] = prot
487 self._pdb_mmcif[name] = mmcif
488 self._init_dictchain(name, prot)
490 def write_pdb_best_scoring(self, score):
491 if self.nbestscoring
is None:
492 print(
"Output.write_pdb_best_scoring: init_pdb_best_scoring not run")
494 mmcif = self._pdb_best_scoring_mmcif
495 fileext =
'.cif' if mmcif
else '.pdb'
497 if self.replica_exchange:
499 with open(self.best_score_file_name)
as fh:
502 if len(self.best_score_list) < self.nbestscoring:
503 self.best_score_list.append(score)
504 self.best_score_list.sort()
505 index = self.best_score_list.index(score)
506 for suffix
in self.suffixes:
507 for i
in range(len(self.best_score_list) - 2, index - 1, -1):
508 oldname = suffix +
"." + str(i) + fileext
509 newname = suffix +
"." + str(i + 1) + fileext
511 if os.path.exists(newname):
513 os.rename(oldname, newname)
514 filetoadd = suffix +
"." + str(index) + fileext
515 self.write_pdb(filetoadd, appendmode=
False)
518 if score < self.best_score_list[-1]:
519 self.best_score_list.append(score)
520 self.best_score_list.sort()
521 self.best_score_list.pop(-1)
522 index = self.best_score_list.index(score)
523 for suffix
in self.suffixes:
524 for i
in range(len(self.best_score_list) - 1, index - 1, -1):
525 oldname = suffix +
"." + str(i) + fileext
526 newname = suffix +
"." + str(i + 1) + fileext
527 os.rename(oldname, newname)
528 filenametoremove = suffix + \
529 "." + str(self.nbestscoring) + fileext
530 os.remove(filenametoremove)
531 filetoadd = suffix +
"." + str(index) + fileext
532 self.write_pdb(filetoadd, appendmode=
False)
534 if self.replica_exchange:
536 with open(self.best_score_file_name,
"w")
as best_score_file:
537 best_score_file.write(
538 "self.best_score_list=" + str(self.best_score_list) +
'\n')
540 def init_rmf(self, name, hierarchies, rs=None, geometries=None, listofobjects=None):
542 Initialize an RMF file
544 @param name the name of the RMF file
545 @param hierarchies the hierarchies to be included (it is a list)
546 @param rs optional, the restraint sets (it is a list)
547 @param geometries optional, the geometries (it is a list)
548 @param listofobjects optional, the list of objects for the stat (it is a list)
550 rh = RMF.create_rmf_file(name)
553 outputkey_rmfkey=
None
557 if geometries
is not None:
559 if listofobjects
is not None:
560 cat = rh.get_category(
"stat")
562 for l
in listofobjects:
563 if not "get_output" in dir(l):
564 raise ValueError(
"Output: object %s doesn't have get_output() method" % str(l))
565 output=l.get_output()
566 for outputkey
in output:
567 rmftag=RMF.string_tag
568 if isinstance(output[outputkey], float):
569 rmftag = RMF.float_tag
570 elif isinstance(output[outputkey], int):
572 elif isinstance(output[outputkey], str):
573 rmftag = RMF.string_tag
575 rmftag = RMF.string_tag
576 rmfkey=rh.get_key(cat, outputkey, rmftag)
577 outputkey_rmfkey[outputkey]=rmfkey
578 outputkey_rmfkey[
"rmf_file"]=rh.get_key(cat,
"rmf_file", RMF.string_tag)
579 outputkey_rmfkey[
"rmf_frame_index"]=rh.get_key(cat,
"rmf_frame_index", RMF.int_tag)
581 self.dictionary_rmfs[name] = (rh,cat,outputkey_rmfkey,listofobjects)
583 def add_restraints_to_rmf(self, name, objectlist):
584 for o
in _flatten(objectlist):
586 rs = o.get_restraint_for_rmf()
587 if not isinstance(rs, (list, tuple)):
590 rs = [o.get_restraint()]
592 self.dictionary_rmfs[name][0], rs)
594 def add_geometries_to_rmf(self, name, objectlist):
596 geos = o.get_geometries()
599 def add_particle_pair_from_restraints_to_rmf(self, name, objectlist):
602 pps = o.get_particle_pairs()
605 self.dictionary_rmfs[name][0],
608 def write_rmf(self, name):
610 if self.dictionary_rmfs[name][1]
is not None:
611 cat=self.dictionary_rmfs[name][1]
612 outputkey_rmfkey=self.dictionary_rmfs[name][2]
613 listofobjects=self.dictionary_rmfs[name][3]
614 for l
in listofobjects:
615 output=l.get_output()
616 for outputkey
in output:
617 rmfkey=outputkey_rmfkey[outputkey]
619 self.dictionary_rmfs[name][0].get_root_node().set_value(rmfkey,output[outputkey])
620 except NotImplementedError:
622 rmfkey = outputkey_rmfkey[
"rmf_file"]
623 self.dictionary_rmfs[name][0].get_root_node().set_value(rmfkey, name)
624 rmfkey = outputkey_rmfkey[
"rmf_frame_index"]
626 self.dictionary_rmfs[name][0].get_root_node().set_value(rmfkey, nframes-1)
627 self.dictionary_rmfs[name][0].flush()
def close_rmf(self, name):
    """Stop tracking the given RMF file.

    @param name the RMF file name used at init_rmf() time
    """
    rh = self.dictionary_rmfs[name][0]
    # NOTE(review): rh is bound but not used in the visible code;
    # presumably the file handle is released after the dictionary entry
    # is dropped — confirm against the full source.
    del self.dictionary_rmfs[name]
def write_rmfs(self):
    """Write the current frame to every RMF file registered with
    this object."""
    for rmf_name in self.dictionary_rmfs:
        # The keys of dictionary_rmfs are the RMF file names themselves
        # (see init_rmf); the previous code passed rmf_name[0] — just the
        # first character — which can never match a key inside
        # write_rmf() for any multi-character file name.
        self.write_rmf(rmf_name)
638 def init_stat(self, name, listofobjects):
640 flstat = open(name,
'w')
643 flstat = open(name,
'wb')
647 for l
in listofobjects:
648 if not "get_output" in dir(l):
649 raise ValueError(
"Output: object %s doesn't have get_output() method" % str(l))
650 self.dictionary_stats[name] = listofobjects
def set_output_entry(self, key, value):
    """Store a constant entry that is merged into every stat record."""
    self.initoutput[key] = value
655 def write_stat(self, name, appendmode=True):
656 output = self.initoutput
657 for obj
in self.dictionary_stats[name]:
660 dfiltered = dict((k, v)
for k, v
in d.items()
if k[0] !=
"_")
661 output.update(dfiltered)
669 flstat = open(name, writeflag)
670 flstat.write(
"%s \n" % output)
673 flstat = open(name, writeflag +
'b')
674 cPickle.dump(output, flstat, 2)
def write_stats(self):
    """Append a new record to every registered stat file."""
    for stat_name in self.dictionary_stats:
        self.write_stat(stat_name)
681 def get_stat(self, name):
683 for obj
in self.dictionary_stats[name]:
684 output.update(obj.get_output())
687 def write_test(self, name, listofobjects):
694 flstat = open(name,
'w')
695 output = self.initoutput
696 for l
in listofobjects:
697 if not "get_test_output" in dir(l)
and not "get_output" in dir(l):
698 raise ValueError(
"Output: object %s doesn't have get_output() or get_test_output() method" % str(l))
699 self.dictionary_stats[name] = listofobjects
701 for obj
in self.dictionary_stats[name]:
703 d = obj.get_test_output()
707 dfiltered = dict((k, v)
for k, v
in d.items()
if k[0] !=
"_")
708 output.update(dfiltered)
712 flstat.write(
"%s \n" % output)
715 def test(self, name, listofobjects, tolerance=1e-5):
716 output = self.initoutput
717 for l
in listofobjects:
718 if not "get_test_output" in dir(l)
and not "get_output" in dir(l):
719 raise ValueError(
"Output: object %s doesn't have get_output() or get_test_output() method" % str(l))
720 for obj
in listofobjects:
722 output.update(obj.get_test_output())
724 output.update(obj.get_output())
729 flstat = open(name,
'r')
733 test_dict = ast.literal_eval(l)
736 old_value = str(test_dict[k])
737 new_value = str(output[k])
745 fold = float(old_value)
746 fnew = float(new_value)
747 diff = abs(fold - fnew)
749 print(
"%s: test failed, old value: %s new value %s; "
750 "diff %f > %f" % (str(k), str(old_value),
751 str(new_value), diff,
752 tolerance), file=sys.stderr)
754 elif test_dict[k] != output[k]:
755 if len(old_value) < 50
and len(new_value) < 50:
756 print(
"%s: test failed, old value: %s new value %s"
757 % (str(k), old_value, new_value), file=sys.stderr)
760 print(
"%s: test failed, omitting results (too long)"
761 % str(k), file=sys.stderr)
765 print(
"%s from old objects (file %s) not in new objects"
766 % (str(k), str(name)), file=sys.stderr)
770 def get_environment_variables(self):
772 return str(os.environ)
774 def get_versions_of_relevant_modules(self):
781 versions[
"ISD2_VERSION"] = IMP.isd2.get_module_version()
782 except (ImportError):
786 versions[
"ISD_EMXL_VERSION"] = IMP.isd_emxl.get_module_version()
787 except (ImportError):
797 listofsummedobjects=
None):
803 if listofsummedobjects
is None:
804 listofsummedobjects = []
805 if extralabels
is None:
807 flstat = open(name,
'w')
809 stat2_keywords = {
"STAT2HEADER":
"STAT2HEADER"}
810 stat2_keywords.update(
811 {
"STAT2HEADER_ENVIRON": str(self.get_environment_variables())})
812 stat2_keywords.update(
813 {
"STAT2HEADER_IMP_VERSIONS": str(self.get_versions_of_relevant_modules())})
816 for l
in listofobjects:
817 if not "get_output" in dir(l):
818 raise ValueError(
"Output: object %s doesn't have get_output() method" % str(l))
822 dfiltered = dict((k, v)
823 for k, v
in d.items()
if k[0] !=
"_")
824 output.update(dfiltered)
827 for l
in listofsummedobjects:
829 if not "get_output" in dir(t):
830 raise ValueError(
"Output: object %s doesn't have get_output() method" % str(t))
832 if "_TotalScore" not in t.get_output():
833 raise ValueError(
"Output: object %s doesn't have _TotalScore entry to be summed" % str(t))
835 output.update({l[1]: 0.0})
837 for k
in extralabels:
838 output.update({k: 0.0})
840 for n, k
in enumerate(output):
841 stat2_keywords.update({n: k})
842 stat2_inverse.update({k: n})
844 flstat.write(
"%s \n" % stat2_keywords)
846 self.dictionary_stats2[name] = (
852 def write_stat2(self, name, appendmode=True):
854 (listofobjects, stat2_inverse, listofsummedobjects,
855 extralabels) = self.dictionary_stats2[name]
858 for obj
in listofobjects:
859 od = obj.get_output()
860 dfiltered = dict((k, v)
for k, v
in od.items()
if k[0] !=
"_")
862 output.update({stat2_inverse[k]: od[k]})
865 for l
in listofsummedobjects:
869 partial_score += float(d[
"_TotalScore"])
870 output.update({stat2_inverse[l[1]]: str(partial_score)})
873 for k
in extralabels:
874 if k
in self.initoutput:
875 output.update({stat2_inverse[k]: self.initoutput[k]})
877 output.update({stat2_inverse[k]:
"None"})
884 flstat = open(name, writeflag)
885 flstat.write(
"%s \n" % output)
def write_stats2(self):
    """Append a new record to every registered v2-format stat file."""
    for stat_name in self.dictionary_stats2:
        self.write_stat2(stat_name)
894 """Collect statistics from ProcessOutput.get_fields().
895 Counters of the total number of frames read, plus the models that
896 passed the various filters used in get_fields(), are provided."""
899 self.passed_get_every = 0
900 self.passed_filterout = 0
901 self.passed_filtertuple = 0
905 """A class for reading stat files (either rmf or ascii v1 and v2)"""
906 def __init__(self, filename):
907 self.filename = filename
912 if self.filename
is None:
913 raise ValueError(
"No file name provided. Use -h for help")
917 rh = RMF.open_rmf_file_read_only(self.filename)
919 cat=rh.get_category(
'stat')
920 rmf_klist=rh.get_keys(cat)
921 self.rmf_names_keys=dict([(rh.get_name(k),k)
for k
in rmf_klist])
925 f = open(self.filename,
"r")
928 for line
in f.readlines():
929 d = ast.literal_eval(line)
930 self.klist = list(d.keys())
932 if "STAT2HEADER" in self.klist:
935 if "STAT2HEADER" in str(k):
941 for k
in sorted(stat2_dict.items(), key=operator.itemgetter(1))]
943 for k
in sorted(stat2_dict.items(), key=operator.itemgetter(1))]
944 self.invstat2_dict = {}
946 self.invstat2_dict.update({stat2_dict[k]: k})
949 "Please convert to statfile v2.\n")
959 return sorted(self.rmf_names_keys.keys())
def show_keys(self, ncolumns=2, truncate=65):
    """Print the stat-file field names as a multi-column table.

    @param ncolumns number of columns to arrange the names in
    @param truncate passed through to the printer; presumably the
           maximum displayed width per name — confirm in IMP.pmi.tools
    """
    IMP.pmi.tools.print_multicolumn(self.get_keys(), ncolumns, truncate)
966 def get_fields(self, fields, filtertuple=None, filterout=None, get_every=1,
969 Get the desired field names, and return a dictionary.
970 Namely, "fields" are the queried keys in the stat file (eg. ["Total_Score",...])
971 The returned data structure is a dictionary, where each key is a field and the value
972 is the time series (ie, frame ordered series)
973 of that field (ie, {"Total_Score":[Score_0,Score_1,Score_2,Score_3,...],....} )
975 @param fields (list of strings) queried keys in the stat file (eg. "Total_Score"....)
976 @param filterout specify if you want to "grep" out something from
977 the file, so that it is faster
978 @param filtertuple a tuple that contains
979 ("TheKeyToBeFiltered",relationship,value)
980 where relationship = "<", "==", or ">"
981 @param get_every only read every Nth line from the file
982 @param statistics if provided, accumulate statistics in an
983 OutputStatistics object
986 if statistics
is None:
994 rh = RMF.open_rmf_file_read_only(self.filename)
995 nframes=rh.get_number_of_frames()
996 for i
in range(nframes):
997 statistics.total += 1
999 statistics.passed_get_every += 1
1000 statistics.passed_filterout += 1
1002 if not filtertuple
is None:
1003 keytobefiltered = filtertuple[0]
1004 relationship = filtertuple[1]
1005 value = filtertuple[2]
1006 datavalue=rh.get_root_node().get_value(self.rmf_names_keys[keytobefiltered])
1007 if self.isfiltered(datavalue,relationship,value):
continue
1009 statistics.passed_filtertuple += 1
1010 for field
in fields:
1011 outdict[field].append(rh.get_root_node().get_value(self.rmf_names_keys[field]))
1014 f = open(self.filename,
"r")
1017 for line
in f.readlines():
1018 statistics.total += 1
1019 if not filterout
is None:
1020 if filterout
in line:
1022 statistics.passed_filterout += 1
1025 if line_number % get_every != 0:
1026 if line_number == 1
and self.isstat2:
1027 statistics.total -= 1
1028 statistics.passed_filterout -= 1
1030 statistics.passed_get_every += 1
1034 d = ast.literal_eval(line)
1036 print(
"# Warning: skipped line number " + str(line_number) +
" not a valid line")
1041 if not filtertuple
is None:
1042 keytobefiltered = filtertuple[0]
1043 relationship = filtertuple[1]
1044 value = filtertuple[2]
1045 datavalue=d[keytobefiltered]
1046 if self.isfiltered(datavalue, relationship, value):
continue
1048 statistics.passed_filtertuple += 1
1049 [outdict[field].append(d[field])
for field
in fields]
1052 if line_number == 1:
1053 statistics.total -= 1
1054 statistics.passed_filterout -= 1
1055 statistics.passed_get_every -= 1
1058 if not filtertuple
is None:
1059 keytobefiltered = filtertuple[0]
1060 relationship = filtertuple[1]
1061 value = filtertuple[2]
1062 datavalue=d[self.invstat2_dict[keytobefiltered]]
1063 if self.isfiltered(datavalue, relationship, value):
continue
1065 statistics.passed_filtertuple += 1
1066 [outdict[field].append(d[self.invstat2_dict[field]])
for field
in fields]
1072 def isfiltered(self,datavalue,relationship,refvalue):
1075 fdatavalue=float(datavalue)
1077 raise ValueError(
"ProcessOutput.filter: datavalue cannot be converted into a float")
1079 if relationship ==
"<":
1080 if float(datavalue) >= refvalue:
1082 if relationship ==
">":
1083 if float(datavalue) <= refvalue:
1085 if relationship ==
"==":
1086 if float(datavalue) != refvalue:
1092 """ class to allow more advanced handling of RMF files.
1093 It is both a container and a IMP.atom.Hierarchy.
1094 - it is iterable (while loading the corresponding frame)
1095 - Item brackets [] load the corresponding frame
1096 - slice create an iterator
1097 - can relink to another RMF file
1101 @param model: the IMP.Model()
1102 @param rmf_file_name: str, path of the rmf file
1106 self.rh_ref = RMF.open_rmf_file_read_only(rmf_file_name)
1108 raise TypeError(
"Wrong rmf file name or type: %s"% str(rmf_file_name))
1111 self.root_hier_ref = hs[0]
1112 IMP.atom.Hierarchy.__init__(self, self.root_hier_ref)
1114 self.ColorHierarchy=
None
1119 Link to another RMF file
1121 self.rh_ref = RMF.open_rmf_file_read_only(rmf_file_name)
1123 if self.ColorHierarchy:
1124 self.ColorHierarchy.method()
1125 RMFHierarchyHandler.set_frame(self,0)
1127 def set_frame(self,index):
1131 print(
"skipping frame %s:%d\n"%(self.current_rmf, index))
1135 return self.rh_ref.get_number_of_frames()
1137 def __getitem__(self,int_slice_adaptor):
1138 if isinstance(int_slice_adaptor, int):
1139 self.set_frame(int_slice_adaptor)
1140 return int_slice_adaptor
1141 elif isinstance(int_slice_adaptor, slice):
1142 return self.__iter__(int_slice_adaptor)
1144 raise TypeError(
"Unknown Type")
1147 return self.get_number_of_frames()
1149 def __iter__(self,slice_key=None):
1150 if slice_key
is None:
1151 for nframe
in range(len(self)):
1154 for nframe
in list(range(len(self)))[slice_key]:
1157 class CacheHierarchyCoordinates(object):
1158 def __init__(self,StatHierarchyHandler):
1165 self.current_index=
None
1166 self.rmfh=StatHierarchyHandler
1168 self.model=self.rmfh.get_model()
1173 self.nrms.append(nrm)
1176 self.xyzs.append(fb)
1178 def do_store(self,index):
1179 self.rb_trans[index]={}
1180 self.nrm_coors[index]={}
1181 self.xyz_coors[index]={}
1183 self.rb_trans[index][rb]=rb.get_reference_frame()
1184 for nrm
in self.nrms:
1185 self.nrm_coors[index][nrm]=nrm.get_internal_coordinates()
1186 for xyz
in self.xyzs:
1187 self.xyz_coors[index][xyz]=xyz.get_coordinates()
1188 self.current_index=index
1190 def do_update(self,index):
1191 if self.current_index!=index:
1193 rb.set_reference_frame(self.rb_trans[index][rb])
1194 for nrm
in self.nrms:
1195 nrm.set_internal_coordinates(self.nrm_coors[index][nrm])
1196 for xyz
in self.xyzs:
1197 xyz.set_coordinates(self.xyz_coors[index][xyz])
1198 self.current_index=index
1202 return len(self.rb_trans.keys())
1204 def __getitem__(self,index):
1205 if isinstance(index, int):
1206 return index
in self.rb_trans.keys()
1208 raise TypeError(
"Unknown Type")
1211 return self.get_number_of_frames()
1217 """ class to link stat files to several rmf files """
1218 def __init__(self,model=None,stat_file=None,number_best_scoring_models=None,score_key=None,StatHierarchyHandler=None,cache=None):
1221 @param model: IMP.Model()
1222 @param stat_file: either 1) a list or 2) a single stat file names (either rmfs or ascii, or pickled data or pickled cluster), 3) a dictionary containing an rmf/ascii
1223 stat file name as key and a list of frames as values
1224 @param number_best_scoring_models:
1225 @param StatHierarchyHandler: copy constructor input object
1226 @param cache: cache coordinates and rigid body transformations.
1229 if not StatHierarchyHandler
is None:
1232 self.model=StatHierarchyHandler.model
1233 self.data=StatHierarchyHandler.data
1234 self.number_best_scoring_models=StatHierarchyHandler.number_best_scoring_models
1236 self.current_rmf=StatHierarchyHandler.current_rmf
1237 self.current_frame=
None
1238 self.current_index=
None
1239 self.score_threshold=StatHierarchyHandler.score_threshold
1240 self.score_key=StatHierarchyHandler.score_key
1241 self.cache=StatHierarchyHandler.cache
1242 RMFHierarchyHandler.__init__(self, self.model,self.current_rmf)
1244 self.cache=CacheHierarchyCoordinates(self)
1253 self.number_best_scoring_models=number_best_scoring_models
1256 if score_key
is None:
1257 self.score_key=
"Total_Score"
1259 self.score_key=score_key
1261 self.current_rmf=
None
1262 self.current_frame=
None
1263 self.current_index=
None
1264 self.score_threshold=
None
1266 if isinstance(stat_file, str):
1267 self.add_stat_file(stat_file)
1268 elif isinstance(stat_file, list):
1270 self.add_stat_file(f)
1272 def add_stat_file(self,stat_file):
1274 '''check that it is not a pickle file with saved data from a previous calculation'''
1275 self.load_data(stat_file)
1277 if self.number_best_scoring_models:
1278 scores = self.get_scores()
1279 max_score = sorted(scores)[0:min(len(self), self.number_best_scoring_models)][-1]
1280 self.do_filter_by_score(max_score)
1282 except pickle.UnpicklingError:
1283 '''alternatively read the ascii stat files'''
1285 scores,rmf_files,rmf_frame_indexes,features = self.get_info_from_stat_file(stat_file, self.score_threshold)
1286 except (KeyError, SyntaxError):
1290 rh = RMF.open_rmf_file_read_only(stat_file)
1291 nframes = rh.get_number_of_frames()
1292 scores=[0.0]*nframes
1293 rmf_files=[stat_file]*nframes
1294 rmf_frame_indexes=range(nframes)
1300 if len(set(rmf_files)) > 1:
1301 raise (
"Multiple RMF files found")
1304 print(
"StatHierarchyHandler: Error: Trying to set none as rmf_file (probably empty stat file), aborting")
1307 for n,index
in enumerate(rmf_frame_indexes):
1308 featn_dict=dict([(k,features[k][n])
for k
in features])
1311 if self.number_best_scoring_models:
1312 scores=self.get_scores()
1313 max_score=sorted(scores)[0:min(len(self),self.number_best_scoring_models)][-1]
1314 self.do_filter_by_score(max_score)
1316 if not self.is_setup:
1317 RMFHierarchyHandler.__init__(self, self.model,self.get_rmf_names()[0])
1319 self.cache=CacheHierarchyCoordinates(self)
1323 self.current_rmf=self.get_rmf_names()[0]
def save_data(self, filename='data.pkl'):
    """Pickle the accumulated model data to a file for fast reloading.

    @param filename path of the pickle file to create
    """
    with open(filename, 'wb') as out_fh:
        pickle.dump(self.data, out_fh)
1331 def load_data(self,filename='data.pkl'):
1332 with open(filename,
'rb')
as fl:
1333 data_structure=pickle.load(fl)
1335 if not isinstance(data_structure, list):
1336 raise TypeError(
"%filename should contain a list of IMP.pmi.output.DataEntry or IMP.pmi.output.Cluster" % filename)
1339 self.data=data_structure
1342 for cluster
in data_structure:
1343 nmodels+=len(cluster)
1344 self.data=[
None]*nmodels
1345 for cluster
in data_structure:
1346 for n,data
in enumerate(cluster):
1347 index=cluster.members[n]
1348 self.data[index]=data
1350 raise TypeError(
"%filename should contain a list of IMP.pmi.output.DataEntry or IMP.pmi.output.Cluster" % filename)
1352 def set_frame(self,index):
1353 if self.cache
is not None and self.cache[index]:
1354 self.cache.do_update(index)
1356 nm=self.data[index].rmf_name
1357 fidx=self.data[index].rmf_index
1358 if nm != self.current_rmf:
1361 self.current_frame=-1
1362 if fidx!=self.current_frame:
1363 RMFHierarchyHandler.set_frame(self, fidx)
1364 self.current_frame=fidx
1365 if self.cache
is not None:
1366 self.cache.do_store(index)
1368 self.current_index = index
def __getitem__(self, int_slice_adaptor):
    """Integer access loads that frame and returns its data entry;
    slice access returns an iterator over the sliced range."""
    if isinstance(int_slice_adaptor, slice):
        return self.__iter__(int_slice_adaptor)
    if isinstance(int_slice_adaptor, int):
        self.set_frame(int_slice_adaptor)
        return self.data[int_slice_adaptor]
    raise TypeError("Unknown Type")
1380 return len(self.data)
1382 def __iter__(self,slice_key=None):
1383 if slice_key
is None:
1384 for i
in range(len(self)):
1387 for i
in range(len(self))[slice_key]:
def do_filter_by_score(self, maximum_score):
    """Drop every stored entry whose score exceeds `maximum_score`."""
    kept = []
    for entry in self.data:
        if entry.score <= maximum_score:
            kept.append(entry)
    self.data = kept
def get_scores(self):
    """Return the score of every stored model, in storage order."""
    return [entry.score for entry in self.data]
def get_feature_series(self, feature_name):
    """Return the value of feature `feature_name` for every stored model."""
    return [entry.features[feature_name] for entry in self.data]
def get_feature_names(self):
    """Return the feature names recorded for the stored models
    (taken from the first entry)."""
    first_entry = self.data[0]
    return first_entry.features.keys()
def get_rmf_names(self):
    """Return the RMF file name of every stored model."""
    return [entry.rmf_name for entry in self.data]
def get_stat_files_names(self):
    """Return the stat file name of every stored model."""
    return [entry.stat_file for entry in self.data]
def get_rmf_indexes(self):
    """Return the RMF frame index of every stored model."""
    return [entry.rmf_index for entry in self.data]
1411 def get_info_from_stat_file(self, stat_file, score_threshold=None):
1415 score_key=self.score_key,
1417 rmf_file_key=
"rmf_file",
1418 rmf_file_frame_key=
"rmf_frame_index",
1419 prefiltervalue=score_threshold,
1424 scores = [float(y)
for y
in models[2]]
1425 rmf_files = models[0]
1426 rmf_frame_indexes = models[1]
1428 return scores, rmf_files, rmf_frame_indexes,features
1433 A class to store data associated to a model
def __init__(self, stat_file=None, rmf_name=None, rmf_index=None,
             score=None, features=None):
    """Record the provenance of a single model.

    @param stat_file name of the stat file the model was read from
    @param rmf_name name of the RMF file holding the coordinates
    @param rmf_index frame index within the RMF file
    @param score total score of the model
    @param features dictionary of stat-file feature values for the model
    """
    self.stat_file = stat_file
    self.rmf_name = rmf_name
    self.rmf_index = rmf_index
    # Store the score: it is read back as self.score by __repr__ and as
    # d.score by StatHierarchyHandler.get_scores()/do_filter_by_score(),
    # but the visible body never assigned it.
    self.score = score
    self.features = features
1443 s=
"IMP.pmi.output.DataEntry\n"
1444 s+=
"---- stat file %s \n"%(self.stat_file)
1445 s+=
"---- rmf file %s \n"%(self.rmf_name)
1446 s+=
"---- rmf index %s \n"%(str(self.rmf_index))
1447 s+=
"---- score %s \n"%(str(self.score))
1448 s+=
"---- number of features %s \n"%(str(len(self.features.keys())))
1454 A container for models organized into clusters
def __init__(self, cid=None):
    """Create an empty cluster.

    @param cid the cluster identifier
    """
    self.cluster_id = cid      # reported by __repr__
    self.members = []          # model indexes; appended to by add_member()
    self.members_data = {}     # model index -> data (e.g. DataEntry)
    self.precision = None      # set by clustering code; reported by __repr__
    self.average_score = None  # recomputed by add_member(); reported by __repr__
    self.center_index = None   # index of the cluster-center model
def add_member(self, index, data=None):
    """Register model `index` (with optional associated `data`) in this
    cluster and refresh the cluster's average score."""
    self.members_data[index] = data
    self.members.append(index)
    self.average_score = self.compute_score()
1468 def compute_score(self):
1470 score=sum([d.score
for d
in self])/len(self)
1471 except AttributeError:
1476 s=
"IMP.pmi.output.Cluster\n"
1477 s+=
"---- cluster_id %s \n"%str(self.cluster_id)
1478 s+=
"---- precision %s \n"%str(self.precision)
1479 s+=
"---- average score %s \n"%str(self.average_score)
1480 s+=
"---- number of members %s \n"%str(len(self.members))
1481 s+=
"---- center index %s \n"%str(self.center_index)
def __getitem__(self, int_slice_adaptor):
    """Integer access returns the data of the n-th member;
    slice access returns an iterator over the sliced members."""
    if isinstance(int_slice_adaptor, slice):
        return self.__iter__(int_slice_adaptor)
    if isinstance(int_slice_adaptor, int):
        member_index = self.members[int_slice_adaptor]
        return self.members_data[member_index]
    raise TypeError("Unknown Type")
1494 return len(self.members)
1496 def __iter__(self,slice_key=None):
1497 if slice_key
is None:
1498 for i
in range(len(self)):
1501 for i
in range(len(self))[slice_key]:
1504 def __add__(self, other):
1505 self.members+=other.members
1506 self.members_data.update(other.members_data)
1507 self.average_score=self.compute_score()
1509 self.center_index=
None
1513 def plot_clusters_populations(clusters):
1516 for cluster
in clusters:
1517 indexes.append(cluster.cluster_id)
1518 populations.append(len(cluster))
1520 import matplotlib.pyplot
as plt
1521 fig, ax = plt.subplots()
1522 ax.bar(indexes, populations, 0.5, color=
'r') #, yerr=men_std)
1523 ax.set_ylabel('Population')
1524 ax.set_xlabel((
'Cluster index'))
1527 def plot_clusters_precisions(clusters):
1530 for cluster
in clusters:
1531 indexes.append(cluster.cluster_id)
1533 prec=cluster.precision
1534 print(cluster.cluster_id,prec)
1537 precisions.append(prec)
1539 import matplotlib.pyplot
as plt
1540 fig, ax = plt.subplots()
1541 ax.bar(indexes, precisions, 0.5, color=
'r') #, yerr=men_std)
1542 ax.set_ylabel('Precision [A]')
1543 ax.set_xlabel((
'Cluster index'))
1546 def plot_clusters_scores(clusters):
1549 for cluster
in clusters:
1550 indexes.append(cluster.cluster_id)
1552 for data
in cluster:
1553 values[-1].append(data.score)
1556 valuename=
"Scores", positionname=
"Cluster index", xlabels=
None,scale_plot_length=1.0)
class CrossLinkIdentifierDatabase(object):
    """Store per-crosslink attributes keyed by a unique crosslink key.

    Each entry is a dictionary of named fields (protein names, residue
    numbers, scores, nuisance values, ...). Entries are created lazily by
    the setters via check_key(), and the whole database can be pickled to
    and restored from disk with write()/load().
    """

    def __init__(self):
        # Maps a crosslink key to a dict of its attributes.
        self.clidb = dict()

    def check_key(self, key):
        """Create an empty entry for `key` if it is not present yet."""
        if key not in self.clidb:
            self.clidb[key] = {}

    def set_unique_id(self, key, value):
        self.check_key(key)
        self.clidb[key]["XLUniqueID"] = str(value)

    def set_protein1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Protein1"] = str(value)

    def set_protein2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Protein2"] = str(value)

    def set_residue1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Residue1"] = int(value)

    def set_residue2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Residue2"] = int(value)

    def set_idscore(self, key, value):
        self.check_key(key)
        self.clidb[key]["IDScore"] = float(value)

    def set_state(self, key, value):
        self.check_key(key)
        self.clidb[key]["State"] = int(value)

    def set_sigma1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Sigma1"] = str(value)

    def set_sigma2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Sigma2"] = str(value)

    def set_psi(self, key, value):
        self.check_key(key)
        self.clidb[key]["Psi"] = str(value)

    def get_unique_id(self, key):
        return self.clidb[key]["XLUniqueID"]

    def get_protein1(self, key):
        return self.clidb[key]["Protein1"]

    def get_protein2(self, key):
        return self.clidb[key]["Protein2"]

    def get_residue1(self, key):
        return self.clidb[key]["Residue1"]

    def get_residue2(self, key):
        return self.clidb[key]["Residue2"]

    def get_idscore(self, key):
        return self.clidb[key]["IDScore"]

    def get_state(self, key):
        return self.clidb[key]["State"]

    def get_sigma1(self, key):
        return self.clidb[key]["Sigma1"]

    def get_sigma2(self, key):
        return self.clidb[key]["Sigma2"]

    def get_psi(self, key):
        return self.clidb[key]["Psi"]

    def set_float_feature(self, key, value, feature_name):
        """Store an arbitrary named feature, coerced to float."""
        self.check_key(key)
        self.clidb[key][feature_name] = float(value)

    def set_int_feature(self, key, value, feature_name):
        """Store an arbitrary named feature, coerced to int."""
        self.check_key(key)
        self.clidb[key][feature_name] = int(value)

    def set_string_feature(self, key, value, feature_name):
        """Store an arbitrary named feature, coerced to str."""
        self.check_key(key)
        self.clidb[key][feature_name] = str(value)

    def get_feature(self, key, feature_name):
        return self.clidb[key][feature_name]

    def write(self, filename):
        """Pickle the whole database to `filename`."""
        with open(filename, 'wb') as handle:
            pickle.dump(self.clidb, handle)

    def load(self, filename):
        """Replace the database contents with the pickle in `filename`.

        NOTE(review): pickle.load on an untrusted file can execute
        arbitrary code; only load files produced by write().
        """
        with open(filename, 'rb') as handle:
            self.clidb = pickle.load(handle)
1660 """Plot the given fields and save a figure as `output`.
1661 The fields generally are extracted from a stat file
1662 using ProcessOutput.get_fields()."""
1663 import matplotlib
as mpl
1665 import matplotlib.pyplot
as plt
1667 plt.rc(
'lines', linewidth=4)
1668 fig, axs = plt.subplots(nrows=len(fields))
1669 fig.set_size_inches(10.5, 5.5 * len(fields))
1674 if framemin
is None:
1676 if framemax
is None:
1677 framemax = len(fields[key])
1678 x = list(range(framemin, framemax))
1679 y = [float(y)
for y
in fields[key][framemin:framemax]]
1682 axs[n].set_title(key, size=
"xx-large")
1683 axs[n].tick_params(labelsize=18, pad=10)
1686 axs.set_title(key, size=
"xx-large")
1687 axs.tick_params(labelsize=18, pad=10)
1691 plt.subplots_adjust(hspace=0.3)
1696 name, values_lists, valuename=
None, bins=40, colors=
None, format=
"png",
1697 reference_xline=
None, yplotrange=
None, xplotrange=
None, normalized=
True,
1699 '''Plot a list of histograms from a value list.
1700 @param name the name of the plot
1701 @param value_lists the list of list of values eg: [[...],[...],[...]]
1702 @param valuename the y-label
1703 @param bins the number of bins
1704 @param colors If None, will use rainbow. Else will use specific list
1705 @param format output format
1706 @param reference_xline plot a reference line parallel to the y-axis
1707 @param yplotrange the range for the y-axis
1708 @param normalized whether the histogram is normalized or not
1709 @param leg_names names for the legend
1712 import matplotlib
as mpl
1714 import matplotlib.pyplot
as plt
1715 import matplotlib.cm
as cm
1716 fig = plt.figure(figsize=(18.0, 9.0))
1719 colors = cm.rainbow(np.linspace(0, 1, len(values_lists)))
1720 for nv,values
in enumerate(values_lists):
1722 if leg_names
is not None:
1727 [float(y)
for y
in values],
1730 density=normalized,histtype=
'step',lw=4,
1734 plt.tick_params(labelsize=12, pad=10)
1735 if valuename
is None:
1736 plt.xlabel(name, size=
"xx-large")
1738 plt.xlabel(valuename, size=
"xx-large")
1739 plt.ylabel(
"Frequency", size=
"xx-large")
1741 if not yplotrange
is None:
1743 if not xplotrange
is None:
1744 plt.xlim(xplotrange)
1748 if not reference_xline
is None:
1755 plt.savefig(name +
"." + format, dpi=150, transparent=
True)
1759 valuename=
"None", positionname=
"None", xlabels=
None,scale_plot_length=1.0):
1761 Plot time series as boxplots.
1762 fields is a list of time series, positions are the x-values
1763 valuename is the y-label, positionname is the x-label
1766 import matplotlib
as mpl
1768 import matplotlib.pyplot
as plt
1769 from matplotlib.patches
import Polygon
1772 fig = plt.figure(figsize=(float(len(positions))*scale_plot_length, 5.0))
1773 fig.canvas.set_window_title(name)
1775 ax1 = fig.add_subplot(111)
1777 plt.subplots_adjust(left=0.1, right=0.990, top=0.95, bottom=0.4)
1779 bps.append(plt.boxplot(values, notch=0, sym=
'', vert=1,
1780 whis=1.5, positions=positions))
1782 plt.setp(bps[-1][
'boxes'], color=
'black', lw=1.5)
1783 plt.setp(bps[-1][
'whiskers'], color=
'black', ls=
":", lw=1.5)
1785 if frequencies
is not None:
1786 for n,v
in enumerate(values):
1787 plist=[positions[n]]*len(v)
1788 ax1.plot(plist, v,
'gx', alpha=0.7, markersize=7)
1791 if not xlabels
is None:
1792 ax1.set_xticklabels(xlabels)
1793 plt.xticks(rotation=90)
1794 plt.xlabel(positionname)
1795 plt.ylabel(valuename)
1797 plt.savefig(name+
".pdf",dpi=150)
1801 def plot_xy_data(x,y,title=None,out_fn=None,display=True,set_plot_yaxis_range=None,
1802 xlabel=
None,ylabel=
None):
1803 import matplotlib
as mpl
1805 import matplotlib.pyplot
as plt
1806 plt.rc(
'lines', linewidth=2)
1808 fig, ax = plt.subplots(nrows=1)
1809 fig.set_size_inches(8,4.5)
1810 if title
is not None:
1811 fig.canvas.set_window_title(title)
1814 ax.plot(x,y,color=
'r')
1815 if set_plot_yaxis_range
is not None:
1816 x1,x2,y1,y2=plt.axis()
1817 y1=set_plot_yaxis_range[0]
1818 y2=set_plot_yaxis_range[1]
1819 plt.axis((x1,x2,y1,y2))
1820 if title
is not None:
1822 if xlabel
is not None:
1823 ax.set_xlabel(xlabel)
1824 if ylabel
is not None:
1825 ax.set_ylabel(ylabel)
1826 if out_fn
is not None:
1827 plt.savefig(out_fn+
".pdf")
1832 def plot_scatter_xy_data(x,y,labelx="None",labely="None",
1833 xmin=
None,xmax=
None,ymin=
None,ymax=
None,
1834 savefile=
False,filename=
"None.eps",alpha=0.75):
1836 import matplotlib
as mpl
1838 import matplotlib.pyplot
as plt
1840 from matplotlib
import rc
1842 rc(
'font',**{
'family':
'sans-serif',
'sans-serif':[
'Helvetica']})
1845 fig, axs = plt.subplots(1)
1849 axs0.set_xlabel(labelx, size=
"xx-large")
1850 axs0.set_ylabel(labely, size=
"xx-large")
1851 axs0.tick_params(labelsize=18, pad=10)
1855 plot2.append(axs0.plot(x, y,
'o', color=
'k',lw=2, ms=0.1, alpha=alpha, c=
"w"))
1864 fig.set_size_inches(8.0, 8.0)
1865 fig.subplots_adjust(left=0.161, right=0.850, top=0.95, bottom=0.11)
1866 if (
not ymin
is None)
and (
not ymax
is None):
1867 axs0.set_ylim(ymin,ymax)
1868 if (
not xmin
is None)
and (
not xmax
is None):
1869 axs0.set_xlim(xmin,xmax)
1873 fig.savefig(filename, dpi=300)
1876 def get_graph_from_hierarchy(hier):
1880 (graph, depth, depth_dict) = recursive_graph(
1881 hier, graph, depth, depth_dict)
1884 node_labels_dict = {}
1886 for key
in depth_dict:
1887 node_size_dict = 10 / depth_dict[key]
1888 if depth_dict[key] < 3:
1889 node_labels_dict[key] = key
1891 node_labels_dict[key] =
""
1892 draw_graph(graph, labels_dict=node_labels_dict)
1895 def recursive_graph(hier, graph, depth, depth_dict):
1898 index = str(hier.get_particle().
get_index())
1899 name1 = nameh +
"|#" + index
1900 depth_dict[name1] = depth
1904 if len(children) == 1
or children
is None:
1906 return (graph, depth, depth_dict)
1910 (graph, depth, depth_dict) = recursive_graph(
1911 c, graph, depth, depth_dict)
1913 index = str(c.get_particle().
get_index())
1914 namec = nameh +
"|#" + index
1915 graph.append((name1, namec))
1918 return (graph, depth, depth_dict)
1921 def draw_graph(graph, labels_dict=None, graph_layout='spring',
1922 node_size=5, node_color=
None, node_alpha=0.3,
1923 node_text_size=11, fixed=
None, pos=
None,
1924 edge_color=
'blue', edge_alpha=0.3, edge_thickness=1,
1926 validation_edges=
None,
1927 text_font=
'sans-serif',
1930 import matplotlib
as mpl
1932 import networkx
as nx
1933 import matplotlib.pyplot
as plt
1934 from math
import sqrt, pi
1940 if isinstance(edge_thickness, list):
1941 for edge,weight
in zip(graph,edge_thickness):
1942 G.add_edge(edge[0], edge[1], weight=weight)
1945 G.add_edge(edge[0], edge[1])
1947 if node_color==
None:
1948 node_color_rgb=(0,0,0)
1949 node_color_hex=
"000000"
1954 for node
in G.nodes():
1955 cctuple=cc.rgb(node_color[node])
1956 tmpcolor_rgb.append((cctuple[0]/255,cctuple[1]/255,cctuple[2]/255))
1957 tmpcolor_hex.append(node_color[node])
1958 node_color_rgb=tmpcolor_rgb
1959 node_color_hex=tmpcolor_hex
1962 if isinstance(node_size, dict):
1964 for node
in G.nodes():
1965 size=sqrt(node_size[node])/pi*10.0
1966 tmpsize.append(size)
1969 for n,node
in enumerate(G.nodes()):
1970 color=node_color_hex[n]
1972 nx.set_node_attributes(G,
"graphics", {node : {
'type':
'ellipse',
'w': size,
'h': size,
'fill':
'#'+color,
'label': node}})
1973 nx.set_node_attributes(G,
"LabelGraphics", {node : {
'type':
'text',
'text':node,
'color':
'#000000',
'visible':
'true'}})
1975 for edge
in G.edges():
1976 nx.set_edge_attributes(G,
"graphics", {edge : {
'width': 1,
'fill':
'#000000'}})
1978 for ve
in validation_edges:
1980 if (ve[0],ve[1])
in G.edges():
1981 print(
"found forward")
1982 nx.set_edge_attributes(G,
"graphics", {ve : {
'width': 1,
'fill':
'#00FF00'}})
1983 elif (ve[1],ve[0])
in G.edges():
1984 print(
"found backward")
1985 nx.set_edge_attributes(G,
"graphics", {(ve[1],ve[0]) : {
'width': 1,
'fill':
'#00FF00'}})
1987 G.add_edge(ve[0], ve[1])
1989 nx.set_edge_attributes(G,
"graphics", {ve : {
'width': 1,
'fill':
'#FF0000'}})
1993 if graph_layout ==
'spring':
1995 graph_pos = nx.spring_layout(G,k=1.0/8.0,fixed=fixed,pos=pos)
1996 elif graph_layout ==
'spectral':
1997 graph_pos = nx.spectral_layout(G)
1998 elif graph_layout ==
'random':
1999 graph_pos = nx.random_layout(G)
2001 graph_pos = nx.shell_layout(G)
2005 nx.draw_networkx_nodes(G, graph_pos, node_size=node_size,
2006 alpha=node_alpha, node_color=node_color_rgb,
2008 nx.draw_networkx_edges(G, graph_pos, width=edge_thickness,
2009 alpha=edge_alpha, edge_color=edge_color)
2010 nx.draw_networkx_labels(
2011 G, graph_pos, labels=labels_dict, font_size=node_text_size,
2012 font_family=text_font)
2014 plt.savefig(out_filename)
2015 nx.write_gml(G,
'out.gml')
2023 from ipyD3
import d3object
2024 from IPython.display
import display
2026 d3 = d3object(width=800,
2031 title=
'Example table with d3js',
2032 desc=
'An example table created created with d3js with data generated with Python.')
2118 [72.0, 60.0, 60.0, 10.0, 120.0, 172.0, 1092.0, 675.0, 408.0, 360.0, 156.0, 100.0]]
2119 data = [list(i)
for i
in zip(*data)]
2120 sRows = [[
'January',
2132 sColumns = [[
'Prod {0}'.format(i)
for i
in range(1, 9)],
2133 [
None,
'',
None,
None,
'Group 1',
None,
None,
'Group 2']]
2134 d3.addSimpleTable(data,
2135 fontSizeCells=[12, ],
2138 sRowsMargins=[5, 50, 0],
2139 sColsMargins=[5, 20, 10],
2142 addOutsideBorders=-1,
2146 html = d3.render(mode=[
'html',
'show'])
static bool get_is_setup(const IMP::ParticleAdaptor &p)
A container for models organized into clusters.
A class for reading stat files (either rmf or ascii v1 and v2)
atom::Hierarchies create_hierarchies(RMF::FileConstHandle fh, Model *m)
RMF::FrameID save_frame(RMF::FileHandle file, std::string name="")
Save the current state of the linked objects as a new RMF frame.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
def plot_field_histogram
Plot a list of histograms from a value list.
def plot_fields_box_plots
Plot time series as boxplots.
Utility classes and functions for reading and storing PMI files.
def get_best_models
Given a list of stat files, read them all and find the best models.
A class to store data associated to a model.
void handle_use_deprecated(std::string message)
std::string get_module_version()
Return the version of this module, as a string.
void write_pdb(const Selection &mhd, TextOutput out, unsigned int model=1)
Collect statistics from ProcessOutput.get_fields().
def get_fields
Get the desired field names, and return a dictionary.
Warning related to handling of structures.
static bool get_is_setup(Model *m, ParticleIndex pi)
def link_to_rmf
Link to another RMF file.
std::string get_molecule_name_and_copy(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and get the "molname.copynum".
The standard decorator for manipulating molecular structures.
Ints get_index(const ParticlesTemp &particles, const Subset &subset, const Subsets &excluded)
def init_pdb
Init PDB Writing.
int get_number_of_frames(const ::npctransport_proto::Assignment &config, double time_step)
A decorator for a particle representing an atom.
Base class for capturing a modeling protocol.
void load_frame(RMF::FileConstHandle file, RMF::FrameID frame)
Load the given RMF frame into the state of the linked objects.
A decorator for a particle with x,y,z coordinates.
void add_hierarchies(RMF::NodeHandle fh, const atom::Hierarchies &hs)
Class for easy writing of PDBs, RMFs, and stat files.
void add_geometries(RMF::NodeHandle parent, const display::GeometriesTemp &r)
Add geometries to a given parent node.
void add_restraints(RMF::NodeHandle fh, const Restraints &hs)
A decorator for a particle that is part of a rigid body but not rigid.
bool get_is_canonical(atom::Hierarchy h)
Walk up a PMI2 hierarchy/representations and check if the root is named System.
Display a segment connecting a pair of particles.
A decorator for a residue.
Basic functionality that is expected to be used by a wide variety of IMP users.
def get_pdb_names
Get a list of all PDB files being output by this instance.
def get_prot_name_from_particle
Get the protein name from the particle.
class to link stat files to several rmf files
class to allow more advanced handling of RMF files.
void link_hierarchies(RMF::FileConstHandle fh, const atom::Hierarchies &hs)
def plot_fields
Plot the given fields and save a figure as output.
void add_geometry(RMF::FileHandle file, display::Geometry *r)
Add a single geometry to the file.
Store info for a chain of a protein.
Python classes to represent, score, sample and analyze models.
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
def init_rmf
Initialize an RMF file.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
std::string get_module_version()
Return the version of this module, as a string.
A decorator for a particle with x,y,z coordinates and a radius.