1 """@namespace IMP.pmi.output
2 Classes for writing output files and processing them.
5 from __future__
import print_function, division
try:
    import cPickle as pickle
except ImportError:
    import pickle

23 """Base class for capturing a modeling protocol.
24 Unlike simple output of model coordinates, a complete
25 protocol includes the input data used, details on the restraints,
26 sampling, and clustering, as well as output models.
27 Use via IMP.pmi.representation.Representation.add_protocol_output()
29 IMP.pmi.topology.System.add_protocol_output() (for PMI 2).
31 @see IMP.pmi.mmcif.ProtocolOutput for a concrete subclass that outputs
def _flatten(seq):
    """Flatten a nested list/tuple into a single flat list"""
    l = []
    for elt in seq:
        t = type(elt)
        if t is tuple or t is list:
            for elt2 in _flatten(elt):
                l.append(elt2)
        else:
            l.append(elt)
    return l

48 """Class for easy writing of PDBs, RMFs, and stat files"""
49 def __init__(self, ascii=True,atomistic=False):
50 self.dictionary_pdbs = {}
51 self.dictionary_rmfs = {}
52 self.dictionary_stats = {}
53 self.dictionary_stats2 = {}
54 self.best_score_list =
None
55 self.nbestscoring =
None
57 self.replica_exchange =
False
61 self.chainids =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
63 self.particle_infos_for_pdb = {}
64 self.atomistic=atomistic
    def get_pdb_names(self):
        return list(self.dictionary_pdbs.keys())

    def get_rmf_names(self):
        return list(self.dictionary_rmfs.keys())

    def get_stat_names(self):
        return list(self.dictionary_stats.keys())

    def _init_dictchain(self, name, prot):
        self.dictchain[name] = {}
        if IMP.pmi.get_is_canonical(prot):
            # PMI 2 hierarchy: take the chain ID assigned to each molecule
            for n, mol in enumerate(IMP.atom.get_by_type(prot, IMP.atom.MOLECULE_TYPE)):
                self.dictchain[name][IMP.pmi.get_molecule_name_and_copy(mol)] = \
                    IMP.atom.Chain(mol).get_id()
        else:
            # PMI 1 hierarchy: assign chain IDs in the order of the children
            for n, i in enumerate(self.dictionary_pdbs[name].get_children()):
                self.dictchain[name][i.get_name()] = self.chainids[n]

    def init_pdb(self, name, prot):
        """Init PDB writing.
        @param name The PDB filename
        @param prot The hierarchy to write to this PDB file
        \note If the PDB name is 'System', a Selection will be used to get the molecules.
        """
        flpdb = open(name, 'w')
        flpdb.close()
        self.dictionary_pdbs[name] = prot
        self._init_dictchain(name, prot)
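
    # Minimal usage sketch (not executed on import): write the current
    # coordinates of a PMI hierarchy to a PDB file.  `root_hier` is an
    # assumed name for a hierarchy built elsewhere (e.g. with
    # IMP.pmi.topology or a Representation).
    #
    #   output = Output(atomistic=False)
    #   output.init_pdb("model.pdb", root_hier)
    #   output.write_pdb("model.pdb", appendmode=False)
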
    def write_psf(self, filename, name):
        flpsf = open(filename, 'w')
        flpsf.write("PSF CMAP CHEQ" + "\n")
        index_residue_pair_list = {}
        (particle_infos_for_pdb, geometric_center) = self.get_particle_infos_for_pdb_writing(name)
        nparticles = len(particle_infos_for_pdb)
        flpsf.write(str(nparticles) + " !NATOM" + "\n")
        for n, p in enumerate(particle_infos_for_pdb):
            atom_index = n + 1
            residue_type = p[2]
            chain = p[3]
            resid = p[4]
            flpsf.write('{0:8d}{1:1s}{2:4s}{3:1s}{4:4s}{5:1s}{6:4s}{7:1s}{8:4s}{9:1s}{10:4s}{11:14.6f}{12:14.6f}{13:8d}{14:14.6f}{15:14.6f}'.format(
                atom_index, " ", chain, " ", str(resid), " ",
                '"' + residue_type.get_string() + '"', " ", "C", " ", "C",
                1.0, 0.0, 0, 0.0, 0.0))
            flpsf.write('\n')
            if chain not in index_residue_pair_list:
                index_residue_pair_list[chain] = [(atom_index, resid)]
            else:
                index_residue_pair_list[chain].append((atom_index, resid))

        # now write the connectivity
        indexes_pairs = []
        for chain in sorted(index_residue_pair_list.keys()):
            ls = index_residue_pair_list[chain]
            # sort by residue index
            ls = sorted(ls, key=lambda tup: tup[1])
            # get the atom indexes and pair up consecutive ones
            indexes = [x[0] for x in ls]
            indexes_pairs += list(IMP.pmi.tools.sublist_iterator(indexes, lmin=2, lmax=2))
        nbonds = len(indexes_pairs)
        flpsf.write(str(nbonds) + " !NBOND: bonds" + "\n")

        # save the bonds in fixed column format, four pairs per line
        sublists = [indexes_pairs[i:i + 4] for i in range(0, len(indexes_pairs), 4)]
        for ip in sublists:
            if len(ip) == 4:
                flpsf.write('{0:8d}{1:8d}{2:8d}{3:8d}{4:8d}{5:8d}{6:8d}{7:8d}'.format(
                    ip[0][0], ip[0][1], ip[1][0], ip[1][1],
                    ip[2][0], ip[2][1], ip[3][0], ip[3][1]))
            elif len(ip) == 3:
                flpsf.write('{0:8d}{1:8d}{2:8d}{3:8d}{4:8d}{5:8d}'.format(
                    ip[0][0], ip[0][1], ip[1][0], ip[1][1], ip[2][0], ip[2][1]))
            elif len(ip) == 2:
                flpsf.write('{0:8d}{1:8d}{2:8d}{3:8d}'.format(
                    ip[0][0], ip[0][1], ip[1][0], ip[1][1]))
            elif len(ip) == 1:
                flpsf.write('{0:8d}{1:8d}'.format(ip[0][0], ip[0][1]))
            flpsf.write('\n')

        del particle_infos_for_pdb
        flpsf.close()
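
    # Usage sketch: a PSF is written for a PDB that was already registered
    # with init_pdb(), so the bead connectivity can be inferred from the same
    # particle list.  `root_hier` is an assumed hierarchy as above.
    #
    #   output.init_pdb("model.pdb", root_hier)
    #   output.write_psf("model.psf", "model.pdb")
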
    def write_pdb(self, name,
                  appendmode=True,
                  translate_to_geometric_center=False,
                  write_all_residues_per_bead=False):
        if appendmode:
            flpdb = open(name, 'a')
        else:
            flpdb = open(name, 'w')

        (particle_infos_for_pdb,
         geometric_center) = self.get_particle_infos_for_pdb_writing(name)

        if not translate_to_geometric_center:
            geometric_center = (0, 0, 0)

        for n, tupl in enumerate(particle_infos_for_pdb):
            (xyz, atom_type, residue_type,
             chain_id, residue_index, all_indexes, radius) = tupl
            if atom_type is None:
                atom_type = IMP.atom.AT_CA
            if write_all_residues_per_bead and all_indexes is not None:
                for residue_number in all_indexes:
                    flpdb.write(IMP.atom.get_pdb_string((xyz[0] - geometric_center[0],
                                                         xyz[1] - geometric_center[1],
                                                         xyz[2] - geometric_center[2]),
                                                        n + 1, atom_type, residue_type,
                                                        chain_id, residue_number,
                                                        ' ', 1.00, radius))
            else:
                flpdb.write(IMP.atom.get_pdb_string((xyz[0] - geometric_center[0],
                                                     xyz[1] - geometric_center[1],
                                                     xyz[2] - geometric_center[2]),
                                                    n + 1, atom_type, residue_type,
                                                    chain_id, residue_index,
                                                    ' ', 1.00, radius))
        flpdb.write("ENDMDL\n")
        flpdb.close()

        del particle_infos_for_pdb

195 """Get the protein name from the particle.
196 This is done by traversing the hierarchy."""
201 p, self.dictchain[name])
    def get_particle_infos_for_pdb_writing(self, name):
        # keep track of residues that have already been added, and collect
        # one info tuple per particle to be written out
        resindexes_dict = {}
        particle_infos_for_pdb = []

        geometric_center = [0, 0, 0]
        atom_count = 0

        ps = IMP.atom.Selection(self.dictionary_pdbs[name], resolution=0).get_selected_particles()

        for n, p in enumerate(ps):
            protname, is_a_bead = self.get_prot_name_from_particle(name, p)

            if protname not in resindexes_dict:
                resindexes_dict[protname] = []

            if IMP.atom.Atom.get_is_setup(p) and self.atomistic:
                residue = IMP.atom.Residue(IMP.atom.Atom(p).get_parent())
                rt = residue.get_residue_type()
                resind = residue.get_index()
                atomtype = IMP.atom.Atom(p).get_atom_type()
                xyz = list(IMP.core.XYZ(p).get_coordinates())
                radius = IMP.core.XYZR(p).get_radius()
                geometric_center[0] += xyz[0]
                geometric_center[1] += xyz[1]
                geometric_center[2] += xyz[2]
                atom_count += 1
                particle_infos_for_pdb.append((xyz,
                    atomtype, rt, self.dictchain[name][protname], resind,
                    None, radius))
                resindexes_dict[protname].append(resind)

            elif IMP.atom.Residue.get_is_setup(p):
                residue = IMP.atom.Residue(p)
                resind = residue.get_index()
                # skip residues already added at atomic resolution
                if resind in resindexes_dict[protname]:
                    continue
                resindexes_dict[protname].append(resind)
                rt = residue.get_residue_type()
                xyz = IMP.core.XYZ(p).get_coordinates()
                radius = IMP.core.XYZR(p).get_radius()
                geometric_center[0] += xyz[0]
                geometric_center[1] += xyz[1]
                geometric_center[2] += xyz[2]
                atom_count += 1
                particle_infos_for_pdb.append((xyz, None,
                    rt, self.dictchain[name][protname], resind,
                    None, radius))

            elif IMP.atom.Fragment.get_is_setup(p) and not is_a_bead:
                resindexes = IMP.pmi.tools.get_residue_indexes(p)
                resind = resindexes[len(resindexes) // 2]
                if resind in resindexes_dict[protname]:
                    continue
                resindexes_dict[protname].append(resind)
                rt = IMP.atom.ResidueType("BEA")
                xyz = IMP.core.XYZ(p).get_coordinates()
                radius = IMP.core.XYZR(p).get_radius()
                geometric_center[0] += xyz[0]
                geometric_center[1] += xyz[1]
                geometric_center[2] += xyz[2]
                atom_count += 1
                particle_infos_for_pdb.append((xyz, None,
                    rt, self.dictchain[name][protname], resind, resindexes, radius))

            elif is_a_bead:
                rt = IMP.atom.ResidueType("BEA")
                resindexes = IMP.pmi.tools.get_residue_indexes(p)
                if len(resindexes) > 0:
                    resind = resindexes[len(resindexes) // 2]
                    xyz = IMP.core.XYZ(p).get_coordinates()
                    radius = IMP.core.XYZR(p).get_radius()
                    geometric_center[0] += xyz[0]
                    geometric_center[1] += xyz[1]
                    geometric_center[2] += xyz[2]
                    atom_count += 1
                    particle_infos_for_pdb.append((xyz, None,
                        rt, self.dictchain[name][protname], resind, resindexes, radius))

        if atom_count > 0:
            geometric_center = (geometric_center[0] / atom_count,
                                geometric_center[1] / atom_count,
                                geometric_center[2] / atom_count)

        particle_infos_for_pdb = sorted(particle_infos_for_pdb, key=operator.itemgetter(3, 4))

        return (particle_infos_for_pdb, geometric_center)

    def write_pdbs(self, appendmode=True):
        for pdb in self.dictionary_pdbs.keys():
            self.write_pdb(pdb, appendmode)

    def init_pdb_best_scoring(self,
                              suffix,
                              prot,
                              nbestscoring,
                              replica_exchange=False):
        # save only the nbestscoring conformations,
        # creating as many PDB files as needed
        self.suffixes.append(suffix)
        self.replica_exchange = replica_exchange
        if not self.replica_exchange:
            # common usage: when not in replica exchange mode,
            # the list of best scores is kept internally
            self.best_score_list = []
        else:
            # otherwise the replicas communicate through a common file
            # to know what the best scores are
            self.best_score_file_name = "best.scores.rex.py"
            self.best_score_list = []
            best_score_file = open(self.best_score_file_name, "w")
            best_score_file.write(
                "self.best_score_list=" + str(self.best_score_list))
            best_score_file.close()

        self.nbestscoring = nbestscoring
        for i in range(self.nbestscoring):
            name = suffix + "." + str(i) + ".pdb"
            flpdb = open(name, 'w')
            flpdb.close()
            self.dictionary_pdbs[name] = prot
            self._init_dictchain(name, prot)

    def write_pdb_best_scoring(self, score):
        if self.nbestscoring is None:
            print("Output.write_pdb_best_scoring: init_pdb_best_scoring not run")

        # update the score list
        if self.replica_exchange:
            # read self.best_score_list from the shared file
            exec(open(self.best_score_file_name).read())

        if len(self.best_score_list) < self.nbestscoring:
            self.best_score_list.append(score)
            self.best_score_list.sort()
            index = self.best_score_list.index(score)
            for suffix in self.suffixes:
                for i in range(len(self.best_score_list) - 2, index - 1, -1):
                    oldname = suffix + "." + str(i) + ".pdb"
                    newname = suffix + "." + str(i + 1) + ".pdb"
                    # os.rename on Windows fails if newname already exists
                    if os.path.exists(newname):
                        os.unlink(newname)
                    os.rename(oldname, newname)
                filetoadd = suffix + "." + str(index) + ".pdb"
                self.write_pdb(filetoadd, appendmode=False)
        else:
            if score < self.best_score_list[-1]:
                self.best_score_list.append(score)
                self.best_score_list.sort()
                self.best_score_list.pop(-1)
                index = self.best_score_list.index(score)
                for suffix in self.suffixes:
                    for i in range(len(self.best_score_list) - 1, index - 1, -1):
                        oldname = suffix + "." + str(i) + ".pdb"
                        newname = suffix + "." + str(i + 1) + ".pdb"
                        os.rename(oldname, newname)
                    filenametoremove = suffix + \
                        "." + str(self.nbestscoring) + ".pdb"
                    os.remove(filenametoremove)
                    filetoadd = suffix + "." + str(index) + ".pdb"
                    self.write_pdb(filetoadd, appendmode=False)

        if self.replica_exchange:
            # write self.best_score_list back to the shared file
            best_score_file = open(self.best_score_file_name, "w")
            best_score_file.write(
                "self.best_score_list=" + str(self.best_score_list))
            best_score_file.close()
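
    # Usage sketch: keep the 10 best-scoring models seen during sampling.
    # `root_hier`, `nframes` and the per-frame `score` are assumed to come
    # from the user's own sampling loop.
    #
    #   output.init_pdb_best_scoring("pdb_best", root_hier, 10)
    #   for frame in range(nframes):        # hypothetical sampling loop
    #       ...                             # sample, then compute score
    #       output.write_pdb_best_scoring(score)
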
    def init_rmf(self, name, hierarchies, rs=None, geometries=None):
        rh = RMF.create_rmf_file(name)
        IMP.rmf.add_hierarchies(rh, hierarchies)
        if rs is not None:
            IMP.rmf.add_restraints(rh, rs)
        if geometries is not None:
            IMP.rmf.add_geometries(rh, geometries)
        self.dictionary_rmfs[name] = rh

    def add_restraints_to_rmf(self, name, objectlist):
        flatobjectlist = _flatten(objectlist)
        for o in flatobjectlist:
            try:
                rs = o.get_restraint_for_rmf()
            except AttributeError:
                rs = o.get_restraint()
            IMP.rmf.add_restraints(
                self.dictionary_rmfs[name],
                rs.get_restraints())

    def add_geometries_to_rmf(self, name, objectlist):
        for o in objectlist:
            geos = o.get_geometries()
            IMP.rmf.add_geometries(self.dictionary_rmfs[name], geos)

    def add_particle_pair_from_restraints_to_rmf(self, name, objectlist):
        for o in objectlist:
            pps = o.get_particle_pairs()
            for pp in pps:
                IMP.rmf.add_geometry(
                    self.dictionary_rmfs[name],
                    IMP.core.EdgePairGeometry(pp))

    def write_rmf(self, name):
        IMP.rmf.save_frame(self.dictionary_rmfs[name])
        self.dictionary_rmfs[name].flush()

    def close_rmf(self, name):
        del self.dictionary_rmfs[name]

    def write_rmfs(self):
        for rmf in self.dictionary_rmfs.keys():
            self.write_rmf(rmf)
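
    # Usage sketch: dump frames to an RMF file during sampling.  `root_hier`,
    # `restraint_list` and `nframes` are assumptions standing in for the
    # user's hierarchy, IMP.pmi restraint objects and sampling loop.
    #
    #   output.init_rmf("trajectory.rmf3", [root_hier])
    #   output.add_restraints_to_rmf("trajectory.rmf3", restraint_list)
    #   for frame in range(nframes):
    #       ...                             # sample
    #       output.write_rmf("trajectory.rmf3")
    #   output.close_rmf("trajectory.rmf3")
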
    def init_stat(self, name, listofobjects):
        if self.ascii:
            flstat = open(name, 'w')
            flstat.close()
        else:
            flstat = open(name, 'wb')
            flstat.close()

        # check that all objects in listofobjects have a get_output method
        for l in listofobjects:
            if "get_output" not in dir(l):
                raise ValueError(
                    "Output: object %s doesn't have get_output() method" % str(l))
        self.dictionary_stats[name] = listofobjects

    def set_output_entry(self, key, value):
        self.initoutput.update({key: value})

    def write_stat(self, name, appendmode=True):
        output = self.initoutput
        for obj in self.dictionary_stats[name]:
            d = obj.get_output()
            # remove all entries that begin with _ (private entries)
            dfiltered = dict((k, v) for k, v in d.items() if k[0] != "_")
            output.update(dfiltered)

        if appendmode:
            writeflag = 'a'
        else:
            writeflag = 'w'

        if self.ascii:
            flstat = open(name, writeflag)
            flstat.write("%s \n" % output)
            flstat.close()
        else:
            flstat = open(name, writeflag + 'b')
            pickle.dump(output, flstat, 2)
            flstat.close()

    def write_stats(self):
        for stat in self.dictionary_stats.keys():
            self.write_stat(stat)

    def get_stat(self, name):
        output = {}
        for obj in self.dictionary_stats[name]:
            output.update(obj.get_output())
        return output

    def write_test(self, name, listofobjects):
        flstat = open(name, 'w')
        output = self.initoutput
        for l in listofobjects:
            if "get_test_output" not in dir(l) and "get_output" not in dir(l):
                raise ValueError(
                    "Output: object %s doesn't have get_output() or get_test_output() method" % str(l))
        self.dictionary_stats[name] = listofobjects

        for obj in self.dictionary_stats[name]:
            try:
                d = obj.get_test_output()
            except AttributeError:
                d = obj.get_output()
            # remove all entries that begin with _ (private entries)
            dfiltered = dict((k, v) for k, v in d.items() if k[0] != "_")
            output.update(dfiltered)
        flstat.write("%s \n" % output)
        flstat.close()

    def test(self, name, listofobjects, tolerance=1e-5):
        output = self.initoutput
        for l in listofobjects:
            if "get_test_output" not in dir(l) and "get_output" not in dir(l):
                raise ValueError(
                    "Output: object %s doesn't have get_output() or get_test_output() method" % str(l))
        for obj in listofobjects:
            try:
                output.update(obj.get_test_output())
            except AttributeError:
                output.update(obj.get_output())

        flstat = open(name, 'r')

        passed = True
        for l in flstat:
            test_dict = ast.literal_eval(l)
        for k in test_dict:
            if k in output:
                old_value = str(test_dict[k])
                new_value = str(output[k])
                try:
                    float(old_value)
                    is_float = True
                except ValueError:
                    is_float = False

                if is_float:
                    fold = float(old_value)
                    fnew = float(new_value)
                    diff = abs(fold - fnew)
                    if diff > tolerance:
                        print("%s: test failed, old value: %s new value %s; "
                              "diff %f > %f" % (str(k), str(old_value),
                                                str(new_value), diff,
                                                tolerance), file=sys.stderr)
                        passed = False
                elif test_dict[k] != output[k]:
                    if len(old_value) < 50 and len(new_value) < 50:
                        print("%s: test failed, old value: %s new value %s"
                              % (str(k), old_value, new_value), file=sys.stderr)
                        passed = False
                    else:
                        print("%s: test failed, omitting results (too long)"
                              % str(k), file=sys.stderr)
                        passed = False
            else:
                print("%s from old objects (file %s) not in new objects"
                      % (str(k), str(name)), file=sys.stderr)
        return passed

    def get_environment_variables(self):
        import os
        return str(os.environ)

    def get_versions_of_relevant_modules(self):
        import IMP
        versions = {"IMP_VERSION": IMP.get_module_version()}
        try:
            import IMP.pmi
            versions["PMI_VERSION"] = IMP.pmi.get_module_version()
        except ImportError:
            pass
        try:
            import IMP.isd2
            versions["ISD2_VERSION"] = IMP.isd2.get_module_version()
        except ImportError:
            pass
        try:
            import IMP.isd_emxl
            versions["ISD_EMXL_VERSION"] = IMP.isd_emxl.get_module_version()
        except ImportError:
            pass
        return versions

    def init_stat2(self,
                   name,
                   listofobjects,
                   extralabels=None,
                   listofsummedobjects=None):
        # a "stat2" file is less space-greedy than a v1 stat file: the key
        # names are written once in a header line, and each subsequent line
        # only maps key indexes to values.
        # listofsummedobjects must be in the form [([obj1,obj2,obj3,...],label)]

        if listofsummedobjects is None:
            listofsummedobjects = []
        if extralabels is None:
            extralabels = []
        flstat = open(name, 'w')
        output = {}
        stat2_keywords = {"STAT2HEADER": "STAT2HEADER"}
        stat2_keywords.update(
            {"STAT2HEADER_ENVIRON": str(self.get_environment_variables())})
        stat2_keywords.update(
            {"STAT2HEADER_IMP_VERSIONS": str(self.get_versions_of_relevant_modules())})
        stat2_inverse = {}

        for l in listofobjects:
            if "get_output" not in dir(l):
                raise ValueError(
                    "Output: object %s doesn't have get_output() method" % str(l))
            else:
                d = l.get_output()
                # remove all entries that begin with _ (private entries)
                dfiltered = dict((k, v)
                                 for k, v in d.items() if k[0] != "_")
                output.update(dfiltered)

        # check the objects whose _TotalScore entries will be summed
        for l in listofsummedobjects:
            for t in l[0]:
                if "get_output" not in dir(t):
                    raise ValueError(
                        "Output: object %s doesn't have get_output() method" % str(t))
                if "_TotalScore" not in t.get_output():
                    raise ValueError(
                        "Output: object %s doesn't have _TotalScore entry to be summed" % str(t))
            output.update({l[1]: 0.0})

        for k in extralabels:
            output.update({k: 0.0})

        for n, k in enumerate(output):
            stat2_keywords.update({n: k})
            stat2_inverse.update({k: n})

        flstat.write("%s \n" % stat2_keywords)
        flstat.close()
        self.dictionary_stats2[name] = (
            listofobjects,
            stat2_inverse,
            listofsummedobjects,
            extralabels)

    def write_stat2(self, name, appendmode=True):
        output = {}
        (listofobjects, stat2_inverse, listofsummedobjects,
         extralabels) = self.dictionary_stats2[name]

        # write the objects
        for obj in listofobjects:
            od = obj.get_output()
            dfiltered = dict((k, v) for k, v in od.items() if k[0] != "_")
            for k in dfiltered:
                output.update({stat2_inverse[k]: od[k]})

        # write the summed objects
        for l in listofsummedobjects:
            partial_score = 0.0
            for t in l[0]:
                d = t.get_output()
                partial_score += float(d["_TotalScore"])
            output.update({stat2_inverse[l[1]]: str(partial_score)})

        # write the extra labels
        for k in extralabels:
            if k in self.initoutput:
                output.update({stat2_inverse[k]: self.initoutput[k]})
            else:
                output.update({stat2_inverse[k]: "None"})

        if appendmode:
            writeflag = 'a'
        else:
            writeflag = 'w'

        flstat = open(name, writeflag)
        flstat.write("%s \n" % output)
        flstat.close()

    def write_stats2(self):
        for stat in self.dictionary_stats2.keys():
            self.write_stat2(stat)
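
    # Usage sketch: record scores and other outputs in a v2 stat file, one
    # line per frame.  `restraint_list` and `nframes` stand in for any
    # objects providing a get_output() method and the user's sampling loop.
    #
    #   output.init_stat2("stat.out", restraint_list)
    #   for frame in range(nframes):
    #       ...                             # sample
    #       output.write_stat2("stat.out")
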
697 """A class for reading stat files"""
698 def __init__(self, filename):
699 self.filename = filename
704 if not self.filename
is None:
705 f = open(self.filename,
"r")
707 raise ValueError(
"No file name provided. Use -h for help")
710 for line
in f.readlines():
711 d = ast.literal_eval(line)
712 self.klist = list(d.keys())
714 if "STAT2HEADER" in self.klist:
717 if "STAT2HEADER" in str(k):
723 for k
in sorted(stat2_dict.items(), key=operator.itemgetter(1))]
725 for k
in sorted(stat2_dict.items(), key=operator.itemgetter(1))]
726 self.invstat2_dict = {}
728 self.invstat2_dict.update({stat2_dict[k]: k})
731 "Please convert to statfile v2.\n")
    def show_keys(self, ncolumns=2, truncate=65):
        IMP.pmi.tools.print_multicolumn(self.get_keys(), ncolumns, truncate)

    def get_fields(self, fields, filtertuple=None, filterout=None, get_every=1):
        '''
        Get the desired field names, and return a dictionary.
        "fields" are the queried keys in the stat file (e.g. ["Total_Score", ...]).
        The returned data structure is a dictionary, where each key is a field
        and the value is the frame-ordered time series of that field
        (e.g. {"Total_Score": [Score_0, Score_1, Score_2, ...], ...}).

        @param fields (list of strings) queried keys in the stat file (e.g. "Total_Score")
        @param filterout specify if you want to "grep" out something from
                         the file, so that it is faster
        @param filtertuple a tuple ("TheKeyToBeFiltered", relationship, value)
                           where relationship is "<", "==", or ">"
        @param get_every only read every Nth line from the file
        '''

        outdict = {}
        for field in fields:
            outdict[field] = []

        f = open(self.filename, "r")
        line_number = 0

        for line in f.readlines():
            if filterout is not None:
                if filterout in line:
                    continue
            line_number += 1

            if line_number % get_every != 0:
                continue
            try:
                d = ast.literal_eval(line)
            except:
                print("# Warning: skipped line number " + str(line_number) + " not a valid line")
                continue

            if self.isstat1:

                if filtertuple is not None:
                    keytobefiltered = filtertuple[0]
                    relationship = filtertuple[1]
                    value = filtertuple[2]
                    if relationship == "<":
                        if float(d[keytobefiltered]) >= value:
                            continue
                    if relationship == ">":
                        if float(d[keytobefiltered]) <= value:
                            continue
                    if relationship == "==":
                        if float(d[keytobefiltered]) != value:
                            continue
                for field in fields:
                    outdict[field].append(d[field])

            elif self.isstat2:
                # the first line is the STAT2HEADER
                if line_number == 1:
                    continue

                if filtertuple is not None:
                    keytobefiltered = filtertuple[0]
                    relationship = filtertuple[1]
                    value = filtertuple[2]
                    if relationship == "<":
                        if float(d[self.invstat2_dict[keytobefiltered]]) >= value:
                            continue
                    if relationship == ">":
                        if float(d[self.invstat2_dict[keytobefiltered]]) <= value:
                            continue
                    if relationship == "==":
                        if float(d[self.invstat2_dict[keytobefiltered]]) != value:
                            continue

                for field in fields:
                    outdict[field].append(d[self.invstat2_dict[field]])

        f.close()
        return outdict
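
    # Usage sketch: read a time series back from a stat file written by
    # Output.  The key name "Total_Score" is only an example; use get_keys()
    # or show_keys() to list what is actually present in your file.
    #
    #   po = ProcessOutput("stat.out")
    #   po.show_keys()
    #   fields = po.get_fields(["Total_Score"], get_every=10)
    #   scores = [float(s) for s in fields["Total_Score"]]
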
class CrossLinkIdentifierDatabase(object):
    def __init__(self):
        self.clidb = dict()

    def check_key(self, key):
        if key not in self.clidb:
            self.clidb[key] = {}

    def set_unique_id(self, key, value):
        self.check_key(key)
        self.clidb[key]["XLUniqueID"] = str(value)

    def set_protein1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Protein1"] = str(value)

    def set_protein2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Protein2"] = str(value)

    def set_residue1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Residue1"] = int(value)

    def set_residue2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Residue2"] = int(value)

    def set_idscore(self, key, value):
        self.check_key(key)
        self.clidb[key]["IDScore"] = float(value)

    def set_state(self, key, value):
        self.check_key(key)
        self.clidb[key]["State"] = int(value)

    def set_sigma1(self, key, value):
        self.check_key(key)
        self.clidb[key]["Sigma1"] = str(value)

    def set_sigma2(self, key, value):
        self.check_key(key)
        self.clidb[key]["Sigma2"] = str(value)

    def set_psi(self, key, value):
        self.check_key(key)
        self.clidb[key]["Psi"] = str(value)

    def get_unique_id(self, key):
        return self.clidb[key]["XLUniqueID"]

    def get_protein1(self, key):
        return self.clidb[key]["Protein1"]

    def get_protein2(self, key):
        return self.clidb[key]["Protein2"]

    def get_residue1(self, key):
        return self.clidb[key]["Residue1"]

    def get_residue2(self, key):
        return self.clidb[key]["Residue2"]

    def get_idscore(self, key):
        return self.clidb[key]["IDScore"]

    def get_state(self, key):
        return self.clidb[key]["State"]

    def get_sigma1(self, key):
        return self.clidb[key]["Sigma1"]

    def get_sigma2(self, key):
        return self.clidb[key]["Sigma2"]

    def get_psi(self, key):
        return self.clidb[key]["Psi"]

    def set_float_feature(self, key, value, feature_name):
        self.check_key(key)
        self.clidb[key][feature_name] = float(value)

    def set_int_feature(self, key, value, feature_name):
        self.check_key(key)
        self.clidb[key][feature_name] = int(value)

    def set_string_feature(self, key, value, feature_name):
        self.check_key(key)
        self.clidb[key][feature_name] = str(value)

    def get_feature(self, key, feature_name):
        return self.clidb[key][feature_name]

    def write(self, filename):
        with open(filename, 'wb') as handle:
            pickle.dump(self.clidb, handle)

    def load(self, filename):
        with open(filename, 'rb') as handle:
            self.clidb = pickle.load(handle)
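
    # Usage sketch: store per-crosslink bookkeeping under an arbitrary key
    # and pickle it to disk.  The key "XL1" and the file name are just
    # example labels.
    #
    #   clidb = CrossLinkIdentifierDatabase()
    #   clidb.set_protein1("XL1", "ProtA")
    #   clidb.set_protein2("XL1", "ProtB")
    #   clidb.set_residue1("XL1", 10)
    #   clidb.set_residue2("XL1", 20)
    #   clidb.write("clidb.pkl")
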
def plot_fields(fields, framemin=None, framemax=None):
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt

    plt.rc('lines', linewidth=4)
    fig, axs = plt.subplots(nrows=len(fields))
    fig.set_size_inches(10.5, 5.5 * len(fields))
    plt.rc('axes', color_cycle=['r'])

    for n, key in enumerate(fields):
        if framemin is None:
            framemin = 0
        if framemax is None:
            framemax = len(fields[key])
        x = list(range(framemin, framemax))
        y = [float(y) for y in fields[key][framemin:framemax]]
        if len(fields) > 1:
            axs[n].plot(x, y)
            axs[n].set_title(key, size="xx-large")
            axs[n].tick_params(labelsize=18, pad=10)
        else:
            axs.plot(x, y)
            axs.set_title(key, size="xx-large")
            axs.tick_params(labelsize=18, pad=10)

    # tweak spacing between subplots to prevent labels from overlapping
    plt.subplots_adjust(hspace=0.3)
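

# Usage sketch: plot the time series extracted with ProcessOutput.get_fields()
# above; one subplot is drawn per requested key.  "stat.out" and "Total_Score"
# are example names.
#
#   fields = ProcessOutput("stat.out").get_fields(["Total_Score"])
#   plot_fields(fields)
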
def plot_field_histogram(
        name, values_lists, valuename=None, bins=40, colors=None, format="png",
        reference_xline=None, yplotrange=None, xplotrange=None, normalized=True,
        leg_names=None):
    '''Plot a list of histograms from a value list.
    @param name the name of the plot
    @param values_lists the list of lists of values, e.g. [[...],[...],[...]]
    @param valuename the y-label
    @param bins the number of bins
    @param colors If None, will use rainbow. Else will use specific list
    @param format output format
    @param reference_xline plot a reference line parallel to the y-axis
    @param yplotrange the range for the y-axis
    @param xplotrange the range for the x-axis
    @param normalized whether the histogram is normalized or not
    @param leg_names names for the legend
    '''
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.cm as cm
    fig = plt.figure(figsize=(18.0, 9.0))

    if colors is None:
        colors = cm.rainbow(np.linspace(0, 1, len(values_lists)))
    for nv, values in enumerate(values_lists):
        col = colors[nv]
        if leg_names is not None:
            label = leg_names[nv]
        else:
            label = str(nv)
        plt.hist(
            [float(y) for y in values],
            bins=bins,
            color=col,
            normed=normalized, histtype='step', lw=4,
            label=label)

    plt.tick_params(labelsize=12, pad=10)
    if valuename is None:
        plt.xlabel(name, size="xx-large")
    else:
        plt.xlabel(valuename, size="xx-large")
    plt.ylabel("Frequency", size="xx-large")

    if yplotrange is not None:
        plt.ylim(yplotrange)
    if xplotrange is not None:
        plt.xlim(xplotrange)

    plt.legend(loc=2)

    if reference_xline is not None:
        plt.axvline(
            reference_xline,
            color='red',
            linestyle='dashed',
            linewidth=1)

    plt.savefig(name + "." + format, dpi=150, transparent=True)
def plot_fields_box_plots(name, values, positions, frequencies=None,
                          valuename="None", positionname="None",
                          xlabels=None, scale_plot_length=1.0):
    '''
    Plot time series as boxplots.
    fields is a list of time series, positions are the x-values
    valuename is the y-label, positionname is the x-label
    '''
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib.patches import Polygon

    bps = []
    fig = plt.figure(figsize=(float(len(positions)) * scale_plot_length, 5.0))
    fig.canvas.set_window_title(name)

    ax1 = fig.add_subplot(111)

    plt.subplots_adjust(left=0.1, right=0.990, top=0.95, bottom=0.4)

    bps.append(plt.boxplot(values, notch=0, sym='', vert=1,
                           whis=1.5, positions=positions))

    plt.setp(bps[-1]['boxes'], color='black', lw=1.5)
    plt.setp(bps[-1]['whiskers'], color='black', ls=":", lw=1.5)

    if frequencies is not None:
        for n, v in enumerate(values):
            plist = [positions[n]] * len(v)
            ax1.plot(plist, v, 'gx', alpha=0.7, markersize=7)

    if xlabels is not None:
        ax1.set_xticklabels(xlabels)
    plt.xticks(rotation=90)
    plt.xlabel(positionname)
    plt.ylabel(valuename)

    plt.savefig(name + ".pdf", dpi=150)
def plot_xy_data(x, y, title=None, out_fn=None, display=True,
                 set_plot_yaxis_range=None, xlabel=None, ylabel=None):
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    plt.rc('lines', linewidth=2)

    fig, ax = plt.subplots(nrows=1)
    fig.set_size_inches(8, 4.5)
    if title is not None:
        fig.canvas.set_window_title(title)

    ax.plot(x, y, color='r')
    if set_plot_yaxis_range is not None:
        x1, x2, y1, y2 = plt.axis()
        y1 = set_plot_yaxis_range[0]
        y2 = set_plot_yaxis_range[1]
        plt.axis((x1, x2, y1, y2))
    if title is not None:
        ax.set_title(title)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if out_fn is not None:
        plt.savefig(out_fn + ".pdf")
    if display:
        plt.show()
    plt.close(fig)

def plot_scatter_xy_data(x, y, labelx="None", labely="None",
                         xmin=None, xmax=None, ymin=None, ymax=None,
                         savefile=False, filename="None.eps", alpha=0.75):
    import matplotlib as mpl
    mpl.use('Agg')
    import matplotlib.pyplot as plt
    from matplotlib import rc
    rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})

    fig, axs = plt.subplots(1)
    axs0 = axs

    axs0.set_xlabel(labelx, size="xx-large")
    axs0.set_ylabel(labely, size="xx-large")
    axs0.tick_params(labelsize=18, pad=10)

    plot2 = []
    plot2.append(axs0.plot(x, y, 'o', color='k', lw=2, ms=0.1, alpha=alpha, c="w"))

    fig.set_size_inches(8.0, 8.0)
    fig.subplots_adjust(left=0.161, right=0.850, top=0.95, bottom=0.11)
    if ymin is not None and ymax is not None:
        axs0.set_ylim(ymin, ymax)
    if xmin is not None and xmax is not None:
        axs0.set_xlim(xmin, xmax)

    if savefile:
        fig.savefig(filename, dpi=300)

def get_graph_from_hierarchy(hier):
    graph = []
    depth_dict = {}
    depth = 0
    (graph, depth, depth_dict) = recursive_graph(
        hier, graph, depth, depth_dict)

    # filter node labels according to depth_dict
    node_labels_dict = {}
    for key in depth_dict:
        node_size_dict = 10 / depth_dict[key]
        if depth_dict[key] < 3:
            node_labels_dict[key] = key
        else:
            node_labels_dict[key] = ""
    draw_graph(graph, labels_dict=node_labels_dict)


def recursive_graph(hier, graph, depth, depth_dict):
    depth = depth + 1
    nameh = IMP.atom.Hierarchy(hier).get_name()
    index = str(hier.get_particle().get_index())
    name1 = nameh + "|#" + index
    depth_dict[name1] = depth

    children = IMP.atom.Hierarchy(hier).get_children()

    if len(children) == 1 or children is None:
        depth = depth - 1
        return (graph, depth, depth_dict)
    else:
        for c in children:
            (graph, depth, depth_dict) = recursive_graph(
                c, graph, depth, depth_dict)
            nameh = IMP.atom.Hierarchy(c).get_name()
            index = str(c.get_particle().get_index())
            namec = nameh + "|#" + index
            graph.append((name1, namec))

        depth = depth - 1
        return (graph, depth, depth_dict)

def draw_graph(graph, labels_dict=None, graph_layout='spring',
               node_size=5, node_color=None, node_alpha=0.3,
               node_text_size=11, fixed=None, pos=None,
               edge_color='blue', edge_alpha=0.3, edge_thickness=1,
               validation_edges=None,
               text_font='sans-serif',
               out_filename=None):

    import matplotlib as mpl
    mpl.use('Agg')
    import networkx as nx
    import matplotlib.pyplot as plt
    from math import sqrt, pi

    # create the networkx graph and add the edges
    G = nx.Graph()
    if type(edge_thickness) is list:
        for edge, weight in zip(graph, edge_thickness):
            G.add_edge(edge[0], edge[1], weight=weight)
    else:
        for edge in graph:
            G.add_edge(edge[0], edge[1])

    if node_color is None:
        node_color_rgb = (0, 0, 0)
        node_color_hex = "000000"
    else:
        # node_color is a per-node dictionary of hex color strings
        cc = IMP.pmi.tools.ColorChange()
        tmpcolor_rgb = []
        tmpcolor_hex = []
        for node in G.nodes():
            cctuple = cc.rgb(node_color[node])
            tmpcolor_rgb.append((cctuple[0] / 255, cctuple[1] / 255, cctuple[2] / 255))
            tmpcolor_hex.append(node_color[node])
        node_color_rgb = tmpcolor_rgb
        node_color_hex = tmpcolor_hex
    # get per-node sizes if a dictionary was passed
    if type(node_size) is dict:
        tmpsize = []
        for node in G.nodes():
            size = sqrt(node_size[node]) / pi * 10.0
            tmpsize.append(size)
        node_size = tmpsize

    for n, node in enumerate(G.nodes()):
        color = node_color_hex[n]
        size = node_size[n]
        nx.set_node_attributes(G, "graphics",
            {node: {'type': 'ellipse', 'w': size, 'h': size,
                    'fill': '#' + color, 'label': node}})
        nx.set_node_attributes(G, "LabelGraphics",
            {node: {'type': 'text', 'text': node,
                    'color': '#000000', 'visible': 'true'}})

    for edge in G.edges():
        nx.set_edge_attributes(G, "graphics",
            {edge: {'width': 1, 'fill': '#000000'}})
    if validation_edges is not None:
        for ve in validation_edges:
            if (ve[0], ve[1]) in G.edges():
                print("found forward")
                nx.set_edge_attributes(G, "graphics",
                    {ve: {'width': 1, 'fill': '#00FF00'}})
            elif (ve[1], ve[0]) in G.edges():
                print("found backward")
                nx.set_edge_attributes(G, "graphics",
                    {(ve[1], ve[0]): {'width': 1, 'fill': '#00FF00'}})
            else:
                G.add_edge(ve[0], ve[1])
                nx.set_edge_attributes(G, "graphics",
                    {ve: {'width': 1, 'fill': '#FF0000'}})

    # compute the layout
    if graph_layout == 'spring':
        graph_pos = nx.spring_layout(G, k=1.0 / 8.0, fixed=fixed, pos=pos)
    elif graph_layout == 'spectral':
        graph_pos = nx.spectral_layout(G)
    elif graph_layout == 'random':
        graph_pos = nx.random_layout(G)
    else:
        graph_pos = nx.shell_layout(G)

    # draw the graph
    nx.draw_networkx_nodes(G, graph_pos, node_size=node_size,
                           alpha=node_alpha, node_color=node_color_rgb)
    nx.draw_networkx_edges(G, graph_pos, width=edge_thickness,
                           alpha=edge_alpha, edge_color=edge_color)
    nx.draw_networkx_labels(
        G, graph_pos, labels=labels_dict, font_size=node_text_size,
        font_family=text_font)

    if out_filename:
        plt.savefig(out_filename)
        nx.write_gml(G, 'out.gml')

def draw_table():
    # example only: render a table with d3js inside an IPython notebook
    from ipyD3 import d3object
    from IPython.display import display

    d3 = d3object(width=800,
                  title='Example table with d3js',
                  desc='An example table created with d3js with data generated with Python.')
    data = [
        [72.0, 60.0, 60.0, 10.0, 120.0, 172.0, 1092.0, 675.0, 408.0, 360.0, 156.0, 100.0]]
    data = [list(i) for i in zip(*data)]
    sRows = [['January',
              'February',
              'March',
              'April',
              'May',
              'June',
              'July',
              'August',
              'September',
              'October',
              'November',
              'December']]
    sColumns = [['Prod {0}'.format(i) for i in range(1, 9)],
                [None, '', None, None, 'Group 1', None, None, 'Group 2']]
    d3.addSimpleTable(data,
                      fontSizeCells=[12, ],
                      sRows=sRows,
                      sColumns=sColumns,
                      sRowsMargins=[5, 50, 0],
                      sColsMargins=[5, 20, 10],
                      addOutsideBorders=-1)
    html = d3.render(mode=['html', 'show'])
    display(html)