IMP logo
IMP Reference Guide  2.17.0
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 from __future__ import print_function
6 #
7 # The Inferential Structure Determination (ISD) software library
8 #
9 # Authors: Michael Habeck and Wolfgang Rieping
10 #
11 # Copyright (C) Michael Habeck and Wolfgang Rieping
12 #
13 # All rights reserved.
14 #
15 # NO WARRANTY. This library is provided 'as is' without warranty of any
16 # kind, expressed or implied, including, but not limited to the implied
17 # warranties of merchantability and fitness for a particular purpose or
18 # a warranty of non-infringement.
19 #
20 # Distribution of substantively modified versions of this module is
21 # prohibited without the explicit permission of the copyright holders.
22 #
23 
24 import atexit
25 import sys
26 import time
27 import os
28 import os.path
29 import socket
30 
31 
32 try:
33  from queue import Queue # python3
34 except ImportError:
35  from Queue import Queue # python2
36 from threading import Thread
37 
# Module-level debug flag; none of the definitions in this part of the
# module read it.
debug = False

# One-letter amino-acid code -> three-letter residue name for the twenty
# standard amino acids.
code = dict((
    ('A', 'ALA'),
    ('R', 'ARG'),
    ('N', 'ASN'),
    ('D', 'ASP'),
    ('C', 'CYS'),
    ('E', 'GLU'),
    ('Q', 'GLN'),
    ('G', 'GLY'),
    ('H', 'HIS'),
    ('I', 'ILE'),
    ('L', 'LEU'),
    ('K', 'LYS'),
    ('M', 'MET'),
    ('F', 'PHE'),
    ('P', 'PRO'),
    ('S', 'SER'),
    ('T', 'THR'),
    ('W', 'TRP'),
    ('Y', 'TYR'),
    ('V', 'VAL'),
))
62 
63 
def average(x):
    """Return the arithmetic mean of the items of the sized sequence x.

    The sum is divided by float(len(x)); an empty sequence therefore
    raises ZeroDivisionError.
    """
    count = float(len(x))
    total = sum(x)
    return total / count
66 
67 
def atexit_register(*args):
    """Forward all positional arguments to atexit.register().

    The first argument is the function to run at interpreter exit and
    the remaining ones are passed to it.  Nothing is returned.
    """
    atexit.register(*args)
    return None
71 
72 
def atexit_unregister(func):
    """Remove func from the functions that run at interpreter exit.

    Does nothing (and raises nothing) if func was never registered.

    On Python 3 this delegates to atexit.unregister(), which removes
    every registration of func.  On Python 2, where that function does
    not exist, the first matching entry of the private
    atexit._exithandlers list is removed instead.
    """
    unregister = getattr(atexit, 'unregister', None)
    if unregister is not None:
        unregister(func)
        return

    # Python 2 fallback: entries are tuples whose first item is the
    # registered function.
    handlers = getattr(atexit, '_exithandlers', None)
    if handlers is None:
        return

    for position, handler in enumerate(handlers):
        if handler[0] == func:
            handlers.pop(position)
            return
83 
84 
class WatchDog(Thread):

    """Daemon thread that kills the process when no life sign is received.

    Clients call set() with a timestamp (compared against time.time())
    to signal that they are alive.  If the last timestamp is older than
    the timeout, a message is printed (and optionally appended to a log
    file) and the interpreter is terminated with os._exit(0).  In debug
    mode the interpreter is kept alive and the watchdog just stops.
    """

    def __init__(self, timeout, debug=False, logfile=None):
        """
        timeout: in minutes.
        debug: if true, print diagnostics and never terminate the
               interpreter.
        logfile: optional file ('~' is expanded) to which the timeout
                 message is appended.
        """

        Thread.__init__(self)

        self.timeout = timeout * 60.
        self.debug = debug
        self._last_ping = None

        # This flag must not be called "_stop": on Python 3,
        # threading.Thread has an internal _stop() method and shadowing
        # it with a bool makes join()/thread shutdown fail.
        self._stop_requested = False

        if logfile is not None:
            logfile = os.path.expanduser(logfile)

        self.logfile = logfile

        # attribute form of the deprecated setDaemon(True)
        self.daemon = True

    def stop(self):
        "ask the run() loop to terminate after its current iteration"
        self._stop_requested = True

    def set(self, x):
        "set the _last_ping variable of the WatchDog instance"

        if self.debug:
            print('Watchdog: set(%s) called.' % str(x))

        self._last_ping = x

    def _log_timeout(self, message):
        "append message, host name and current time to the log file"

        try:
            # 'a' creates the file when it does not exist yet
            with open(self.logfile, 'a') as f:
                f.write(message + '; host %s, %s\n' %
                        (socket.gethostname(), time.ctime()))

        except IOError:
            # logging is best effort; it must not prevent the shutdown
            pass

    def run(self):
        """run the Watchdog thread, which sits in a loop sleeping for timeout/4. at
        each iteration, and if abs(time() - _last_ping) > timeout, exits.
        """

        while not self._stop_requested:

            if self._last_ping is not None:
                delta = abs(self._last_ping - time.time())
            else:
                delta = None

            if self.debug:

                if delta is None:
                    val = 'N/A s'
                else:
                    val = '%.0f s' % delta

                print('Watchdog: last life sign %s ago; timeout is %d min(s).'
                      % (val, self.timeout / 60.))

            if delta is not None and delta > self.timeout:

                s = 'No life sign for > %d minute(s)' % (self.timeout / 60.)

                print(s + ', exiting...')

                if self.logfile is not None:
                    self._log_timeout(s)

                if not self.debug:
                    # hard exit: no cleanup handlers are run
                    os._exit(0)
                else:
                    print('Watchdog: keeping Python interpreter alive.')
                    self.stop()

            time.sleep(self.timeout / 4.)
169 
170 
class SpinWheel:

    """Text-mode progress spinner written to standard output."""

    # The backslash must be escaped; a lone '\' would escape the closing
    # quote and leave the string literal unterminated.
    symbols = ('-', '/', '|', '\\')

    def __init__(self):
        # index into symbols of the next character to be shown
        self.state = 0

    def update(self, s=''):
        """Redraw the current line as s followed by the next spinner
        symbol, then advance the spinner by one step.
        """

        sys.stdout.write('\r%s%s' % (s, self.symbols[self.state]))
        sys.stdout.flush()

        self.state = (self.state + 1) % len(self.symbols)
186 
187 
class Pipe(object):

    """implements a FIFO pipe that merges lists (see self.put)

    The newest elements are stored at index 0 and the oldest element at
    index -1.  A length that is not positive (the default is -1) means
    that the pipe is unbounded.
    """

    def __init__(self, length=-1):

        self.length = length
        self.pipe = []

    def _truncate(self):
        "drop the oldest elements until the pipe fits into self.length"

        if self.length > 0 and len(self.pipe) > self.length:
            self.pipe = self.pipe[:self.length]

    def put(self, x):
        """if x is subscriptable, insert its contents at the beginning of the pipe.
        Else insert the element itself.
        If the pipe is full, drop the oldest elements.

        Note that strings are subscriptable and are therefore inserted
        character by character, and that an empty sequence is inserted
        as a single element (x[0] fails on it).
        """

        try:
            x[0]
            items = list(x)

        except (TypeError, IndexError, KeyError):
            items = [x]

        self.pipe = items + self.pipe

        # Trim to the full capacity: merging a list may add more than
        # one element at a time.
        self._truncate()

    def append(self, x):
        """x must be a list; its contents are inserted at the beginning of the
        pipe, dropping the oldest (rightmost) elements if necessary
        """

        self.pipe = list(x) + self.pipe

        # Only truncate a bounded pipe; slicing with the default length
        # of -1 would silently drop the last element.
        self._truncate()

    def get(self):
        """returns the oldest element, without popping it out of the pipe.
        Popping occurs in the put() method
        """
        return self.pipe[-1]

    def __getitem__(self, index):
        return self.pipe.__getitem__(index)

    def __len__(self):
        return len(self.pipe)

    def __str__(self):
        return str(self.pipe)

    def is_full(self):
        "True if the number of stored elements equals self.length"
        return len(self.pipe) == self.length

    __repr__ = __str__
239 
240 
class SortedQueue(Queue):

    """Queue whose items are kept sorted by the average of item.time.

    Items must provide a 'time' attribute that Isd's average() accepts.
    _get() does not return the first item; it draws an index with
    Isd.misc.mathutils.draw_dirichlet from weights derived from the
    averaged times (presumably favouring small times -- TODO confirm
    against draw_dirichlet / rescale_uniform).
    """

    def sort(self):
        "sort the stored items by average time and cache these times"

        from numpy import array
        from Isd.misc.mathutils import average

        # Queue stores its items in a deque, which offers neither sort()
        # nor slicing, so the storage is replaced by a sorted list.  A
        # key function is used because cmp-style sorting does not exist
        # on Python 3.
        self.queue = sorted(self.queue, key=lambda item: average(item.time))

        self.times = array([average(x.time) for x in self.queue])

    def _put(self, item):

        Queue._put(self, item)
        self.sort()

    def _get(self):

        from numpy import power
        from Isd.misc.mathutils import draw_dirichlet, rescale_uniform

        # compute "probabilities"

        p = 1. - rescale_uniform(self.times)
        p = power(p, 2.)

        index = draw_dirichlet(p)

        remaining = list(self.queue)
        val = remaining.pop(index)

        self.queue = remaining

        if len(self.queue):
            self.sort()

        return val
277 
278 
def load_pdb(filename):
    """Read a PDB file and return it as a Scientific.IO.PDB.Structure.

    '~' and '~user' prefixes in filename are expanded first.
    """
    import os

    from Scientific.IO.PDB import Structure

    expanded = os.path.expanduser(filename)

    return Structure(expanded)
286 
287 
def copyfiles(src_path, dest_path, pattern=None, verbose=False):
    """Copy every entry of src_path matching the glob pattern to dest_path.

    pattern defaults to '*'.  Files keep their base name.  If verbose
    is true, the path of each copied file is printed.
    """
    import os
    from glob import glob
    from shutil import copyfile

    wildcard = '*' if pattern is None else pattern

    for source in glob(os.path.join(src_path, wildcard)):
        target = os.path.join(dest_path, os.path.basename(source))
        copyfile(source, target)

        if verbose:
            print(source)
304 
305 
def touch(filename):
    """Create filename if it does not exist and update its timestamps.

    The contents of an existing file are left untouched.  Calling this
    on an existing directory is a silent no-op; any other I/O error is
    re-raised unchanged.
    """
    import os

    try:
        # Append mode creates a missing file but, unlike 'w', never
        # truncates an existing one.
        with open(filename, 'a'):
            pass

        # opening for append does not modify the timestamps by itself
        os.utime(filename, None)

    except (IOError, OSError):
        if not os.path.isdir(filename):
            # bare raise keeps errno and traceback of the original error
            raise
318 
319 # Yannick
320 
321 
def read_sequence_file(filename, first_residue_number=1):
    """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
    no:3-letter code.

    A file starting with '>' is read as FASTA: the first line is
    skipped and every remaining non-whitespace character is translated
    with the module-level 'code' table (unknown letters raise KeyError).
    Any other file is read as whitespace-separated 3-letter codes;
    unknown codes only produce a warning and are kept.

    Residue numbers start at first_residue_number.
    Raises IOError if the file cannot be opened.
    """

    filename = os.path.abspath(filename)
    try:
        f = open(filename)
    except IOError:
        raise IOError('Could not open sequence file "%s".' % filename)

    with f:
        seq = f.read().upper()

    if seq.startswith('>'):
        print("Detected FASTA 1-letter sequence")
        pos = seq.find('\n')
        if pos < 0:
            # header line only: there is no sequence to translate
            seq = ''
        else:
            # get rid of first line and get sequence in one line
            seq = ''.join(seq[pos + 1:].split())
        names = [code[i] for i in seq]
    else:
        names = seq.split()
        known = set(code.values())
        for x in names:
            if x not in known:
                print('Warning: unknown 3-letter code: %s' % x)

    numbers = range(first_residue_number, first_residue_number + len(names))
    return dict(zip(numbers, names))
349 
350 # Yannick
351 
352 
def check_residue(a, b):
    """checks whether residue codes a and b are the same, doing necessary
    conversions

    Both codes are upper-cased; 1-letter codes are translated to
    3-letter codes with the module-level 'code' table.  Returns True if
    the resulting 3-letter codes are equal.  Otherwise a message is
    printed and False is returned.
    """
    a = a.upper()
    b = b.upper()

    if len(a) == 1:
        try:
            a = code[a]
        except KeyError:
            print('Warning: unknown 1-letter code: %s' % a)
            return False

    if len(b) == 1:
        try:
            b = code[b]
        except KeyError:
            print('Warning: unknown 1-letter code: %s' % b)
            return False

    if len(a) != 3:
        print('Unknown residue code %s' % a)
        return False

    if len(b) != 3:
        print('Unknown residue code %s' % b)
        return False

    if a == b:
        return True

    print('Residues %s and %s are not the same' % (a, b))
    return False
378 
379 
def my_glob(x, do_touch=False):
    """Return glob(x), optionally calling touch() on the directory first.

    If do_touch is true, touch() is called on the directory part of the
    pattern x before globbing.  A pattern without a directory part
    refers to the current directory.
    """

    from glob import glob

    if do_touch:

        import os

        path = os.path.dirname(x)

        # os.system('touch %s' % path) #this is very inefficient
        # An empty directory part would make touch('') fail, so fall
        # back to the current directory.
        touch(path or os.curdir)  # this is better (4x to 6x faster)

    return glob(x)
394 
395 
def Dump(this, filename, gzip=0, mode='w', bin=1):
    """
    Dump(this, filename, gzip = 0)
    Supports also '~' or '~user'.

    Pickles 'this' into filename.

    gzip: if true, the file is written through gzip.GzipFile.
    mode: 'w' (write) or 'a' (append); anything else raises ValueError.
    bin: passed to pickle.dump as the protocol argument.
    """

    import os
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if mode not in ('w', 'a'):
        raise ValueError("mode has to be 'w' (write) or 'a' (append)")

    if gzip:
        # imported under another name: 'gzip' is also a parameter
        import gzip as gzip_module
        f = gzip_module.GzipFile(filename, mode)
    else:
        # pickle writes bytes, so the file has to be opened in binary
        # mode (a text-mode file raises TypeError on Python 3)
        f = open(filename, mode + 'b')

    try:
        pickle.dump(this, f, bin)
    finally:
        f.close()
422 
423 
def Load(filename, gzip=0, force=0):
    """
    Load(filename, gzip=0, force=0)

    Unpickles all objects stored in filename ('~' is expanded).  The
    first object is returned as is; every further object is merged into
    it with '+='.  Returns None if the file contains no pickle, or if
    gzip is set and the file cannot be opened.

    gzip: if true, the file is read through gzip.GzipFile.
    force: returns all objects that could be unpickled. Useful
    when unpickling of sequential objects fails at some point.
    Without force, the error is re-raised.

    NOTE: unpickling can execute arbitrary code; only load files from
    trusted sources.
    """
    import pickle
    import os

    filename = os.path.expanduser(filename)

    if gzip:
        # imported under another name: 'gzip' is also a parameter
        import gzip as gzip_module
        try:
            f = gzip_module.GzipFile(filename)
        except (IOError, OSError):
            return None
    else:
        f = open(filename, 'rb')

    objects = None
    n = 0

    try:
        while True:

            try:
                chunk = pickle.load(f)

                if objects is None:
                    objects = chunk
                else:
                    objects += chunk

                n += 1

            except EOFError:
                # regular end of file
                break

            except Exception:
                print('Could not load chunk %d. Stopped.' % n)

                if force:
                    break

                raise
    finally:
        f.close()

    return objects
477 
478 
def get_pdb(pdb_entry, dest='.', verbose_level=0):
    """Download a PDB entry by anonymous FTP and decompress it.

    The file 'pdb<entry>.ent.gz' (entry in lower case) is fetched from
    ftp.ebi.ac.uk and stored as '<pdb_entry>.pdb' in the directory dest
    ('~' is expanded).

    verbose_level: debug level passed to ftplib.
    Raises IOError if the server refuses to deliver the file.
    """

    import ftplib
    import gzip
    import os
    import shutil

    url = 'ftp.ebi.ac.uk'
    path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
    filename_template = 'pdb%s.ent.gz'

    dest = os.path.expanduser(dest)

    remote_name = filename_template % pdb_entry.lower()
    filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)

    ftp = ftplib.FTP(url)

    try:
        ftp.login()
        ftp.set_debuglevel(verbose_level)

        ftp.cwd(path)

        try:
            with open(filename, 'wb') as f:
                ftp.retrbinary('RETR %s' % remote_name, f.write)

        except ftplib.error_perm:
            # do not leave an empty or partial download behind
            if os.path.exists(filename):
                os.remove(filename)
            raise IOError('File %s not found on server' % remote_name)

        ftp.quit()

    finally:
        # releases the socket on every path; harmless after quit()
        ftp.close()

    # Same result as 'gunzip -f': write the decompressed file next to
    # the archive and remove the archive.  No shell is involved, so
    # special characters in the path are not an issue.
    target = filename[:-len('.gz')]

    with gzip.open(filename, 'rb') as src:
        with open(target, 'wb') as dst:
            shutil.copyfileobj(src, dst)

    os.remove(filename)
513 
514 
def compile_index_list(chain, atom_names, residue_index_list=None):
    """Collect the '.index' attribute of selected atoms of a chain.

    chain: sequence of residues; each residue maps atom names to atom
           objects that have an 'index' attribute.
    atom_names: names to look up in every residue; None selects all
                atoms of a residue in sorted name order.  Names missing
                from a residue are skipped.
    residue_index_list: positions in chain to visit (default: all).

    Returns (index_list, index_map): the atom indices in visiting order
    and a dict mapping each atom index to its position in index_list.
    """

    if residue_index_list is None:
        residue_index_list = list(range(len(chain)))

    index_list = []
    index_map = {}

    for res_index in residue_index_list:

        residue = chain[res_index]

        if atom_names is None:
            wanted = sorted(residue.keys())
        else:
            wanted = atom_names

        for atom_name in wanted:

            if atom_name not in residue:
                continue

            atom_index = residue[atom_name].index

            # the position equals the number of indices collected so far
            index_map[atom_index] = len(index_list)
            index_list.append(atom_index)

    return index_list, index_map
542 
543 
def get_coordinates(universe, E, indices=None, atom_names=('CA',),
                    residue_index_list=None, atom_index_list=None):
    """Return the coordinates of selected atoms for several conformations.

    For every i in indices (default: range(len(E))) the torsion angles
    E.torsion_angles[i] are applied to the polymer of universe and the
    rows of universe.X listed in atom_index_list are copied.

    atom_index_list: rows of universe.X to extract; if None it is built
                     by compile_index_list from atom_names and
                     residue_index_list.

    Returns an array with one entry per index (presumably of shape
    (len(indices), n_atoms, 3) -- TODO confirm the layout of universe.X).
    """

    # numpy.oldnumeric no longer exists in current NumPy releases
    from numpy import array, take

    if indices is None:
        indices = list(range(len(E)))

    chain = universe.get_polymer()

    if atom_index_list is None:
        atom_index_list, _ = compile_index_list(chain, atom_names,
                                                residue_index_list)

    l = []

    for i in indices:

        chain.set_torsions(E.torsion_angles[i], 1)

        # oldnumeric's take() selected along axis 0 by default, whereas
        # numpy.take() flattens unless the axis is given.
        X = array(take(universe.X, atom_index_list, axis=0))

        l.append(X)

    return array(l)
569 
570 
def map_angles(angles, period=None):
    """
    maps angles into interval [-pi,pi]

    angles: scalar or numpy array.
    period: half-width of the target interval; defaults to pi, i.e.
            values are wrapped modulo 2 * period.
    """

    # numpy.oldnumeric no longer exists in current NumPy releases
    from numpy import fmod, greater, logical_not, pi

    if period is None:
        period = pi

    # fmod keeps the sign of its first argument, so positive and
    # non-positive angles have to be shifted in opposite directions
    mask = greater(angles, 0.)

    return mask * (fmod(angles + period, 2 * period) - period) + \
        logical_not(mask) * (fmod(angles - period, 2 * period) + period)
585 
586 
def remove_from_dict(d, items):
    """Delete every key listed in items from the dict d, in place.

    Keys that are not present in d are ignored.
    """

    for key in items:
        d.pop(key, None)
592 
593 
def myrange(a, b, n):
    """Return an array of n equally spaced values from a to b (inclusive).

    For n == 1 the result only contains a.
    """

    # numpy.oldnumeric no longer exists in current NumPy releases
    from numpy import arange, array

    if n == 1:
        # a single point has no step; avoid the division by zero below
        return array([a], dtype=float)

    # float() forces true division on Python 2 as well
    step = (b - a) / float(n - 1)

    # arange may or may not include the end point because of rounding:
    # generate one step more than needed and cut to n values
    x = arange(a, b + step, step)

    return x[:n]
603 
604 
def indent(lines, prefix):
    """Join lines into one string with a hanging indent.

    The first line is preceded by prefix; all following lines are
    preceded by as many blanks as prefix has characters.  The list
    passed in is not modified.  An empty list yields ''.
    """

    if not lines:
        return ''

    prefix = str(prefix)
    tag = ' ' * len(prefix)

    # build a new list instead of overwriting the caller's lines[0]
    indented = [prefix + lines[0]]
    indented.extend(tag + s for s in lines[1:])

    return '\n'.join(indented)
613 
614 
def make_block(s, length=80, tol=10):
    """Wrap the text s and return the resulting list of lines.

    s is split at newlines and every part is wrapped separately by
    _make_block(part, length, tol); the results are concatenated.
    """
    wrapped = []
    for paragraph in s.split('\n'):
        wrapped.extend(_make_block(paragraph, length, tol))

    return wrapped
622 
623 
624 def _make_block(s, length, tol):
625 
626  l = s.split(' ')
627  l = [(w, ' ') for w in l]
628 
629  words = []
630  for ll in l:
631  g = ll[0].split('/')
632  g = [w + '/' for w in g]
633  g[-1] = g[-1][:-1] + ' '
634 
635  words += g
636 
637  l = []
638  line = ''
639 
640  for i in range(len(words)):
641  word = words[i]
642 
643  if len(line + word) <= length:
644  line += word
645 
646  else:
647  if length - len(line) > tol:
648  m = length - len(line)
649  line += word[:m]
650  word = word[m:]
651 
652  if len(line) > 1 and line[0] == ' ' and \
653  line[1] != ' ':
654  line = line[1:]
655 
656  l.append(line)
657  line = word
658 
659  line = line[:-1]
660  if len(line) > 1 and line[0] == ' ' and \
661  line[1] != ' ':
662  line = line[1:]
663 
664  l.append(line)
665 
666  return l
667 
668 
def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
               gzip=False, bin=True):
    """Dump x to filename, retrying until no IOError occurs.

    After the first IOError a message is printed: the error itself if
    err_msg is None, otherwise err_msg (preceded by the error text if
    show_io_err is true).  The dump is then retried every 'delay'
    minutes; IOErrors of the retries are ignored silently.

    gzip, bin: passed on to Dump().
    """

    try:
        Dump(x, filename, gzip=gzip, bin=bin)
        return

    except IOError as msg:

        if err_msg is None:
            print('IOError: %s' % str(msg))
        elif show_io_err:
            print('%s. %s' % (str(msg), err_msg))
        else:
            print(err_msg)

    import time

    while True:

        # wait for 'delay' minutes before the next attempt

        time.sleep(60. * delay)

        try:
            Dump(x, filename, gzip=gzip, bin=bin)
        except IOError:
            continue

        break
700 
701 
def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
              gzip=False, mode='w', bin=True):
    """Dump x to filename via a temporary file in the same directory.

    x is first written to a temporary file next to filename (see
    _save_dump for the retry behaviour and for err_msg, delay and
    show_io_err).  Then, for mode 'w', the temporary file is renamed to
    filename; for mode 'a', it is deleted and x is appended to filename
    with Dump().

    gzip, bin: passed on to Dump().
    Raises Exception for any other mode.
    """

    import os
    import tempfile

    # Validate first: otherwise an invalid mode is only noticed after
    # the dump has been written, and the temporary file is left behind.
    if mode not in ('w', 'a'):
        raise Exception('Mode "%s" invalid.' % mode)

    path = os.path.dirname(filename)

    # NOTE: mktemp() is only used to obtain a unique base name; the
    # file itself is created in the directory of filename so that the
    # rename below stays on one file system.
    temp_filename = os.path.join(path, os.path.basename(tempfile.mktemp()))

    _save_dump(x, temp_filename, err_msg, delay, show_io_err,
               gzip, bin)

    # if that worked, dump properly

    if mode == 'w':
        os.rename(temp_filename, filename)

    else:
        os.unlink(temp_filename)
        Dump(x, filename, mode='a', gzip=gzip, bin=bin)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:571
def put
if x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:197
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:188
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:220
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:322
def append
x must be a list and will be appended to the end of the pipe, dropping rightmost elements if necessary.
Definition: utils.py:213
The general base class for IMP exceptions.
Definition: exception.h:48
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:353