IMP logo
IMP Reference Guide  2.20.1
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 from __future__ import print_function
6 #
7 # The Inferential Structure Determination (ISD) software library
8 #
9 # Authors: Michael Habeck and Wolfgang Rieping
10 #
11 # Copyright (C) Michael Habeck and Wolfgang Rieping
12 #
13 # All rights reserved.
14 #
15 # NO WARRANTY. This library is provided 'as is' without warranty of any
16 # kind, expressed or implied, including, but not limited to the implied
17 # warranties of merchantability and fitness for a particular purpose or
18 # a warranty of non-infringement.
19 #
20 # Distribution of substantively modified versions of this module is
21 # prohibited without the explicit permission of the copyright holders.
22 #
23 
24 import atexit
25 import sys
26 import time
27 import os
28 import os.path
29 import socket
30 
31 
32 try:
33  from queue import Queue # python3
34 except ImportError:
35  from Queue import Queue # python2
36 from threading import Thread
37 
# Module-wide debugging switch.
debug = False

# Lookup table from one-letter to three-letter amino acid residue names
# for the 20 standard residues.
code = dict(
    A='ALA', R='ARG', N='ASN', D='ASP', C='CYS',
    E='GLU', Q='GLN', G='GLY', H='HIS', I='ILE',
    L='LEU', K='LYS', M='MET', F='PHE', P='PRO',
    S='SER', T='THR', W='TRP', Y='TYR', V='VAL',
)
62 
63 
def average(x):
    """Return the arithmetic mean of the sized iterable x as a float."""
    total = sum(x)
    count = float(len(x))
    return total / count
66 
67 
def atexit_register(*args):
    """Forward all positional arguments to atexit.register().

    The first argument is the callable to run at interpreter exit; any
    further arguments are passed to it when it is called.
    """
    register = atexit.register
    register(*args)
71 
72 
def atexit_unregister(func):
    """Remove func from the list of functions run at interpreter exit.

    Does nothing if func was never registered.
    """

    # Python 3 offers a public API; the private handler list used
    # previously does not exist there.
    unregister = getattr(atexit, 'unregister', None)
    if unregister is not None:
        unregister(func)
        return

    # Python 2 fallback: handlers are stored as (func, args, kwargs)
    # tuples in a private module-level list.
    handlers = atexit._exithandlers
    for position, handler in enumerate(handlers):
        if handler[0] == func:
            handlers.pop(position)
            return
83 
84 
class WatchDog(Thread):
    """Daemon thread that kills the process when no life sign is received.

    Call set() regularly with the current time. If the last value passed
    to set() is more than `timeout` away from time.time(), the watchdog
    prints a message, optionally logs it, and terminates the interpreter
    with os._exit(0). In debug mode it only stops itself.
    """

    def __init__(self, timeout, debug=False, logfile=None):
        """
        timeout: in minutes.
        debug: if true, print diagnostics and never kill the interpreter.
        logfile: optional file ('~' is expanded) that receives a line
                 when the timeout is hit.
        """

        Thread.__init__(self)

        self.timeout = timeout * 60.
        self.debug = debug
        self._last_ping = None

        # Must not be named `_stop`: threading.Thread has a private
        # method of that name on Python 3, and overwriting it with a
        # bool makes join() fail with "'bool' object is not callable".
        self._stop_requested = False

        if logfile is not None:
            logfile = os.path.expanduser(logfile)

        self.logfile = logfile

        # Never keep the interpreter alive just because of the watchdog.
        self.daemon = True

    def stop(self):
        """Ask the run() loop to finish at its next iteration."""
        self._stop_requested = True

    def set(self, x):
        "set the _last_ping variable of the WatchDog instance"

        if self.debug:
            print('Watchdog: set(%s) called.' % str(x))

        self._last_ping = x

    def _write_log(self, message):
        """Best-effort append of the timeout message to the log file."""
        try:
            # 'a' creates the file if it does not exist yet.
            with open(self.logfile, 'a') as f:
                f.write(message + '; host %s, %s\n'
                        % (socket.gethostname(), time.ctime()))
        except IOError:
            # Logging must never prevent the watchdog from acting.
            pass

    def run(self):
        """run the Watchdog thread, which sits in a loop sleeping for
        timeout/4. at each iteration, and
        if abs(time() - _last_ping) > timeout, exits.
        """

        while not self._stop_requested:

            last_ping = self._last_ping

            if last_ping is not None:
                delta = abs(last_ping - time.time())
            else:
                delta = None

            if self.debug:

                if delta is None:
                    val = 'N/A s'
                else:
                    val = '%.0f s' % delta

                print('Watchdog: last life sign %s ago; timeout is %d min(s).'
                      % (val, self.timeout / 60.))

            if delta is not None and delta > self.timeout:

                s = 'No life sign for > %d minute(s)' % (self.timeout / 60.)

                print(s + ', exiting...')

                if self.logfile is not None:
                    self._write_log(s)

                if not self.debug:
                    # Hard exit: skips cleanup handlers on purpose so a
                    # hung main thread cannot block termination.
                    os._exit(0)
                else:
                    print('Watchdog: keeping Python interpreter alive.')
                    self.stop()

            time.sleep(self.timeout / 4.)
170 
171 
class SpinWheel:
    """Text-mode progress spinner written to stdout."""

    # The backslash has to be escaped; a bare '\' does not terminate
    # the string literal.
    symbols = ('-', '/', '|', '\\')

    def __init__(self):
        # index into `symbols` of the character shown next
        self.state = 0

    def update(self, s=''):
        """Redraw the current line as s followed by the next spinner
        symbol and advance the spinner by one step."""
        sys.stdout.write('\r%s%s' % (s, self.symbols[self.state]))
        sys.stdout.flush()

        self.state = (self.state + 1) % len(self.symbols)
184 
185 
class Pipe(object):

    """implements a FIFO pipe that merges lists (see self.put)

    The newest elements sit at the front (index 0) and the oldest at the
    back. A length <= 0 means the pipe is unbounded.
    """

    def __init__(self, length=-1):

        self.length = length
        self.pipe = []

    def _truncate(self):
        """Drop the oldest elements so that at most `length` remain."""
        if self.length > 0 and len(self.pipe) > self.length:
            self.pipe = self.pipe[:self.length]

    def put(self, x):
        """If x is subscriptable, insert its contents at the beginning of
        the pipe. Else insert the element itself.
        If the pipe becomes too long, drop the oldest elements.

        Note that strings are subscriptable and are therefore inserted
        character by character.
        """

        try:
            x[0]
            new_items = list(x)

        except TypeError:
            # not subscriptable: a single element
            new_items = [x]

        except IndexError:
            # empty sequence: nothing to insert
            new_items = []

        self.pipe = new_items + self.pipe
        self._truncate()

    def append(self, x):
        """x must be a list; its contents are inserted at the beginning
        of the pipe, dropping the oldest (rightmost) elements if
        necessary.
        """

        # A plain [:self.length] slice would silently drop the last
        # element for the default unbounded length of -1.
        self.pipe = list(x) + self.pipe
        self._truncate()

    def get(self):
        """returns the oldest element, without popping it out of the pipe.
        Popping occurs in the put() method
        """
        return self.pipe[-1]

    def __getitem__(self, index):
        return self.pipe.__getitem__(index)

    def __len__(self):
        return len(self.pipe)

    def __str__(self):
        return str(self.pipe)

    def is_full(self):
        """True if the pipe holds exactly `length` elements."""
        return len(self.pipe) == self.length

    __repr__ = __str__
237 
238 
class SortedQueue(Queue):
    """Queue whose items are kept sorted by the average of item.time and
    are drawn at random, with weights derived from those averages.

    Items must provide a `time` attribute accepted by
    Isd.misc.mathutils.average.
    """

    def _init(self, maxsize):
        # The stock Queue stores items in a deque, which supports neither
        # sort() nor removal by arbitrary index; use a plain list.
        self.queue = []

    def sort(self):
        """Sort the stored items by average time and cache the averages
        in self.times."""

        # numpy.oldnumeric has been removed from NumPy; the plain numpy
        # functions are drop-in replacements here.
        from numpy import array
        from Isd.misc.mathutils import average

        # key-based sort works on Python 2 and 3 (cmp functions do not)
        self.queue.sort(key=lambda item: average(item.time))

        self.times = array([average(item.time) for item in self.queue])

    def _put(self, item):

        Queue._put(self, item)
        self.sort()

    def _get(self):
        """Remove and return one randomly drawn item."""

        from numpy import power
        from Isd.misc.mathutils import draw_dirichlet, rescale_uniform

        # compute "probabilities"

        p = 1. - rescale_uniform(self.times)
        p = power(p, 2.)

        # NOTE(review): draw_dirichlet is assumed to return a valid
        # index into self.queue - confirm against Isd.misc.mathutils.
        index = draw_dirichlet(p)

        val = self.queue.pop(index)

        if self.queue:
            self.sort()

        return val
275 
276 
def load_pdb(filename):
    """Build a Scientific.IO.PDB Structure from a PDB file.

    A leading '~' or '~user' in filename is expanded first.
    """

    import os

    from Scientific.IO.PDB import Structure

    expanded = os.path.expanduser(filename)

    return Structure(expanded)
284 
285 
def copyfiles(src_path, dest_path, pattern=None, verbose=False):
    """Copy every file in src_path matching the glob pattern to dest_path.

    pattern defaults to '*'. With verbose set, the source path of each
    copied file is printed.
    """

    from glob import glob
    from shutil import copyfile
    import os

    wildcard = '*' if pattern is None else pattern

    for source in glob(os.path.join(src_path, wildcard)):

        target = os.path.join(dest_path, os.path.basename(source))
        copyfile(source, target)

        if verbose:
            print(source)
302 
303 
def touch(filename):
    """Create filename if it does not exist and update its timestamps.

    Existing file contents are preserved. Directories are accepted and
    silently ignored. Any other failure is raised as IOError/OSError.
    """

    import os

    try:
        # Append mode creates a missing file without truncating an
        # existing one (opening with 'w' would wipe its contents).
        with open(filename, 'a'):
            pass

        # Opening without writing does not bump the modification time.
        os.utime(filename, None)

    except (IOError, OSError):
        if not os.path.isdir(filename):
            # re-raise the original error, keeping errno and traceback
            raise
316 
317 # Yannick
318 
319 
def read_sequence_file(filename, first_residue_number=1):
    """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
    no:3-letter code.

    A file starting with '>' is read as FASTA (1-letter codes); an
    unknown 1-letter code raises KeyError. Otherwise the file is read as
    whitespace-separated 3-letter codes; unknown codes only produce a
    warning and are kept in the result.

    Residues are numbered consecutively from first_residue_number.
    Raises IOError if the file cannot be opened.
    """

    filename = os.path.abspath(filename)
    try:
        f = open(filename)
    except IOError:
        raise IOError('Could not open sequence file "%s".' % filename)

    # make sure the file is closed again, even if reading fails
    try:
        seq = f.read().upper()
    finally:
        f.close()

    if seq.startswith('>'):
        print("Detected FASTA 1-letter sequence")
        # Drop the header line and join the sequence into one line; a
        # header without any sequence line yields an empty sequence.
        _header, _newline, body = seq.partition('\n')
        names = [code[letter] for letter in ''.join(body.split())]
    else:
        names = seq.split()
        known = set(code.values())
        for name in names:
            if name not in known:
                print('Warning: unknown 3-letter code: %s' % name)

    numbers = range(first_residue_number,
                    first_residue_number + len(names))
    return dict(zip(numbers, names))
349 
350 # Yannick
351 
352 
def check_residue(a, b):
    """Compare two residue codes, case-insensitively.

    One-letter codes are translated to three-letter codes first.
    Returns True if both name the same residue. Otherwise a diagnostic
    is printed and False is returned.
    """
    names = []
    for name in (a.upper(), b.upper()):
        if len(name) == 1:
            if name not in code:
                print('Warning: unknown 1-letter code: %s' % name)
                return False
            name = code[name]
        names.append(name)

    for name in names:
        if len(name) != 3:
            print('Unknown residue code %s' % name)
            return False

    first, second = names
    if first == second:
        return True

    print('Residues %s and %s are not the same' % (first, second))
    return False
379 
380 
def my_glob(x, do_touch=False):
    """Return the paths matching the glob pattern x.

    With do_touch set, touch() is first called on the directory part of
    the pattern.
    """

    from glob import glob

    if do_touch:

        import os

        # calling touch() is 4x to 6x faster than os.system('touch ...')
        touch(os.path.dirname(x))

    matches = glob(x)

    return matches
395 
396 
def Dump(this, filename, gzip=0, mode='w', bin=1):
    """
    Dump(this, filename, gzip = 0)
    Supports also '~' or '~user'.

    Pickles `this` into `filename`.

    gzip: if true, write a gzip-compressed file.
    mode: 'w' (overwrite) or 'a' (append another pickle to the file).
    bin: passed to pickle.dump as the protocol argument.

    Raises ValueError for any other mode.
    """

    import os
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if mode not in ['w', 'a']:
        raise ValueError("mode has to be 'w' (write) or 'a' (append)")

    # pickle writes bytes, so the file must be opened in binary mode
    # (a text-mode file raises TypeError on Python 3).
    binary_mode = mode + 'b'

    if gzip:
        # imported under another name to avoid shadowing the parameter
        import gzip as gzip_module
        f = gzip_module.GzipFile(filename, binary_mode)
    else:
        f = open(filename, binary_mode)

    try:
        pickle.dump(this, f, bin)
    finally:
        f.close()
423 
424 
def Load(filename, gzip=0, force=0):
    """
    Load(filename, gzip=0, force=0)

    Unpickles all objects stored one after another in `filename` and
    merges them with `+=` (so sequentially dumped lists are joined).
    Returns None for an empty file, or if a gzipped file cannot be
    opened.

    gzip: if true, read a gzip-compressed file.
    force: returns all objects that could be unpickled. Useful
           when unpickling of sequential objects fails at some point.
           Without force, the unpickling error is raised.

    NOTE: unpickling can execute arbitrary code; only load files from
    trusted sources.
    """
    import pickle
    import os

    filename = os.path.expanduser(filename)

    if gzip:
        # imported under another name to avoid shadowing the parameter
        import gzip as gzip_module
        try:
            f = gzip_module.GzipFile(filename)
        except IOError:
            return None
    else:
        # Only open the raw file when not reading through gzip;
        # otherwise the gzip handle would be replaced and leaked.
        f = open(filename, 'rb')

    objects = None
    n = 0

    try:
        while True:

            try:
                chunk = pickle.load(f)

                if objects is None:
                    objects = chunk
                else:
                    objects += chunk

                n += 1

            except EOFError:
                # regular end of file
                break

            except Exception:
                print('Could not load chunk %d. Stopped.' % n)

                if force:
                    break

                # propagate the actual error instead of reading on
                raise

    finally:
        f.close()

    return objects
478 
479 
def get_pdb(pdb_entry, dest='.', verbose_level=0):
    """Download a PDB entry by anonymous FTP and store it uncompressed
    as <dest>/<pdb_entry>.pdb.

    pdb_entry: PDB identifier; it is lower-cased for the server lookup.
    dest: destination directory ('~' is expanded).
    verbose_level: passed to ftplib's set_debuglevel().

    Raises IOError if the server refuses the download.
    """

    import ftplib
    import gzip
    import os
    import shutil

    url = 'ftp.ebi.ac.uk'
    path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
    filename_template = 'pdb%s.ent.gz'

    dest = os.path.expanduser(dest)

    ftp = ftplib.FTP(url)

    try:
        ftp.login()
        ftp.set_debuglevel(verbose_level)

        ftp.cwd(path)

        filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
        remote_name = filename_template % pdb_entry.lower()

        f = open(filename, 'wb')

        try:
            try:
                ftp.retrbinary('RETR %s' % remote_name, f.write)
            finally:
                # close the local file whether or not the download worked
                f.close()

            ftp.quit()

        except ftplib.error_perm:
            raise IOError('File %s not found on server' % filename)

    finally:
        # releases the connection on every error path; harmless after
        # a successful quit()
        ftp.close()

    # Decompress in-process like `gunzip -f`: write <name>.pdb, replacing
    # any existing file, then remove the archive. This avoids passing a
    # caller-supplied file name through a shell command line.
    target = filename[:-len('.gz')]

    source = gzip.open(filename, 'rb')
    try:
        with open(target, 'wb') as out:
            shutil.copyfileobj(source, out)
    finally:
        source.close()

    os.remove(filename)
513 
514 
def compile_index_list(chain, atom_names, residue_index_list=None):
    """Collect the `.index` attribute of selected atoms of a chain.

    chain: sequence of residues; each residue maps atom names to
           objects carrying an `index` attribute.
    atom_names: names to look up in every residue; names missing from a
                residue are skipped. None selects all atoms of each
                residue, in sorted name order.
    residue_index_list: positions in chain to visit; defaults to all.

    Returns (index_list, index_map): index_list holds the atom indices
    in visiting order and index_map maps each atom index to its
    position in index_list.
    """

    if residue_index_list is None:
        residue_index_list = list(range(len(chain)))

    collected = []
    positions = {}

    for res_index in residue_index_list:

        residue = chain[res_index]

        if atom_names is None:
            wanted = sorted(residue.keys())
        else:
            wanted = atom_names

        for atom_name in wanted:

            if atom_name not in residue:
                continue

            atom_index = residue[atom_name].index
            # the position equals the number of atoms collected so far
            positions[atom_index] = len(collected)
            collected.append(atom_index)

    return collected, positions
542 
543 
def get_coordinates(universe, E, indices=None, atom_names=('CA',),
                    residue_index_list=None, atom_index_list=None):
    """Return the coordinates of selected atoms for ensemble members.

    For every i in indices (default: all of range(len(E))) the torsion
    angles E.torsion_angles[i] are applied to the polymer of the
    universe, and the rows of universe.X listed in atom_index_list are
    stored.

    If atom_index_list is None it is compiled from atom_names and
    residue_index_list with compile_index_list().

    Returns a numpy array with one entry per visited ensemble member.
    """

    # numpy.oldnumeric has been removed from NumPy.
    from numpy import array, take

    if indices is None:
        indices = list(range(len(E)))

    chain = universe.get_polymer()

    if atom_index_list is None:
        atom_index_list, index_map = compile_index_list(chain, atom_names,
                                                        residue_index_list)

    coord = []

    for i in indices:

        chain.set_torsions(E.torsion_angles[i], 1)

        # oldnumeric's take() selected along axis 0 by default, whereas
        # numpy.take() flattens unless the axis is given explicitly.
        X = array(take(universe.X, atom_index_list, axis=0))

        coord.append(X)

    return array(coord)
569 
570 
def map_angles(angles, period=None):
    """
    maps angles into interval [-pi,pi]

    angles: number or numpy array of angles.
    period: half-width of the target interval; defaults to pi. Pass
            e.g. 180. to map angles given in degrees into [-180, 180].
    """

    # numpy.oldnumeric has been removed from NumPy; these functions are
    # available from numpy directly.
    from numpy import fmod, greater, logical_not

    if period is None:
        from numpy import pi as period

    # fmod() keeps the sign of its first argument, so positive and
    # non-positive angles have to be shifted in opposite directions.
    mask = greater(angles, 0.)

    return mask * (fmod(angles + period, 2 * period) - period) + \
        logical_not(mask) * (fmod(angles - period, 2 * period) + period)
585 
586 
def remove_from_dict(d, items):
    """Delete every key listed in items from d, in place.

    Keys that are not present in d are ignored.
    """

    for key in items:
        # pop() with a default never raises for a missing key
        d.pop(key, None)
592 
593 
def myrange(a, b, n):
    """Return n evenly spaced values from a to b, both included.

    The result is a float numpy array. For n == 1 it contains only a.
    """

    # linspace hits both end points exactly and also copes with n == 1
    # and a == b, where computing a step of (b - a) / (n - 1) for
    # arange() divides by zero or yields a zero step. numpy.oldnumeric,
    # used previously, has been removed from NumPy.
    from numpy import linspace

    return linspace(a, b, n)
603 
604 
def indent(lines, prefix):
    """Join lines into one string with a hanging indent.

    The first line is preceded by prefix; every following line is
    preceded by as many blanks as prefix is long. The list passed in is
    left unchanged. An empty list yields an empty string.
    """

    if not lines:
        return ''

    prefix = str(prefix)
    tag = ' ' * len(prefix)

    # Build a new list instead of writing into lines[0]: modifying the
    # caller's list would add the prefix again on every further call.
    indented = [prefix + lines[0]] + [tag + s for s in lines[1:]]

    return '\n'.join(indented)
613 
614 
def make_block(s, length=80, tol=10):
    """Break the text s into a list of lines.

    Every newline-separated paragraph of s is wrapped on its own by
    _make_block(paragraph, length, tol); the resulting lines are
    returned as one flat list.
    """
    wrapped = []
    for paragraph in s.split('\n'):
        wrapped.extend(_make_block(paragraph, length, tol))

    return wrapped
622 
623 
624 def _make_block(s, length, tol):
625 
626  spl = s.split(' ')
627  spl = [(w, ' ') for w in spl]
628 
629  words = []
630  for ll in spl:
631  g = ll[0].split('/')
632  g = [w + '/' for w in g]
633  g[-1] = g[-1][:-1] + ' '
634 
635  words += g
636 
637  spl = []
638  line = ''
639 
640  for i in range(len(words)):
641  word = words[i]
642 
643  if len(line + word) <= length:
644  line += word
645 
646  else:
647  if length - len(line) > tol:
648  m = length - len(line)
649  line += word[:m]
650  word = word[m:]
651 
652  if len(line) > 1 and line[0] == ' ' and \
653  line[1] != ' ':
654  line = line[1:]
655 
656  spl.append(line)
657  line = word
658 
659  line = line[:-1]
660  if len(line) > 1 and line[0] == ' ' and \
661  line[1] != ' ':
662  line = line[1:]
663 
664  spl.append(line)
665 
666  return spl
667 
668 
def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
               gzip=False, bin=True):
    """Dump x to filename, retrying until it succeeds.

    If the first attempt fails with IOError, a message is printed once
    and the dump is retried every `delay` minutes until it works.

    err_msg: printed instead of the generic 'IOError: ...' message.
    show_io_err: if true, the IOError text is printed before err_msg.
    gzip, bin: passed on to Dump().
    """

    import time

    try:
        Dump(x, filename, gzip=gzip, bin=bin)

    except IOError as first_error:

        if err_msg is None:
            report = 'IOError: %s' % str(first_error)
        elif show_io_err:
            report = '%s. %s' % (str(first_error), err_msg)
        else:
            report = err_msg

        print(report)

    else:
        # first attempt succeeded, nothing to retry
        return

    saved = False

    while not saved:

        # wait for `delay` minutes before the next attempt

        time.sleep(60. * delay)

        try:
            Dump(x, filename, gzip=gzip, bin=bin)
            saved = True

        except IOError:
            pass
700 
701 
def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
              gzip=False, mode='w', bin=True):
    """Dump x to filename without leaving a half-written file behind.

    x is first dumped to a temporary file in the directory of filename
    (retrying on IOError, see _save_dump). Then

    mode 'w': the temporary file is renamed to filename.
    mode 'a': the temporary file is removed and x is appended to
              filename.

    Any other mode raises Exception before anything is written.
    """

    import os
    import tempfile

    # Check the mode first: the dump below may retry for a long time and
    # would otherwise leave an orphaned temporary file behind.
    if mode not in ('w', 'a'):
        raise Exception('Mode "%s" invalid.' % mode)

    path, _filename = os.path.split(filename)

    # Only the generated name is used; the file is created next to the
    # destination so the rename below stays on one file system.
    # NOTE(review): tempfile.mktemp() is deprecated and race-prone.
    # mkstemp() is not a drop-in replacement because it creates the file
    # with mode 0600, which would carry over to the renamed output.
    temp_filename = os.path.join(path, os.path.basename(tempfile.mktemp()))

    _save_dump(x, temp_filename, err_msg, delay, show_io_err,
               gzip, bin)

    # if that worked, dump properly

    if mode == 'w':
        os.rename(temp_filename, filename)

    else:
        os.unlink(temp_filename)
        Dump(x, filename, mode='a', gzip=gzip, bin=bin)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:571
def put
If x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:195
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:186
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:218
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:320
def append
x must be a list and will be appended to the end of the pipe, dropping rightmost elements if necessar...
Definition: utils.py:211
The general base class for IMP exceptions.
Definition: exception.h:48
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:353