IMP logo
IMP Reference Guide  2.21.0
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 from __future__ import print_function
6 #
7 # The Inferential Structure Determination (ISD) software library
8 #
9 # Authors: Michael Habeck and Wolfgang Rieping
10 #
11 # Copyright (C) Michael Habeck and Wolfgang Rieping
12 #
13 # All rights reserved.
14 #
15 # NO WARRANTY. This library is provided 'as is' without warranty of any
16 # kind, expressed or implied, including, but not limited to the implied
17 # warranties of merchantability and fitness for a particular purpose or
18 # a warranty of non-infringement.
19 #
20 # Distribution of substantively modified versions of this module is
21 # prohibited without the explicit permission of the copyright holders.
22 #
23 
24 import atexit
25 import sys
26 import time
27 import os
28 import os.path
29 import socket
30 from threading import Thread
31 
# Module-level debug flag (not referenced by the code in this module).
debug = False

# Maps the one-letter code of each of the 20 standard amino acids to the
# corresponding upper-case three-letter residue name.
code = {
    'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP',
    'C': 'CYS', 'E': 'GLU', 'Q': 'GLN', 'G': 'GLY',
    'H': 'HIS', 'I': 'ILE', 'L': 'LEU', 'K': 'LYS',
    'M': 'MET', 'F': 'PHE', 'P': 'PRO', 'S': 'SER',
    'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL',
}
56 
57 
def average(x):
    """Return the arithmetic mean of the values in x as a float.

    x must support both sum() and len(); an empty x raises
    ZeroDivisionError.
    """
    total = sum(x)
    count = float(len(x))
    return total / count
60 
61 
def atexit_register(*args):
    """Register an exit handler with the atexit module.

    args is forwarded unchanged to atexit.register(), i.e. the callable
    followed by the positional arguments it should be called with.
    Nothing is returned.
    """
    register = atexit.register
    register(*args)
65 
66 
def atexit_unregister(func):
    """Remove func from the interpreter's exit handlers.

    Does nothing if func has not been registered.

    On Python 3 this delegates to atexit.unregister(), which removes
    every registration of func.  The private list atexit._exithandlers
    only exists on Python 2; there the first matching entry is removed.
    """
    unregister = getattr(atexit, 'unregister', None)

    if unregister is not None:
        unregister(func)
        return

    # Python 2 fallback: handlers are stored as (func, args, kwargs).
    handlers = atexit._exithandlers

    for position, handler in enumerate(handlers):
        if handler[0] == func:
            handlers.pop(position)
            return
77 
78 
class WatchDog(Thread):
    """Daemon thread that terminates the process when life signs stop.

    Some other part of the program reports a life sign by calling set()
    with a time stamp in seconds (it is compared against time.time()).
    Once the last time stamp is older than the timeout, a message is
    printed, optionally appended to a log file, and the process is ended
    with os._exit(0).  In debug mode the interpreter is kept alive and
    only the watchdog thread stops.
    """

    def __init__(self, timeout, debug=False, logfile=None):
        """
        timeout: in minutes.
        debug: print diagnostics and never kill the interpreter.
        logfile: optional file ('~' is expanded) that receives a line
                 when the timeout is hit.
        """

        Thread.__init__(self)

        self.timeout = timeout * 60.
        self.debug = debug
        self._last_ping = None

        # Must not be called '_stop': threading.Thread defines a method of
        # that name on Python 3, and shadowing it with a bool breaks
        # join() and is_alive().
        self._stop_requested = False

        if logfile is not None:
            logfile = os.path.expanduser(logfile)

        self.logfile = logfile

        self.daemon = True

    def stop(self):
        """ask the run() loop to terminate at its next iteration"""
        self._stop_requested = True

    def set(self, x):
        "set the _last_ping variable of the WatchDog instance"

        if self.debug:
            print('Watchdog: set(%s) called.' % str(x))

        self._last_ping = x

    def _write_log(self, message):
        """append message plus host name and time to the log file, if any.
        I/O errors are ignored: logging is best effort only.
        """

        if self.logfile is None:
            return

        try:
            # 'a' creates the file when it does not exist yet
            with open(self.logfile, 'a') as f:
                f.write(message + '; host %s, %s\n'
                        % (socket.gethostname(), time.ctime()))

        except IOError:
            pass

    def run(self):
        """run the Watchdog thread, which sits in a loop sleeping for
        timeout/4. at each iteration, and
        if abs(time() - _last_ping) > timeout, exits.
        """

        while not self._stop_requested:

            # Read the shared attribute once per iteration so that delta
            # is always consistent with the None check below.
            last_ping = self._last_ping

            if last_ping is not None:
                delta = abs(last_ping - time.time())
            else:
                delta = None

            if self.debug:

                if delta is None:
                    val = 'N/A s'
                else:
                    val = '%.0f s' % delta

                print('Watchdog: last life sign %s ago; timeout is %d min(s).'
                      % (val, self.timeout / 60.))

            if delta is not None and delta > self.timeout:

                s = 'No life sign for > %d minute(s)' % (self.timeout / 60.)

                print(s + ', exiting...')

                self._write_log(s)

                if not self.debug:
                    # hard exit: no cleanup handlers are run
                    os._exit(0)
                else:
                    print('Watchdog: keeping Python interpreter alive.')
                    self.stop()

            time.sleep(self.timeout / 4.)
164 
165 
class SpinWheel:
    """Text-mode activity indicator written to standard output.

    Every call to update() redraws the current line with the next symbol
    of a rotating wheel.
    """

    # One full turn of the wheel.  The backslash has to be escaped,
    # otherwise the string literal is unterminated.
    symbols = ('-', '/', '|', '\\')

    def __init__(self):
        # index into symbols of the character drawn by the next update()
        self.state = 0

    def update(self, s=''):
        """redraw the line as s followed by the current wheel symbol,
        then advance the wheel by one position
        """
        sys.stdout.write('\r%s%s' % (s, self.symbols[self.state]))
        sys.stdout.flush()

        self.state = (self.state + 1) % len(self.symbols)
178 
179 
class Pipe(object):

    """implements a FIFO pipe that merges lists (see self.put)

    The newest element is stored at index 0, the oldest at index -1.
    A length that is not positive (the default is -1) means that the pipe
    is unbounded.
    """

    def __init__(self, length=-1):

        self.length = length
        self.pipe = []

    def _truncate(self):
        """drop the oldest elements until the pipe fits into its length"""

        if self.length > 0 and len(self.pipe) > self.length:
            self.pipe = self.pipe[:self.length]

    def put(self, x):
        """If x is subscriptable, insert its contents at the beginning of
        the pipe. Else insert the element itself.
        If the pipe is full, drop the oldest element(s).
        """

        try:
            try:
                x[0]
            except IndexError:
                # empty sequence: still a sequence, there is just nothing
                # to merge
                pass

            self.pipe = list(x) + self.pipe

        except TypeError:
            self.pipe.insert(0, x)

        # Merging a list may add several elements at once, so cut back to
        # the maximum length instead of dropping a single element.
        self._truncate()

    def append(self, x):
        """x must be a list; its elements are inserted at the beginning
        (newest end) of the pipe, dropping the oldest elements if the
        pipe would become longer than its maximum length
        """

        self.pipe = list(x) + self.pipe

        # A plain slice [:self.length] would drop the last element of an
        # unbounded pipe (length == -1).
        self._truncate()

    def get(self):
        """returns the oldest element, without popping it out of the pipe.
        Popping occurs in the put() method
        """
        return self.pipe[-1]

    def __getitem__(self, index):
        return self.pipe.__getitem__(index)

    def __len__(self):
        return len(self.pipe)

    def __str__(self):
        return str(self.pipe)

    def is_full(self):
        """True if the pipe holds exactly `length` elements; never true
        for an unbounded pipe
        """
        return len(self.pipe) == self.length

    __repr__ = __str__
231 
232 
def load_pdb(filename):
    """Return a Scientific.IO.PDB.Structure built from a PDB file.

    A leading '~' or '~user' in filename is expanded.  Requires the
    third-party ScientificPython package.
    """
    import os

    from Scientific.IO.PDB import Structure

    expanded = os.path.expanduser(filename)

    return Structure(expanded)
240 
241 
def copyfiles(src_path, dest_path, pattern=None, verbose=False):
    """Copy the files in src_path that match a glob pattern to dest_path.

    pattern: glob pattern relative to src_path; None selects '*'.
    verbose: print the path of every file after it has been copied.
    """
    from glob import glob
    from shutil import copyfile
    import os

    wildcard = '*' if pattern is None else pattern

    for source in glob(os.path.join(src_path, wildcard)):

        target = os.path.join(dest_path, os.path.basename(source))
        copyfile(source, target)

        if verbose:
            print(source)
258 
259 
def touch(filename):
    """Create filename if it does not exist and update its time stamps.

    The content of an existing file is left untouched.  Calling touch()
    on a directory is silently accepted; any other failure to open the
    file is re-raised unchanged (IOError).
    """
    import os

    try:
        # Append mode creates a missing file but, unlike 'w', never
        # truncates an existing one.
        with open(filename, 'a'):
            pass

        # Opening without writing does not change the modification time.
        os.utime(filename, None)

    except IOError:
        if not os.path.isdir(filename):
            # bare raise keeps errno and the original traceback
            raise
272 
273 # Yannick
274 
275 
def read_sequence_file(filename, first_residue_number=1):
    """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
    no:3-letter code.

    A file starting with '>' is read as FASTA: the first line is skipped
    and every remaining non-whitespace character must be a known 1-letter
    code (KeyError otherwise).  Any other file is read as a
    whitespace-separated list of 3-letter codes; unknown codes only
    produce a warning and are kept.

    The whole file is converted to upper case.  Residue numbers start at
    first_residue_number.  Raises IOError if the file cannot be opened.
    """

    filename = os.path.abspath(filename)
    try:
        f = open(filename)
    except IOError:
        raise IOError('Could not open sequence file "%s".' % filename)

    # make sure the handle is closed again
    with f:
        seq = f.read().upper()

    if seq.startswith('>'):
        print("Detected FASTA 1-letter sequence")
        pos = seq.find('\n')
        if pos == -1:
            # header only: without this the header itself would be
            # taken for the sequence
            seq = ''
        else:
            # get rid of first line and get sequence in one line
            seq = ''.join(seq[pos + 1:].split())
        names = [code[i] for i in seq]
    else:
        names = seq.split()
        known = set(code.values())
        for x in names:
            if x not in known:
                print('Warning: unknown 3-letter code: %s' % x)

    numbers = range(first_residue_number,
                    first_residue_number + len(names))
    return dict(zip(numbers, names))
305 
306 # Yannick
307 
308 
def check_residue(a, b):
    """Return True if the residue codes a and b name the same residue.

    Both codes are compared case-insensitively and may be given in
    1-letter or 3-letter form; 1-letter codes are translated first.
    For unknown codes or differing residues a message is printed and
    False is returned.
    """
    names = []

    # translate 1-letter codes, a first, then b
    for residue in (a.upper(), b.upper()):
        if len(residue) == 1:
            if residue not in code:
                print('Warning: unknown 1-letter code: %s' % residue)
                return False
            residue = code[residue]
        names.append(residue)

    # whatever is left must be a 3-letter code
    for residue in names:
        if len(residue) != 3:
            print('Unknown residue code %s' % residue)
            return False

    first, second = names

    if first == second:
        return True

    print('Residues %s and %s are not the same' % (first, second))
    return False
335 
336 
def my_glob(x, do_touch=False):
    """Return the list of paths matching the glob pattern x.

    do_touch: call touch() on the directory part of x before globbing.
    """
    from glob import glob

    if do_touch:

        import os

        directory = os.path.split(x)[0]

        # calling touch() directly is 4x to 6x faster than running the
        # external command via os.system('touch %s' % path)
        touch(directory)

    matches = glob(x)

    return matches
351 
352 
def Dump(this, filename, gzip=0, mode='w', bin=1):
    """
    Dump(this, filename, gzip = 0)
    Supports also '~' or '~user'.

    Pickles `this` into the file `filename`.

    gzip: if true, write a gzip-compressed file.
    mode: 'w' to overwrite the file, 'a' to append another pickle to it.
    bin:  passed to pickle.dump() as the protocol.

    Raises ValueError for any other mode.
    """

    import os
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if mode not in ['w', 'a']:
        raise ValueError("mode has to be 'w' (write) or 'a' (append)")

    # Pickles are binary data: on Python 3 pickle.dump() fails on a file
    # opened in text mode.
    binary_mode = mode + 'b'

    if gzip:
        # imported under another name, 'gzip' is also a parameter
        import gzip as gzip_module
        f = gzip_module.GzipFile(filename, binary_mode)
    else:
        f = open(filename, binary_mode)

    try:
        pickle.dump(this, f, bin)
    finally:
        f.close()
379 
380 
def Load(filename, gzip=0, force=0):
    """
    Load(filename, gzip=0, force=0)

    Unpickles all objects stored in `filename` ('~' is expanded) and
    returns them merged with '+=' in the order they were written; returns
    None if the file contains no pickle.

    gzip: the file is gzip-compressed.  If it cannot be opened, None is
          returned.

    force: returns all objects that could be unpickled. Useful
           when unpickling of sequential objects fails at some point.
           Without force, the error is propagated.

    NOTE: unpickling can execute arbitrary code; only load files from
    trusted sources.
    """
    import pickle
    import os

    filename = os.path.expanduser(filename)

    if gzip:
        # imported under another name, 'gzip' is also a parameter
        import gzip as gzip_module
        try:
            f = gzip_module.GzipFile(filename)
        except IOError:
            return None
    else:
        # only open the plain file when gzip was not requested
        f = open(filename, 'rb')

    objects = None
    n = 0

    try:
        while True:

            try:
                chunk = pickle.load(f)

                if objects is None:
                    objects = chunk
                else:
                    objects += chunk

                n += 1

            except EOFError:
                break

            except Exception:
                print('Could not load chunk %d. Stopped.' % n)

                if force:
                    break

                raise
    finally:
        f.close()

    return objects
434 
435 
def get_pdb(pdb_entry, dest='.', verbose_level=0):
    """Download a PDB entry by anonymous FTP and store it uncompressed.

    pdb_entry: PDB code; it is converted to lower case for the name of
               the file on the server.
    dest: target directory ('~' is expanded).  The result is written to
          <dest>/<pdb_entry>.pdb.
    verbose_level: passed to ftplib.FTP.set_debuglevel().

    Raises IOError if the server refuses to deliver the file.
    """

    import ftplib
    import gzip
    import os
    import shutil

    url = 'ftp.ebi.ac.uk'
    path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
    filename_template = 'pdb%s.ent.gz'

    dest = os.path.expanduser(dest)

    filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
    remote_name = filename_template % pdb_entry.lower()

    ftp = ftplib.FTP(url)

    try:
        ftp.login()
        ftp.set_debuglevel(verbose_level)

        ftp.cwd(path)

        try:
            with open(filename, 'wb') as f:
                ftp.retrbinary('RETR %s' % remote_name, f.write)

        except ftplib.error_perm:
            # report the name that was requested from the server, not the
            # local file name
            raise IOError('File %s not found on server' % remote_name)

        ftp.quit()

    finally:
        # releases the socket on every path; harmless after quit()
        ftp.close()

    # Decompress in-process instead of calling 'gunzip -f' through a
    # shell: no external tool is needed and odd characters in the path
    # cannot break the command line.  As with 'gunzip -f', an existing
    # target is overwritten and the compressed file is removed.
    target = filename[:-len('.gz')]

    with gzip.open(filename, 'rb') as source:
        with open(target, 'wb') as output:
            shutil.copyfileobj(source, output)

    os.remove(filename)
469 
470 
def compile_index_list(chain, atom_names, residue_index_list=None):
    """Collect the `index` attribute of selected atoms of a chain.

    chain: sequence of residues; each residue maps atom names to objects
           that have an `index` attribute.
    atom_names: names of the atoms to pick from every residue; None
                selects all atoms of a residue, sorted by name.  Names
                missing from a residue are skipped.
    residue_index_list: positions of the residues to visit; None visits
                        the whole chain.

    Returns (index_list, index_map): the collected indices in visiting
    order and a dict mapping each index to its position in index_list.
    """

    if residue_index_list is None:
        residue_index_list = list(range(len(chain)))

    index_list = []
    index_map = {}

    for res_index in residue_index_list:

        wanted = atom_names

        if wanted is None:
            wanted = sorted(chain[res_index].keys())

        for name in wanted:

            residue = chain[res_index]

            if name not in residue:
                continue

            atom_index = residue[name].index

            # position of the atom in index_list once it is appended
            index_map[atom_index] = len(index_list)
            index_list.append(atom_index)

    return index_list, index_map
498 
499 
def get_coordinates(universe, E, indices=None, atom_names=('CA',),
                    residue_index_list=None, atom_index_list=None):
    """Return the coordinates of selected atoms for members of E.

    For every i in indices, the torsion angles E.torsion_angles[i] are
    set on the polymer returned by universe.get_polymer(), and the rows
    of universe.X selected by the atom index list are collected.

    indices: members of E to use; None uses range(len(E)).
    atom_names, residue_index_list: passed to compile_index_list() to
        build the atom index list when atom_index_list is None.
    atom_index_list: row indices into universe.X; overrides the two
        arguments above.

    Returns a numpy array with one entry per element of indices.
    """

    # numpy.oldnumeric has been removed from NumPy.  Its take() used
    # axis=0 by default, so the axis has to be given explicitly here.
    from numpy import array, take

    if indices is None:
        indices = list(range(len(E)))

    chain = universe.get_polymer()

    if atom_index_list is None:
        atom_index_list = compile_index_list(chain, atom_names,
                                             residue_index_list)[0]

    coord = []

    for i in indices:

        # universe.X is read only after the torsion angles were set;
        # presumably set_torsions() updates it - confirm against the
        # universe implementation.
        chain.set_torsions(E.torsion_angles[i], 1)

        coord.append(array(take(universe.X, atom_index_list, axis=0)))

    return array(coord)
525 
526 
def map_angles(angles, period=None):
    """
    maps angles into interval [-period, period]

    angles: number, sequence or numpy array of angles.
    period: half of the length of the target interval; the default
            None stands for pi, i.e. the interval [-pi, pi].
    """

    # numpy.oldnumeric has been removed from NumPy; the same functions
    # are available from numpy itself.
    from numpy import asarray, fmod, greater, logical_not, pi

    if period is None:
        period = pi

    # allows plain lists and tuples as well
    angles = asarray(angles)

    mask = greater(angles, 0.)

    # fmod keeps the sign of its first argument, hence positive and
    # non-positive angles are shifted in opposite directions.
    return mask * (fmod(angles + period, 2 * period) - period) + \
        logical_not(mask) * (fmod(angles - period, 2 * period) + period)
541 
542 
def remove_from_dict(d, items):
    """Delete every key listed in items from the dict d, in place.

    Keys that are not present in d are ignored.
    """
    for key in items:
        d.pop(key, None)
548 
549 
def myrange(a, b, n):
    """Return n equally spaced values from a to b (both included).

    The result is a numpy array of floats.  For n == 1 it consists of a
    only; for n < 1 it is empty.
    """

    # numpy.oldnumeric has been removed from NumPy.
    from numpy import arange

    if n > 1:
        # float() forces true division for integer arguments on Python 2
        step = (b - a) / float(n - 1)
    else:
        # a single point: avoid dividing by zero
        step = 0.

    # a + i * step always yields exactly n values, whereas the length of
    # arange(a, b + step, step) depends on floating point rounding.
    return a + step * arange(n)
559 
560 
def indent(lines, prefix):
    """Join lines into one string with a hanging indent.

    The first line is preceded by prefix, all other lines by as many
    blanks as prefix is long.  lines may be any sequence of strings and
    is not modified; an empty sequence gives an empty string.  prefix is
    converted with str().
    """

    if not lines:
        return ''

    prefix = str(prefix)
    tag = ' ' * len(prefix)

    # build a new list rather than overwriting lines[0] of the caller
    indented = [prefix + lines[0]]
    indented.extend(tag + s for s in lines[1:])

    return '\n'.join(indented)
569 
570 
def make_block(s, length=80, tol=10):
    """Break the text s into a list of lines of about `length` characters.

    s is first split at newlines; each piece is wrapped on its own by
    _make_block(), which also receives length and tol, and the results
    are concatenated in order.
    """
    wrapped = []

    for paragraph in s.split('\n'):
        wrapped.extend(_make_block(paragraph, length, tol))

    return wrapped
578 
579 
580 def _make_block(s, length, tol):
581 
582  spl = s.split(' ')
583  spl = [(w, ' ') for w in spl]
584 
585  words = []
586  for ll in spl:
587  g = ll[0].split('/')
588  g = [w + '/' for w in g]
589  g[-1] = g[-1][:-1] + ' '
590 
591  words += g
592 
593  spl = []
594  line = ''
595 
596  for i in range(len(words)):
597  word = words[i]
598 
599  if len(line + word) <= length:
600  line += word
601 
602  else:
603  if length - len(line) > tol:
604  m = length - len(line)
605  line += word[:m]
606  word = word[m:]
607 
608  if len(line) > 1 and line[0] == ' ' and \
609  line[1] != ' ':
610  line = line[1:]
611 
612  spl.append(line)
613  line = word
614 
615  line = line[:-1]
616  if len(line) > 1 and line[0] == ' ' and \
617  line[1] != ' ':
618  line = line[1:]
619 
620  spl.append(line)
621 
622  return spl
623 
624 
def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
               gzip=False, bin=True):
    """Dump x to filename, retrying until it works.

    If Dump() raises an IOError, a message is printed once and the dump
    is repeated every `delay` minutes until it succeeds.

    err_msg: text to print instead of the default 'IOError: ...' line.
    show_io_err: put the text of the IOError in front of err_msg.
    gzip, bin: passed on to Dump().
    """

    try:
        Dump(x, filename, gzip=gzip, bin=bin)

    except IOError as msg:
        io_error = str(msg)

    else:
        return

    import time

    if err_msg is None:
        print('IOError: %s' % io_error)

    elif show_io_err:
        print('%s. %s' % (io_error, err_msg))

    else:
        print(err_msg)

    written = False

    while not written:

        # delay is given in minutes

        time.sleep(60. * delay)

        try:
            Dump(x, filename, gzip=gzip, bin=bin)
            written = True

        except IOError:
            pass
656 
657 
def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
              gzip=False, mode='w', bin=True):
    """Dump x to filename after a trial dump to a temporary file.

    x is first written to a temporary file in the directory of filename
    with _save_dump(), which retries until writing succeeds.

    mode 'w': the temporary file is renamed to filename.
    mode 'a': the temporary file is deleted and x is appended to
              filename with Dump().

    err_msg, delay, show_io_err, gzip, bin: see _save_dump().

    Raises ValueError for any other mode.
    """

    import os
    import tempfile

    # Check the mode before anything is written: otherwise an invalid
    # mode is only noticed after the (possibly blocking) trial dump and
    # leaves the temporary file behind.
    if mode not in ('w', 'a'):
        raise ValueError('Mode "%s" invalid.' % mode)

    path, _filename = os.path.split(filename)

    # NOTE(review): tempfile.mktemp() only returns a name, so another
    # process could create the file first.  It is kept because a file
    # made by mkstemp() would pass its restrictive permissions on to the
    # renamed result.
    temp_path, temp_filename = os.path.split(tempfile.mktemp())
    temp_filename = os.path.join(path, temp_filename)

    _save_dump(x, temp_filename, err_msg, delay, show_io_err,
               gzip, bin)

    # if that worked, dump properly

    if mode == 'w':
        os.rename(temp_filename, filename)

    else:
        os.unlink(temp_filename)
        Dump(x, filename, mode='a', gzip=gzip, bin=bin)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:527
def put
If x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:189
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:180
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:212
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:276
def append
x must be a list; its elements are added to the pipe, dropping rightmost elements if necessary
Definition: utils.py:205
The general base class for IMP exceptions.
Definition: exception.h:48
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:309