IMP  2.2.0
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 #
6 # The Inferential Structure Determination (ISD) software library
7 #
8 # Authors: Michael Habeck and Wolfgang Rieping
9 #
10 # Copyright (C) Michael Habeck and Wolfgang Rieping
11 #
12 # All rights reserved.
13 #
14 # NO WARRANTY. This library is provided 'as is' without warranty of any
15 # kind, expressed or implied, including, but not limited to the implied
16 # warranties of merchantability and fitness for a particular purpose or
17 # a warranty of non-infringement.
18 #
19 # Distribution of substantively modified versions of this module is
20 # prohibited without the explicit permission of the copyright holders.
21 #
22 
23 import atexit
24 import sys
25 import time
26 import os
27 import os.path
28 import socket
29 
30 
31 from Queue import Queue
32 from threading import Thread
33 
# Module-level debug flag (not referenced by the code visible in this file).
debug = False

# One-letter amino-acid codes mapped to the corresponding three-letter
# residue names.
code = dict((
    ('A', 'ALA'),
    ('R', 'ARG'),
    ('N', 'ASN'),
    ('D', 'ASP'),
    ('C', 'CYS'),
    ('E', 'GLU'),
    ('Q', 'GLN'),
    ('G', 'GLY'),
    ('H', 'HIS'),
    ('I', 'ILE'),
    ('L', 'LEU'),
    ('K', 'LYS'),
    ('M', 'MET'),
    ('F', 'PHE'),
    ('P', 'PRO'),
    ('S', 'SER'),
    ('T', 'THR'),
    ('W', 'TRP'),
    ('Y', 'TYR'),
    ('V', 'VAL'),
))
58 
59 
def average(x):
    """Return the arithmetic mean of the items of x as a float.

    x must support both sum() and len(); an empty x raises
    ZeroDivisionError.
    """
    total = sum(x)
    return total / float(len(x))
62 
63 
def atexit_register(*args):
    """Register an exit handler.

    All positional arguments are forwarded unchanged to atexit.register
    (the callable first, then the arguments it should be called with).
    Nothing is returned.
    """
    atexit.register(*args)
67 
68 
def atexit_unregister(func):
    """Remove func from the registered exit handlers, if it is there.

    Unregistering a function that was never registered is a silent no-op.

    Where the atexit module exposes the private ``_exithandlers`` list
    (Python 2), the first entry whose callable equals func is removed.
    Where that list does not exist (Python 3), the public
    ``atexit.unregister`` is used instead.
    """
    handlers = getattr(atexit, '_exithandlers', None)

    if handlers is None:
        # No private handler list available: use the public API.
        atexit.unregister(func)
        return

    # Each entry is a tuple whose first item is the registered callable.
    registered = [entry[0] for entry in handlers]

    try:
        position = registered.index(func)
    except ValueError:
        # func is not registered; nothing to do.
        return

    handlers.pop(position)
79 
80 
class WatchDog(Thread):
    """Daemon thread that ends the process when life signs stop arriving.

    Life signs are time stamps passed to set(). run() compares the most
    recent one with the current time; once the difference exceeds the
    timeout a message is printed (and optionally appended to a log file)
    and the process is terminated with os._exit(0). In debug mode the
    interpreter is kept alive and only the watchdog loop is stopped.
    """

    def __init__(self, timeout, debug=False, logfile=None):
        """
        timeout: in minutes.
        debug: if true, print diagnostics and never kill the process.
        logfile: optional path ('~' is expanded) that receives one line
                 when the timeout is exceeded.
        """

        Thread.__init__(self)

        self.timeout = timeout * 60.
        self.debug = debug
        self._last_ping = None
        # Deliberately not called "_stop": an instance attribute of that
        # name would shadow threading.Thread._stop() on Python 3.
        self._stop_requested = False

        if logfile is not None:
            logfile = os.path.expanduser(logfile)

        self.logfile = logfile

        self.daemon = True

    def stop(self):
        "ask the run() loop to finish after its current iteration"
        self._stop_requested = True

    def set(self, x):
        "set the _last_ping variable of the WatchDog instance"

        if self.debug:
            print('Watchdog: set(%s) called.' % str(x))

        self._last_ping = x

    def _write_log(self, message):
        "append message, host name and current time to the log file, if any"

        if self.logfile is None:
            return

        try:
            # 'a' creates the file when it does not exist yet.
            with open(self.logfile, 'a') as f:
                f.write(message + '; host %s, %s\n' %
                        (socket.gethostname(), time.ctime()))
        except IOError:
            # Logging is best effort; the shutdown must still proceed.
            pass

    def run(self):
        """run the Watchdog thread, which sits in a loop sleeping for timeout/4. at
        each iteration, and if abs(time() - _last_ping) > timeout, exits.
        """

        while not self._stop_requested:

            # Read the shared value once so that a concurrent set() cannot
            # change it between computing delta and testing it.
            last_ping = self._last_ping

            if last_ping is not None:
                delta = abs(last_ping - time.time())
            else:
                delta = None

            if self.debug:

                if delta is None:
                    val = 'N/A s'
                else:
                    val = '%.0f s' % delta

                print('Watchdog: last life sign %s ago; timeout is %d min(s).'
                      % (val, self.timeout / 60.))

            if delta is not None and delta > self.timeout:

                s = 'No life sign for > %d minute(s)' % (self.timeout / 60.)

                print(s + ', exiting...')

                self._write_log(s)

                if not self.debug:
                    os._exit(0)
                else:
                    print('Watchdog: keeping Python interpreter alive.')
                    self.stop()

            time.sleep(self.timeout / 4.)
165 
166 
class SpinWheel:

    """Text spinner: each update() redraws the line on stdout with the next
    symbol of a four-symbol cycle."""

    symbols = ('-', '/', '|', '\\')

    def __init__(self):
        # index into symbols of the frame drawn by the next update()
        self.state = 0

    def update(self, s=''):
        """write a carriage return, the text s and the current symbol to
        stdout, flush, and advance to the next symbol"""

        import sys

        frame = self.symbols[self.state]

        out = sys.stdout
        out.write('\r%s%s' % (s, frame))
        out.flush()

        following = self.state + 1
        self.state = following % len(self.symbols)
182 
183 
class Pipe(object):

    """implements a FIFO pipe that merges lists (see self.put)

    The newest elements live at the front of self.pipe (index 0), the
    oldest at the back (index -1). A length <= 0 means the pipe is
    unbounded.
    """

    def __init__(self, length=-1):

        self.length = length
        self.pipe = []

    def _trim(self):
        "drop the oldest elements until the pipe respects its maximum length"

        if self.length > 0:
            del self.pipe[self.length:]

    def put(self, x):
        """if x is subscriptable, insert its contents at the beginning of the pipe.
        Else insert the element itself.
        If the pipe is full, drop the oldest elements.

        Note that an empty sequence is inserted as a single element,
        because the subscript test x[0] fails for it.
        """

        try:
            x[0]
        except (TypeError, IndexError, KeyError):
            self.pipe.insert(0, x)
        else:
            self.pipe = list(x) + self.pipe

        # Merging a list can add several elements at once, so trim to the
        # maximum length rather than dropping exactly one element.
        self._trim()

    def append(self, x):
        """x must be a list; its contents are inserted at the beginning of the
        pipe, dropping the oldest (rightmost) elements if necessary
        """

        self.pipe = list(x) + self.pipe
        # An unbounded pipe (length <= 0) must not lose elements here.
        self._trim()

    def get(self):
        """returns the oldest element, without popping it out of the pipe.
        Popping occurs in the put() method.
        Raises IndexError if the pipe is empty.
        """
        return self.pipe[-1]

    def __getitem__(self, index):
        return self.pipe.__getitem__(index)

    def __len__(self):
        return len(self.pipe)

    def __str__(self):
        return str(self.pipe)

    def is_full(self):
        "true when the pipe holds exactly self.length elements"
        return len(self.pipe) == self.length

    __repr__ = __str__
235 
236 
class SortedQueue(Queue):

    """Queue whose items are kept ordered by the average of their ``time``
    attribute and whose get() draws an item at random.

    NOTE(review): the exact semantics of the Isd.misc.mathutils helpers
    (average, rescale_uniform, draw_dirichlet) are not visible here; the
    descriptions below only state how their results are used.
    """

    def _init(self, maxsize):
        # Queue stores its items in a collections.deque by default, which
        # supports neither sort() nor slicing; a plain list does.
        self.queue = []

    def sort(self):
        """order the stored items by average(item.time) and cache those
        averages, in the same order, in self.times"""

        from numpy import array
        from Isd.misc.mathutils import average

        self.queue.sort(key=lambda item: average(item.time))

        self.times = array([average(item.time) for item in self.queue])

    def _put(self, item):

        Queue._put(self, item)
        self.sort()

    def _get(self):
        """remove and return the item at the index returned by
        draw_dirichlet for the weights (1 - rescale_uniform(times)) ** 2"""

        from numpy import power
        from Isd.misc.mathutils import draw_dirichlet, rescale_uniform

        # compute "probabilities"

        p = 1. - rescale_uniform(self.times)
        p = power(p, 2.)

        index = draw_dirichlet(p)

        val = self.queue.pop(index)

        # keep self.times in step with the remaining items
        if self.queue:
            self.sort()

        return val
273 
274 
def load_pdb(filename):
    """Return a Scientific.IO.PDB Structure built from filename.

    A leading '~' or '~user' in filename is expanded first.
    """

    import os

    from Scientific.IO.PDB import Structure

    expanded = os.path.expanduser(filename)

    return Structure(expanded)
282 
283 
def copyfiles(src_path, dest_path, pattern=None, verbose=False):
    """Copy every entry of src_path matching the glob pattern into dest_path.

    pattern defaults to '*'. Copies keep their base name. With verbose set,
    the path of each source file is printed after it has been copied.
    """

    from glob import glob
    from shutil import copyfile
    import os

    wildcard = '*' if pattern is None else pattern

    for source in glob(os.path.join(src_path, wildcard)):

        target = os.path.join(dest_path, os.path.basename(source))
        copyfile(source, target)

        if verbose:
            print(source)
300 
301 
def touch(filename):
    """Create filename if it does not exist and update its time stamps.

    Existing content is preserved: the file is opened for appending, not
    for writing. If filename is a directory the call silently does
    nothing. Any other failure to open the file is re-raised unchanged
    (IOError).
    """

    import os

    try:
        f = open(filename, 'a')
        try:
            # Opening for append does not modify the file, so set the
            # access/modification times to "now" explicitly.
            os.utime(filename, None)
        finally:
            f.close()

    except (IOError, OSError):
        # Directories cannot be opened for writing; that case is tolerated.
        if not os.path.isdir(filename):
            raise
314 
315 # Yannick
316 
317 
def read_sequence_file(filename, first_residue_number=1):
    """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
    no:3-letter code. Fails on unknown amino acids.

    filename: path of the sequence file. Content starting with '>' is
        treated as FASTA: the first line is skipped and every remaining
        non-whitespace character is a 1-letter code (an unknown letter
        raises KeyError). Any other content is split on whitespace into
        3-letter codes; unknown ones only produce a printed warning.
    first_residue_number: number assigned to the first residue.

    Raises IOError if the file cannot be opened.
    """

    filename = os.path.abspath(filename)
    try:
        f = open(filename)
    except IOError:
        raise IOError('Could not open sequence file "%s".' % filename)

    try:
        seq = f.read().upper()
    finally:
        f.close()

    if seq.startswith('>'):
        print("Detected FASTA 1-letter sequence")
        # get rid of first line and get sequence in one line; a header
        # without a trailing newline leaves an empty sequence
        body = seq.partition('\n')[2]
        names = [code[letter] for letter in ''.join(body.split())]
    else:
        names = seq.split()
        known = set(code.values())
        for x in names:
            if x not in known:
                print('Warning: unknown 3-letter code: %s' % x)

    numbers = range(first_residue_number, first_residue_number + len(names))
    return dict(zip(numbers, names))
345 
346 # Yannick
347 
348 
def check_residue(a, b):
    """checks whether residue codes a and b are the same, doing necessary conversions

    Both codes are upper-cased; 1-letter codes are expanded to 3-letter
    codes through the module-level ``code`` table. Returns True when the
    resulting 3-letter codes are equal. Otherwise a message is printed
    and False is returned (unknown 1-letter code, a code that is not
    three characters long, or differing residues).
    """
    residues = [a.upper(), b.upper()]

    # expand 1-letter codes, first argument first
    for position, name in enumerate(residues):
        if len(name) != 1:
            continue
        if name not in code:
            print('Warning: unknown 1-letter code: %s' % name)
            return False
        residues[position] = code[name]

    # whatever is left must be a 3-letter code
    for name in residues:
        if len(name) != 3:
            print('Unknown residue code %s' % name)
            return False

    first, second = residues
    if first == second:
        return True

    print('Residues %s and %s are not the same' % (first, second))
    return False
374 
375 
def my_glob(x, do_touch=False):
    """Return glob(x).

    With do_touch set, touch() is first called on the directory part of
    the pattern x.
    """

    from glob import glob

    if do_touch:

        import os

        # os.system('touch %s' % path) #this is very inefficient
        touch(os.path.dirname(x))  # this is better (4x to 6x faster)

    matches = glob(x)

    return matches
390 
391 
def Dump(this, filename, gzip=0, mode='w', bin=1):
    """
    Dump(this, filename, gzip = 0)
    Supports also '~' or '~user'.

    Pickle the object `this` into filename.

    gzip: if true, write through gzip.GzipFile.
    mode: 'w' to overwrite the file, 'a' to append another pickle to it.
    bin:  passed to pickle.dump as the protocol number.

    Raises ValueError for any other mode.

    NOTE: pickles must only be loaded again from trusted sources.
    """

    import os
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if mode not in ('w', 'a'):
        raise ValueError("mode has to be 'w' (write) or 'a' (append)")

    # Pickle data is binary; always use a binary-mode handle.
    if gzip:
        import gzip as gzip_module
        f = gzip_module.GzipFile(filename, mode + 'b')
    else:
        f = open(filename, mode + 'b')

    try:
        pickle.dump(this, f, int(bin))
    finally:
        f.close()
415 
416 
def Load(filename, gzip=0, force=0):
    """
    Load(filename, gzip=0, force=0)

    Unpickle every object stored one after another in filename and
    return them merged with `+=` (so the chunks are expected to be
    lists or other objects supporting in-place addition). Returns None
    for a file without any pickle. Supports '~' or '~user'.

    gzip: if true, read through gzip.GzipFile; None is returned when the
          file cannot be opened.
    force: returns all objects that could be unpickled. Useful
    when unpickling of sequential objects fails at some point.
    Without force, the error of the failing chunk is re-raised.

    NOTE: unpickling executes code from the file; only load trusted data.
    """
    import os
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if gzip:
        import gzip as gzip_module
        try:
            f = gzip_module.GzipFile(filename)
        except (IOError, OSError):
            return None
    else:
        # Pickle data is binary; always use a binary-mode handle.
        f = open(filename, 'rb')

    objects = None
    n = 0

    try:
        while True:

            try:
                chunk = pickle.load(f)

                if objects is None:
                    objects = chunk
                else:
                    objects += chunk

                n += 1

            except EOFError:
                break

            except Exception:
                print('Could not load chunk %d. Stopped.' % n)

                if force:
                    # keep what has been read so far
                    break

                raise
    finally:
        f.close()

    return objects
470 
471 
def get_pdb(pdb_entry, dest='.', verbose_level=0):
    """Download a PDB entry from the EBI FTP server and decompress it.

    pdb_entry: entry identifier; it is lower-cased to build the remote
        file name 'pdb<entry>.ent.gz'.
    dest: destination directory ('~' is expanded). The result is stored
        there as '<pdb_entry>.pdb'; the downloaded '.pdb.gz' is removed
        after decompression.
    verbose_level: passed to ftplib's set_debuglevel.

    Raises IOError if the server rejects the retrieval of the file.
    """

    import ftplib
    import gzip
    import os
    import shutil

    url = 'ftp.ebi.ac.uk'
    path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
    filename_template = 'pdb%s.ent.gz'

    dest = os.path.expanduser(dest)

    filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
    remote_name = filename_template % pdb_entry.lower()

    ftp = ftplib.FTP(url)

    try:
        ftp.login()
        ftp.set_debuglevel(verbose_level)

        ftp.cwd(path)

        f = open(filename, 'wb')

        try:
            try:
                ftp.retrbinary('RETR %s' % remote_name, f.write)
            finally:
                f.close()

        except ftplib.error_perm:
            # do not leave an empty or partial download behind
            os.remove(filename)
            raise IOError('File %s not found on server' % filename)

        ftp.quit()

    finally:
        # releases the connection on every path; harmless after quit()
        ftp.close()

    # Decompress in-process (equivalent of "gunzip -f"): write the file
    # without the '.gz' suffix, overwriting it if present, then remove
    # the archive. This avoids passing the file name through a shell.
    target = filename[:-len('.gz')]

    source = gzip.open(filename, 'rb')
    try:
        out = open(target, 'wb')
        try:
            shutil.copyfileobj(source, out)
        finally:
            out.close()
    finally:
        source.close()

    os.remove(filename)
506 
507 
def compile_index_list(chain, atom_names, residue_index_list=None):
    """Collect the ``index`` attribute of selected atoms of a chain.

    chain: indexable collection of residues; each residue maps atom
        names to objects carrying an ``index`` attribute.
    atom_names: names to look up in every residue, in that order; None
        selects all atoms of each residue in sorted name order. Names a
        residue does not contain are skipped.
    residue_index_list: residues to visit (default: all, in order).

    Returns (index_list, index_map): the atom indices in visiting order
    and a dict mapping each atom index to its position in that list.
    """

    residues = residue_index_list
    if residues is None:
        residues = range(len(chain))

    atom_indices = []
    positions = {}

    for res in residues:

        residue = chain[res]

        if atom_names is None:
            wanted = sorted(residue.keys())
        else:
            wanted = atom_names

        for atom_name in wanted:

            if atom_name not in residue:
                continue

            atom_index = residue[atom_name].index
            # position of this atom == number of atoms collected so far
            positions[atom_index] = len(atom_indices)
            atom_indices.append(atom_index)

    return atom_indices, positions
535 
536 
def get_coordinates(universe, E, indices=None, atom_names=('CA',),
                    residue_index_list=None, atom_index_list=None):
    """Return the selected atom coordinates for several ensemble members.

    universe: object providing get_polymer() and a coordinate array X.
    E: ensemble providing len() and torsion_angles[i].
    indices: members of E to evaluate (default: all).
    atom_names, residue_index_list: atom selection, handed to
        compile_index_list when atom_index_list is not given.
    atom_index_list: explicit row indices into universe.X.

    For every member the chain's torsion angles are set and the selected
    rows of universe.X are copied; the copies are returned stacked in
    one array (first axis: ensemble member).

    NOTE(review): presumably set_torsions() updates universe.X in place -
    confirm against the universe/chain implementation.
    """

    from numpy import array, take

    if indices is None:
        indices = range(len(E))

    chain = universe.get_polymer()

    if atom_index_list is None:
        atom_index_list, _ = compile_index_list(chain, atom_names,
                                                residue_index_list)

    frames = []

    for i in indices:

        chain.set_torsions(E.torsion_angles[i], 1)

        # axis=0 selects whole rows; this was the default of the old
        # numpy.oldnumeric.take and must be explicit with numpy.take.
        frames.append(array(take(universe.X, atom_index_list, axis=0)))

    return array(frames)
562 
563 
def map_angles(angles, period=None):
    """
    maps angles into interval [-pi,pi]

    angles: numpy array (or scalar) of angles.
    period: half-width of the target interval; defaults to pi, so that
        angles are mapped into [-period, period].
    """

    # numpy.oldnumeric no longer exists in current numpy releases; the
    # same functions are available from numpy itself.
    from numpy import fmod, greater, logical_not

    if period is None:
        from numpy import pi as period

    # positive and non-positive angles are wrapped separately because
    # fmod keeps the sign of its first argument
    mask = greater(angles, 0.)

    return mask * (fmod(angles + period, 2 * period) - period) + \
        logical_not(mask) * (fmod(angles - period, 2 * period) + period)
578 
579 
def remove_from_dict(d, items):
    """Delete from d, in place, every key listed in items.

    Keys that are not present in d are ignored.
    """

    for key in items:
        d.pop(key, None)
585 
586 
def myrange(a, b, n):
    """Return an array of n equally spaced values from a to b inclusive.

    For n == 1 the array contains only a.
    """

    from numpy import arange, array

    if n == 1:
        # a single point: avoid dividing by n - 1 == 0
        return array([float(a)])

    # float() keeps integer arguments from truncating the step
    step = (b - a) / float(n - 1)

    # rounding may produce one value too many; cut to exactly n
    x = arange(a, b + step, step)

    return x[:n]
596 
597 
def indent(lines, prefix):
    """Join lines with newlines, prefixing the first line with prefix and
    every following line with as many blanks as prefix is long.

    lines: sequence of strings; it is left unmodified.
    prefix: string put in front of the first line.

    Returns the joined text, or '' for an empty sequence.
    """

    if not lines:
        return ''

    tag = ' ' * len(str(prefix))

    # build a new list instead of writing into the caller's one
    indented = [prefix + lines[0]]
    indented.extend(tag + s for s in lines[1:])

    return '\n'.join(indented)
606 
607 
def make_block(s, length=80, tol=10):
    """Break the text s into a list of lines of at most `length` characters.

    s is first split at newlines; every resulting paragraph is wrapped
    separately by _make_block and the pieces are concatenated.
    """
    return [line
            for paragraph in s.split('\n')
            for line in _make_block(paragraph, length, tol)]


def _make_block(s, length, tol):
    """Wrap one newline-free text into lines of at most `length` characters.

    Lines are broken after blanks and after '/'. When a word does not fit
    and more than `tol` characters are still free on the current line,
    the line is filled up with the beginning of that word and the rest
    is carried over to the next line.
    """

    def strip_single_leading_blank(text):
        # drop one leading blank unless it is followed by a second one
        if len(text) > 1 and text[0] == ' ' and text[1] != ' ':
            return text[1:]
        return text

    # breakable units: '/'-separated pieces keep their '/', and the last
    # piece of every blank-separated token gets the blank back
    words = []
    for token in s.split(' '):
        pieces = token.split('/')
        words.extend(piece + '/' for piece in pieces[:-1])
        words.append(pieces[-1] + ' ')

    wrapped = []
    line = ''

    for word in words:

        if len(line) + len(word) <= length:
            line += word
            continue

        room = length - len(line)
        if room > tol:
            line += word[:room]
            word = word[room:]

        wrapped.append(strip_single_leading_blank(line))
        line = word

    # the last unit always ends with the blank appended above
    wrapped.append(strip_single_leading_blank(line[:-1]))

    return wrapped
660 
661 
def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
               gzip=False, bin=True):
    """Dump x to filename, retrying until it succeeds.

    If the first attempt fails with an IOError, one message is printed
    (built from the error and/or err_msg, depending on show_io_err) and
    the dump is retried every `delay` minutes until it no longer raises
    an IOError.
    """

    import time

    try:
        Dump(x, filename, gzip=gzip, bin=bin)
        return

    except IOError as msg:

        if err_msg is None:
            report = 'IOError: %s' % str(msg)
        elif show_io_err:
            report = '%s. %s' % (str(msg), err_msg)
        else:
            report = err_msg

        print(report)

    while True:

        # wait for `delay` minutes before the next attempt

        time.sleep(60. * delay)

        try:
            Dump(x, filename, gzip=gzip, bin=bin)
        except IOError:
            continue

        break
693 
694 
def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
              gzip=False, mode='w', bin=True):
    """Dump x to filename via a temporary file in the same directory.

    x is first written to a temporary file next to filename (through
    _save_dump, which retries on IOError). Then, for mode 'w', the
    temporary file is renamed to filename; for mode 'a', it is deleted
    and x is appended to filename with Dump.

    Raises Exception for any other mode, before anything is written.
    """

    import os
    import tempfile

    # Validate first, so an invalid mode does not leave a temporary file
    # behind.
    if mode not in ('w', 'a'):
        raise Exception('Mode "%s" invalid.' % mode)

    path, _filename = os.path.split(filename)

    # Only the generated name is used; the file itself is created in the
    # directory of the target so that the rename stays on one file system.
    temp_path, temp_filename = os.path.split(tempfile.mktemp())
    temp_filename = os.path.join(path, temp_filename)

    _save_dump(x, temp_filename, err_msg, delay, show_io_err,
               gzip, bin)

    # if that worked, dump properly

    if mode == 'w':
        os.rename(temp_filename, filename)

    else:
        os.unlink(temp_filename)
        Dump(x, filename, mode='a', gzip=gzip, bin=bin)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:564
def put
if x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:193
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:184
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:216
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:318
def append
x must be a list and will be appended to the end of the pipe, dropping rightmost elements if necessary
Definition: utils.py:209
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:349