IMP  2.0.0
The Integrative Modeling Platform
utils.py
1 ##
2 ## The Inferential Structure Determination (ISD) software library
3 ##
4 ## Authors: Michael Habeck and Wolfgang Rieping
5 ##
6 ## Copyright (C) Michael Habeck and Wolfgang Rieping
7 ##
8 ## All rights reserved.
9 ##
10 ## NO WARRANTY. This library is provided 'as is' without warranty of any
11 ## kind, expressed or implied, including, but not limited to the implied
12 ## warranties of merchantability and fitness for a particular purpose or
13 ## a warranty of non-infringement.
14 ##
15 ## Distribution of substantively modified versions of this module is
16 ## prohibited without the explicit permission of the copyright holders.
17 ##
18 
19 import atexit
20 import sys
21 import time
22 import os
23 import os.path
24 import socket
25 
26 
27 from Queue import Queue
28 from threading import Thread
29 
30 debug = False
31 
32 code={
33  'A':'ALA',
34  'R':'ARG',
35  'N':'ASN',
36  'D':'ASP',
37  'C':'CYS',
38  'E':'GLU',
39  'Q':'GLN',
40  'G':'GLY',
41  'H':'HIS',
42  'I':'ILE',
43  'L':'LEU',
44  'K':'LYS',
45  'M':'MET',
46  'F':'PHE',
47  'P':'PRO',
48  'S':'SER',
49  'T':'THR',
50  'W':'TRP',
51  'Y':'TYR',
52  'V':'VAL'
53  }
54 
55 
56 def average(x):
57  return sum(x)/float(len(x))
58 
59 def atexit_register(*args):
60 
61  atexit.register(*args)
62 
63 def atexit_unregister(func):
64 
65  exit_funcs= [x[0] for x in atexit._exithandlers]
66 
67  try:
68  i = exit_funcs.index(func)
69  except:
70  return
71 
72  atexit._exithandlers.pop( i )
73 
74 class WatchDog(Thread):
75 
76  def __init__(self, timeout, debug=False, logfile=None):
77 
78  """
79  timeout: in minutes.
80  """
81 
82  Thread.__init__(self)
83 
84  self.timeout = timeout*60.
85  self.debug = debug
86  self._last_ping = None
87  self._stop = False
88 
89  if logfile is not None:
90  logfile = os.path.expanduser(logfile)
91 
92  self.logfile = logfile
93 
94  self.setDaemon(True)
95 
96  def stop(self):
97  self._stop = True
98 
99  def set(self, x):
100  "set the _last_ping variable of the WatchDog instance"
101 
102  if self.debug:
103  print 'Watchdog: set(%s) called.' % str(x)
104 
105  self._last_ping = x
106 
107  def run(self):
108  """run the Watchdog thread, which sits in a loop sleeping for timeout/4. at
109  each iteration, and if abs(time() - _last_ping) > timeout, exits.
110  """
111 
112  while not self._stop:
113 
114  if self._last_ping is not None:
115  delta = abs(self._last_ping - time.time())
116  else:
117  delta = None
118 
119  if self.debug:
120 
121  if delta is None:
122  val = 'N/A s'
123  else:
124  val = '%.0f s' % delta
125 
126  print 'Watchdog: last life sign %s ago; timeout is %d min(s).' % \
127  (val, self.timeout/60.)
128 
129  if self._last_ping is not None and delta > self.timeout:
130 
131  s = 'No life sign for > %d minute(s)' % (self.timeout/60.)
132 
133  print s + ', exiting...'
134 
135  if self.logfile is not None:
136 
137  if os.path.exists(self.logfile):
138  mode = 'a'
139  else:
140  mode = 'w'
141 
142  try:
143  f = open(self.logfile, mode)
144  f.write(s+'; host %s, %s\n' % (socket.gethostname(), time.ctime()))
145  f.close()
146 
147  except IOError:
148  pass
149 
150  if not self.debug:
151  os._exit(0)
152  else:
153  print 'Watchdog: keeping Python interpreter alive.'
154  self.stop()
155 
156  time.sleep(self.timeout/4.)
157 
158 class SpinWheel:
159 
160  symbols = ('-', '/', '|', '\\')
161 
162  def __init__(self):
163  self.state = 0
164 
165  def update(self, s=''):
166 
167  import sys
168 
169  sys.stdout.write('\r%s%s' % (s, self.symbols[self.state]))
170  sys.stdout.flush()
171 
172  self.state = (self.state + 1) % len(self.symbols)
173 
174 class Pipe(object):
175  """implements a FIFO pipe that merges lists (see self.put)"""
176 
177  def __init__(self, length = -1):
178 
179  self.length = length
180  self.pipe = []
181 
182  def put(self, x):
183  """if x is subscriptable, insert its contents at the beginning of the pipe.
184  Else insert the element itself.
185  If the pipe is full, drop the oldest element.
186  """
187 
188  try:
189  x[0]
190  self.pipe = list(x) + self.pipe
191 
192  except:
193  self.pipe.insert(0, x)
194 
195  if self.length > 0 and len(self.pipe) > self.length:
196  self.pipe = self.pipe[:-1]
197 
198  def append(self, x):
199  """ x must be a list and will be appended to the end of the pipe, dropping
200  rightmost elements if necessary
201  """
202 
203  self.pipe = (list(x) + self.pipe)[:self.length]
204 
205  def get(self):
206  """returns the oldest element, without popping it out of the pipe.
207  Popping occurs in the put() method
208  """
209  return self.pipe[-1]
210 
211  def __getitem__(self, index):
212  return self.pipe.__getitem__(index)
213 
214  def __len__(self):
215  return len(self.pipe)
216 
217  def __str__(self):
218  return str(self.pipe)
219 
220  def is_full(self):
221  return len(self.pipe) == self.length
222 
223  __repr__ = __str__
224 
225 class SortedQueue(Queue):
226 
227  def sort(self):
228 
229  from numpy.oldnumeric import array
230  from Isd.misc.mathutils import average
231 
232  self.queue.sort(lambda a, b: cmp(average(a.time), average(b.time)))
233 
234  self.times = array([average(x.time) for x in self.queue])
235 
236  def _put(self, item):
237 
238  Queue._put(self, item)
239  self.sort()
240 
241  def _get(self):
242 
243  from numpy.oldnumeric import power
244  from Isd.misc.mathutils import draw_dirichlet, rescale_uniform
245 
246  ## compute "probabilities"
247 
248  p = 1. - rescale_uniform(self.times)
249  p = power(p, 2.)
250 
251  index = draw_dirichlet(p)
252 
253  val = self.queue[index]
254 
255  self.queue = self.queue[:index] + self.queue[index + 1:]
256 
257  if len(self.queue):
258  self.sort()
259 
260  return val
261 
262 def load_pdb(filename):
263 
264  import os
265 
266  from Scientific.IO.PDB import Structure
267 
268  return Structure(os.path.expanduser(filename))
269 
270 def copyfiles(src_path, dest_path, pattern=None, verbose=False):
271 
272  from glob import glob
273  from shutil import copyfile
274  import os
275 
276  if pattern is None:
277  pattern = '*'
278 
279  file_list = glob(os.path.join(src_path,pattern))
280 
281  for f in file_list:
282  copyfile(f, os.path.join(dest_path, os.path.basename(f)))
283 
284  if verbose:
285  print f
286 
287 def touch(filename):
288 
289  try:
290  f = open(filename, 'w')
291  f.close()
292 
293  except IOError, error:
294  import os
295  if os.path.isdir(filename):
296  pass
297  else:
298  raise IOError, error
299 
300 #Yannick
301 def read_sequence_file(filename, first_residue_number=1):
302  """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
303  no:3-letter code. Fails on unknown amino acids.
304  """
305 
306  filename = os.path.abspath(filename)
307  try:
308  f = open(filename)
309  except IOError, msg:
310  raise IOError, 'Could not open sequence file "%s".' % filename
311  seq = f.read().upper()
312 
313  if seq.startswith('>'):
314  print "Detected FASTA 1-letter sequence"
315  pos=seq.find('\n')
316  #get rid of first line and get sequence in one line
317  seq=''.join(seq[pos+1:].split())
318  names = [code[i] for i in seq]
319  numbers = range(first_residue_number, first_residue_number+len(seq))
320  return dict(zip(numbers,names))
321  else:
322  l=seq.split()
323  for x in l:
324  if not x in code.values():
325  print 'Warning: unknown 3-letter code: %s' % x
326  numbers = range(first_residue_number, first_residue_number+len(l))
327  return dict(zip(numbers,l))
328 
329 #Yannick
330 def check_residue(a,b):
331  "checks whether residue codes a and b are the same, doing necessary conversions"
332  a=a.upper()
333  b=b.upper()
334  if len(a) == 1:
335  if a not in code:
336  print 'Warning: unknown 1-letter code: %s' % a
337  return False
338  a=code[a]
339  if len(b) == 1:
340  if b not in code:
341  print 'Warning: unknown 1-letter code: %s' % b
342  return False
343  b=code[b]
344  if len(a) != 3:
345  print 'Unknown residue code %s' % a
346  return False
347  if len(b) != 3:
348  print 'Unknown residue code %s' % b
349  return False
350  if a != b:
351  print 'Residues %s and %s are not the same' % (a,b)
352  return False
353  else:
354  return True
355 
356 
357 
358 def my_glob(x, do_touch=False):
359 
360  from glob import glob
361 
362  if do_touch:
363 
364  import os
365 
366  path, name = os.path.split(x)
367 
368  #os.system('touch %s' % path) #this is very inefficient
369  touch(path) #this is better (4x to 6x faster)
370 
371  return glob(x)
372 
373 def Dump(this, filename, gzip = 0, mode = 'w', bin=1):
374  """
375  Dump(this, filename, gzip = 0)
376  Supports also '~' or '~user'.
377  """
378 
379  import os, cPickle
380 
381  filename = os.path.expanduser(filename)
382 
383  if not mode in ['w', 'a']:
384  raise "mode has to be 'w' (write) or 'a' (append)"
385 
386  if gzip:
387  import gzip
388  f = gzip.GzipFile(filename, mode)
389  else:
390  f = open(filename, mode)
391 
392  cPickle.dump(this, f, bin)
393 
394  f.close()
395 
396 def Load(filename, gzip = 0, force=0):
397  """
398  Load(filename, gzip=0, force=0)
399 
400  force: returns all objects that could be unpickled. Useful
401  when unpickling of sequential objects fails at some point.
402  """
403  import cPickle, os
404 
405  filename = os.path.expanduser(filename)
406 
407  if gzip:
408  import gzip
409  try:
410  f = gzip.GzipFile(filename)
411  except:
412  return
413 
414  f = open(filename)
415 
416  objects = None
417 
418  eof = 0
419  n = 0
420 
421  while not eof:
422 
423  try:
424  object = cPickle.load(f)
425 
426  if objects is None:
427  objects = object
428 
429  else:
430  objects += object
431 
432  n += 1
433 
434  except EOFError:
435  eof = 1
436 
437  except Exception:
438  print 'Could not load chunk %d. Stopped.' % n
439 
440  if force:
441  eof = 1
442  else:
443  object = cPickle.load(f)
444 
445  f.close()
446 
447  return objects
448 
449 def get_pdb(pdb_entry, dest='.', verbose_level=0):
450 
451  import ftplib
452  from tempfile import mktemp
453  import os
454 
455  url = 'ftp.ebi.ac.uk'
456  path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
457  filename_template = 'pdb%s.ent.gz'
458 
459  dest = os.path.expanduser(dest)
460 
461  ftp = ftplib.FTP(url)
462  ftp.login()
463  ftp.set_debuglevel(verbose_level)
464 
465  ftp.cwd(path)
466 
467  filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
468 
469  f = open(filename, 'wb')
470 
471  try:
472  ftp.retrbinary('RETR %s' % filename_template % pdb_entry.lower(),
473  f.write)
474 
475  f.close()
476 
477  ftp.quit()
478 
479  except ftplib.error_perm:
480  raise IOError, 'File %s not found on server' % filename
481 
482  os.system('gunzip -f %s' % filename)
483 
484 def compile_index_list(chain, atom_names, residue_index_list=None):
485 
486  if residue_index_list is None:
487  residue_index_list = range(len(chain))
488 
489  index_list = []
490 
491  names = atom_names
492 
493  index_map = {}
494 
495  i = 0
496 
497  for res_index in residue_index_list:
498 
499  if atom_names is None:
500  names = chain[res_index].keys()
501  names.sort()
502 
503  for n in names:
504 
505  if n in chain[res_index]:
506  index = chain[res_index][n].index
507  index_list.append(index)
508  index_map[index] = i
509  i += 1
510 
511  return index_list, index_map
512 
513 def get_coordinates(universe, E, indices=None, atom_names=('CA',),
514  residue_index_list=None, atom_index_list=None):
515 
516  from numpy.oldnumeric import array, take
517 
518  if indices is None:
519  indices = range(len(E))
520 
521  chain = universe.get_polymer()
522 
523  if atom_index_list is None:
524  atom_index_list, index_map = compile_index_list(chain, atom_names,
525  residue_index_list)
526 
527  l = []
528 
529  for i in indices:
530 
531  chain.set_torsions(E.torsion_angles[i], 1)
532 
533  X = array(take(universe.X, atom_index_list))
534 
535  l.append(X)
536 
537  return array(l)
538 
539 def map_angles(angles, period=None):
540  """
541  maps angles into interval [-pi,pi]
542  """
543 
544  from numpy.oldnumeric import fmod, greater, logical_not
545 
546  if period is None:
547  from numpy.oldnumeric import pi as period
548 
549  mask = greater(angles, 0.)
550 
551  return mask * (fmod(angles+period, 2*period)-period) + \
552  logical_not(mask) * (fmod(angles-period, 2*period)+period)
553 
554 def remove_from_dict(d, items):
555 
556  for item in items:
557  if item in d:
558  del d[item]
559 
560 def myrange(a, b, n):
561 
562  from numpy.oldnumeric import arange
563 
564  step = (b - a) / (n - 1)
565 
566  x = arange(a, b + step, step)
567 
568  return x[:n]
569 
570 def indent(lines, prefix):
571 
572  tag = ' ' * len(str(prefix))
573 
574  lines[0] = prefix + lines[0]
575  lines = [lines[0]] + map(lambda s, t = tag: t + s, lines[1:])
576 
577  return '\n'.join(lines)
578 
579 def make_block(s, length = 80, tol = 10):
580  blocks = s.split('\n')
581  l = []
582  for block in blocks:
583  l += _make_block(block, length, tol)
584 
585  return l
586 
587 def _make_block(s, length, tol):
588 
589  l = s.split(' ')
590  l = [(w,' ') for w in l]
591 
592  words = []
593  for ll in l:
594  g = ll[0].split('/')
595  g = [w+'/' for w in g]
596  g[-1] = g[-1][:-1] + ' '
597 
598  words += g
599 
600  l = []
601  line = ''
602 
603  for i in range(len(words)):
604  word = words[i]
605 
606  if len(line + word) <= length:
607  line += word
608 
609  else:
610  if length - len(line) > tol:
611  m = length - len(line)
612  line += word[:m]
613  word = word[m:]
614 
615  if len(line) > 1 and line[0] == ' ' and \
616  line[1] <> ' ':
617  line = line[1:]
618 
619  l.append(line)
620  line = word
621 
622  line = line[:-1]
623  if len(line) > 1 and line[0] == ' ' and \
624  line[1] <> ' ':
625  line = line[1:]
626 
627  l.append(line)
628 
629  return l
630 
631 def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
632  gzip=False, bin=True):
633 
634  try:
635  Dump(x, filename, gzip=gzip, bin=bin)
636 
637  except IOError, msg:
638 
639  import time
640 
641  if err_msg is None:
642  print 'IOError: %s' % str(msg)
643 
644  else:
645  if show_io_err:
646  print '%s. %s' % (str(msg), err_msg)
647  else:
648  print err_msg
649 
650  while 1:
651 
652  ## wait for 10 minutes
653 
654  time.sleep(60. * delay)
655 
656  try:
657  Dump(x, filename, gzip=gzip, bin=bin)
658  break
659 
660  except IOError:
661  continue
662 
663 def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
664  gzip=False, mode='w', bin=True):
665 
666  import os, tempfile
667 
668  path, _filename = os.path.split(filename)
669 
670  temp_path, temp_filename = os.path.split(tempfile.mktemp())
671  temp_filename = os.path.join(path, temp_filename)
672 
673  _save_dump(x, temp_filename, err_msg, delay, show_io_err,
674  gzip, bin)
675 
676  ## if that worked, dump properly
677 
678  if mode == 'w':
679  os.rename(temp_filename, filename)
680 
681  elif mode == 'a':
682  os.unlink(temp_filename)
683  Dump(x, filename, mode='a', gzip=gzip, bin=bin)
684 
685  else:
686  raise StandardError, 'Mode "%s" invalid.' % mode