IMP  2.1.0
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 ##
6 ## The Inferential Structure Determination (ISD) software library
7 ##
8 ## Authors: Michael Habeck and Wolfgang Rieping
9 ##
10 ## Copyright (C) Michael Habeck and Wolfgang Rieping
11 ##
12 ## All rights reserved.
13 ##
14 ## NO WARRANTY. This library is provided 'as is' without warranty of any
15 ## kind, expressed or implied, including, but not limited to the implied
16 ## warranties of merchantability and fitness for a particular purpose or
17 ## a warranty of non-infringement.
18 ##
19 ## Distribution of substantively modified versions of this module is
20 ## prohibited without the explicit permission of the copyright holders.
21 ##
22 
23 import atexit
24 import sys
25 import time
26 import os
27 import os.path
28 import socket
29 
30 
31 from Queue import Queue
32 from threading import Thread
33 
# Module-level debug switch. NOTE(review): nothing in this listing reads it;
# WatchDog takes its own ``debug`` argument instead.
debug = False

# Maps the one-letter code of each of the 20 standard amino acids to its
# three-letter residue name. read_sequence_file() and check_residue() use it
# to translate and validate residue codes.
code={
    'A':'ALA',
    'R':'ARG',
    'N':'ASN',
    'D':'ASP',
    'C':'CYS',
    'E':'GLU',
    'Q':'GLN',
    'G':'GLY',
    'H':'HIS',
    'I':'ILE',
    'L':'LEU',
    'K':'LYS',
    'M':'MET',
    'F':'PHE',
    'P':'PRO',
    'S':'SER',
    'T':'THR',
    'W':'TRP',
    'Y':'TYR',
    'V':'VAL'
    }
58 
59 
def average(x):
    """Return the arithmetic mean of the items of *x* as a float.

    *x* must work with both sum() and len(). An empty *x* raises
    ZeroDivisionError because the item count is used as the divisor.
    """
    total = sum(x)
    count = float(len(x))
    return total / count
62 
def atexit_register(*args):
    """Forward all positional arguments unchanged to ``atexit.register``.

    The value returned by ``atexit.register`` is discarded, so this
    function always returns None.
    """
    register = atexit.register
    register(*args)
66 
def atexit_unregister(func):
    """Remove *func* from the interpreter's exit handlers, if registered.

    Does nothing (and returns None) when *func* is not registered.

    On interpreters that provide the public ``atexit.unregister`` (Python 3)
    that function is used. Otherwise the Python 2 private handler list
    ``atexit._exithandlers`` is searched and the first entry whose callable
    equals *func* is removed.
    """
    unregister = getattr(atexit, 'unregister', None)

    if unregister is not None:
        unregister(func)
        return

    # Python 2 fallback: entries are tuples whose first item is the callable.
    handlers = atexit._exithandlers

    for position, entry in enumerate(handlers):
        if entry[0] == func:
            del handlers[position]
            return
77 
class WatchDog(Thread):
    """Daemon thread that ends the process when no life sign arrives in time.

    The monitored code reports activity through set(), passing a timestamp
    that is compared against time.time(). run() wakes up every timeout/4
    seconds; once the last reported timestamp is more than ``timeout`` away
    from the current time it prints a message, optionally appends a line to
    ``logfile`` and calls os._exit(0). In debug mode the interpreter is kept
    alive and only the watchdog loop is stopped.
    """

    def __init__(self, timeout, debug=False, logfile=None):

        """
        timeout: in minutes.
        debug:   if true, print status messages and never kill the process.
        logfile: optional path ('~' is expanded) that receives one line when
                 the timeout is exceeded.
        """

        Thread.__init__(self)

        self.timeout = timeout*60.
        self.debug = debug
        self._last_ping = None

        # Deliberately not called ``_stop``: on Python 3 threading.Thread has
        # a private method of that name, and shadowing it with a bool makes
        # join() fail with "'bool' object is not callable".
        self._stop_requested = False

        if logfile is not None:
            logfile = os.path.expanduser(logfile)

        self.logfile = logfile

        self.daemon = True

    def stop(self):
        """Ask run() to leave its loop at the next iteration."""
        self._stop_requested = True

    def set(self, x):
        "set the _last_ping variable of the WatchDog instance"

        if self.debug:
            print('Watchdog: set(%s) called.' % str(x))

        self._last_ping = x

    def run(self):
        """run the Watchdog thread, which sits in a loop sleeping for timeout/4. at
        each iteration, and if abs(time() - _last_ping) > timeout, exits.
        """

        while not self._stop_requested:

            # Read the shared value once so the checks below stay consistent
            # even if set() is called from another thread in between.
            last_ping = self._last_ping

            if last_ping is not None:
                delta = abs(last_ping - time.time())
            else:
                delta = None

            if self.debug:

                if delta is None:
                    val = 'N/A s'
                else:
                    val = '%.0f s' % delta

                print('Watchdog: last life sign %s ago; timeout is %d min(s).' %
                      (val, self.timeout/60.))

            if delta is not None and delta > self.timeout:
                self._on_timeout()

            time.sleep(self.timeout/4.)

    def _on_timeout(self):
        """Report the missed life sign, then exit (or just stop in debug mode)."""

        s = 'No life sign for > %d minute(s)' % (self.timeout/60.)

        print(s + ', exiting...')

        if self.logfile is not None:

            # Logging is best effort: an unwritable log file must not keep
            # the watchdog from terminating the process. Mode 'a' creates
            # the file when it does not exist yet.
            try:
                f = open(self.logfile, 'a')
                try:
                    f.write(s+'; host %s, %s\n' % (socket.gethostname(), time.ctime()))
                finally:
                    f.close()

            except IOError:
                pass

        if not self.debug:
            # os._exit ends the process immediately, without running
            # atexit handlers or other cleanup.
            os._exit(0)
        else:
            print('Watchdog: keeping Python interpreter alive.')
            self.stop()
161 
class SpinWheel:
    """Text-mode progress spinner that cycles through four characters."""

    symbols = ('-', '/', '|', '\\')

    def __init__(self):
        # index into ``symbols`` of the character shown by the next update()
        self.state = 0

    def update(self, s=''):
        """Rewrite the current terminal line as *s* plus the next symbol.

        A carriage return is written first so the previous output is
        overwritten; stdout is flushed, then the spinner advances one step.
        """

        import sys

        out = sys.stdout
        glyph = self.symbols[self.state]

        out.write('\r%s%s' % (s, glyph))
        out.flush()

        self.state = (self.state + 1) % len(self.symbols)
177 
class Pipe(object):
    """implements a FIFO pipe that merges lists (see self.put)

    New elements enter at index 0, so the oldest element is the last one.
    ``length`` is the maximum number of elements kept; a value <= 0 (the
    default is -1) means the pipe is unbounded.
    """

    def __init__(self, length = -1):

        self.length = length
        self.pipe = []

    def _trim(self):
        """Drop the oldest (rightmost) elements that exceed a bounded length."""
        if self.length > 0 and len(self.pipe) > self.length:
            self.pipe = self.pipe[:self.length]

    def put(self, x):
        """if x is subscriptable, insert its contents at the beginning of the pipe.
        Else insert the element itself.
        If the pipe is full, drop the oldest elements until it fits again.

        Note that the subscript test is ``x[0]``: an empty sequence fails it
        and is therefore inserted as a single element, and a string is split
        into its characters.
        """

        try:
            x[0]
            incoming = list(x)

        except Exception:
            incoming = [x]

        self.pipe = incoming + self.pipe
        self._trim()

    def append(self, x):
        """x must be an iterable; its items are inserted at the beginning of the
        pipe (like put), dropping the oldest (rightmost) elements if necessary.
        """

        # An unbounded pipe (length <= 0) must not be sliced with the raw
        # length: [:-1] would silently discard the oldest element.
        self.pipe = list(x) + self.pipe
        self._trim()

    def get(self):
        """returns the oldest element, without popping it out of the pipe.
        Popping occurs in the put() method. Raises IndexError if the pipe
        is empty.
        """
        return self.pipe[-1]

    def __getitem__(self, index):
        return self.pipe.__getitem__(index)

    def __len__(self):
        return len(self.pipe)

    def __str__(self):
        return str(self.pipe)

    def is_full(self):
        """True when the pipe holds exactly ``length`` elements."""
        return len(self.pipe) == self.length

    __repr__ = __str__
228 
class SortedQueue(Queue):
    """Queue whose _put/_get hooks keep the stored items ordered by time.

    Items are expected to expose a ``time`` attribute that
    Isd.misc.mathutils.average() can reduce to a number. _get() does not
    return the first item but one drawn at random, with weights derived
    from the items' average times.

    NOTE(review): sort() and _get() call ``self.queue.sort`` and slice
    ``self.queue``, i.e. they assume the underlying container is a list;
    confirm this against the Queue implementation in use.
    """

    def sort(self):
        """Sort the stored items by average time and cache those averages.

        The averages are stored, in sorted order, in ``self.times``.
        """

        from numpy.oldnumeric import array
        from Isd.misc.mathutils import average

        # Python 2 comparison-function sort on the mean of each item's time.
        self.queue.sort(lambda a, b: cmp(average(a.time), average(b.time)))

        self.times = array([average(x.time) for x in self.queue])

    def _put(self, item):
        """Store *item* via Queue._put, then re-sort the whole queue."""

        Queue._put(self, item)
        self.sort()

    def _get(self):
        """Remove and return one randomly selected item.

        The selection weights are (1 - rescale_uniform(times)) ** 2 and the
        index is chosen by draw_dirichlet(); both helpers come from
        Isd.misc.mathutils. Presumably rescale_uniform maps the times onto
        [0, 1], so that items with small average times get large weights
        -- TODO confirm against mathutils.
        """

        from numpy.oldnumeric import power
        from Isd.misc.mathutils import draw_dirichlet, rescale_uniform

        ## compute "probabilities"

        p = 1. - rescale_uniform(self.times)
        p = power(p, 2.)

        index = draw_dirichlet(p)

        val = self.queue[index]

        # drop the selected item from the underlying container
        self.queue = self.queue[:index] + self.queue[index + 1:]

        # refresh self.times so it matches the remaining items
        if len(self.queue):
            self.sort()

        return val
265 
def load_pdb(filename):
    """Return a Scientific.IO.PDB ``Structure`` built from *filename*.

    A leading '~' or '~user' in *filename* is expanded first.
    """

    import os

    from Scientific.IO.PDB import Structure

    expanded = os.path.expanduser(filename)

    return Structure(expanded)
273 
def copyfiles(src_path, dest_path, pattern=None, verbose=False):
    """Copy every entry of *src_path* matching *pattern* into *dest_path*.

    pattern: glob pattern relative to src_path; None selects '*'.
    verbose: if true, print the source path of each file after copying it.

    Files keep their base name. Copying is done with shutil.copyfile.
    """

    from glob import glob
    from shutil import copyfile
    import os

    wildcard = pattern
    if wildcard is None:
        wildcard = '*'

    for source in glob(os.path.join(src_path, wildcard)):

        target = os.path.join(dest_path, os.path.basename(source))
        copyfile(source, target)

        if verbose:
            print(source)
290 
def touch(filename):
    """Create *filename* as an empty file.

    The file is opened in 'w' mode and closed again, so an existing file is
    truncated to zero length. If opening fails and *filename* is a
    directory the error is ignored; any other IOError is re-raised.
    """

    try:
        open(filename, 'w').close()

    except IOError:
        import os

        if not os.path.isdir(filename):
            raise
303 
304 #Yannick
def read_sequence_file(filename, first_residue_number=1):
    """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
    no:3-letter code. Fails on unknown amino acids.

    filename: path of the sequence file. Content starting with '>' is read
        as FASTA: the first line is skipped and the remaining 1-letter
        codes are translated with the module-level ``code`` table. Any
        other content is read as whitespace-separated 3-letter codes.
    first_residue_number: number assigned to the first residue.

    Raises IOError if the file cannot be opened and KeyError for a 1-letter
    code that is not in ``code``. Unknown 3-letter codes only produce a
    warning and are returned unchanged.
    """

    filename = os.path.abspath(filename)
    try:
        f = open(filename)
    except IOError:
        raise IOError('Could not open sequence file "%s".' % filename)

    try:
        seq = f.read().upper()
    finally:
        f.close()

    if seq.startswith('>'):
        print("Detected FASTA 1-letter sequence")
        header_end = seq.find('\n')
        if header_end < 0:
            # header line only: there is no sequence data at all
            residues = ''
        else:
            #get rid of first line and get sequence in one line
            residues = ''.join(seq[header_end+1:].split())
        names = [code[i] for i in residues]
    else:
        names = seq.split()
        known = set(code.values())
        for x in names:
            if x not in known:
                print('Warning: unknown 3-letter code: %s' % x)

    numbers = range(first_residue_number, first_residue_number+len(names))
    return dict(zip(numbers, names))
332 
333 #Yannick
def check_residue(a,b):
    """checks whether residue codes a and b are the same, doing necessary conversions

    Both codes are upper-cased; 1-letter codes are expanded to 3-letter
    names through the module-level ``code`` table. Returns True when the
    resulting names are equal. In every other case (unknown 1-letter code,
    a code that is not 3 characters long, or differing residues) a message
    is printed and False is returned.
    """
    candidates = (a.upper(), b.upper())

    # expand 1-letter codes, a first, then b
    expanded = []
    for residue in candidates:
        if len(residue) == 1:
            if residue not in code:
                print('Warning: unknown 1-letter code: %s' % residue)
                return False
            residue = code[residue]
        expanded.append(residue)

    # both names must now be 3-letter codes
    for residue in expanded:
        if len(residue) != 3:
            print('Unknown residue code %s' % residue)
            return False

    first, second = expanded
    if first == second:
        return True

    print('Residues %s and %s are not the same' % (first,second))
    return False
359 
360 
361 
def my_glob(x, do_touch=False):
    """Return glob.glob(x), optionally calling touch() on the directory first.

    x: glob pattern.
    do_touch: if true, touch() is called on the directory part of *x*
        before globbing.
    """

    from glob import glob

    if do_touch:

        import os

        path, name = os.path.split(x)

        # A pattern without a directory part yields path == ''; touching
        # the empty path would raise IOError, so it is skipped.
        if path:
            #os.system('touch %s' % path) #this is very inefficient
            touch(path) #this is better (4x to 6x faster)

    return glob(x)
376 
def Dump(this, filename, gzip = 0, mode = 'w', bin=1):
    """
    Dump(this, filename, gzip = 0)
    Supports also '~' or '~user'.

    Pickle *this* into *filename*.

    gzip: if true, write through gzip.GzipFile.
    mode: 'w' (write) or 'a' (append); anything else raises ValueError.
    bin:  pickle protocol number handed to pickle.dump (True counts as 1).
    """

    import os

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    if mode not in ('w', 'a'):
        raise ValueError("mode has to be 'w' (write) or 'a' (append)")

    filename = os.path.expanduser(filename)

    # Pickle data is binary: always open the file in binary mode.
    binary_mode = mode + 'b'

    if gzip:
        # the parameter already occupies the name ``gzip``
        import gzip as gzip_module
        f = gzip_module.GzipFile(filename, binary_mode)
    else:
        f = open(filename, binary_mode)

    try:
        pickle.dump(this, f, int(bin))
    finally:
        f.close()
399 
def Load(filename, gzip = 0, force=0):
    """
    Load(filename, gzip=0, force=0)

    Unpickle all objects stored one after another in *filename* and return
    them merged with ``+=`` (so sequentially dumped lists are concatenated).
    Returns None if the file contains no object. '~' and '~user' are
    expanded.

    gzip: if true, read through gzip.GzipFile; None is returned when the
        compressed file cannot be opened.

    force: returns all objects that could be unpickled. Useful
    when unpickling of sequential objects fails at some point.
    Without force, the error of the failing chunk is re-raised.

    NOTE: unpickling can execute arbitrary code; only load trusted files.
    """
    import os

    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    filename = os.path.expanduser(filename)

    if gzip:
        # the parameter already occupies the name ``gzip``
        import gzip as gzip_module
        try:
            f = gzip_module.GzipFile(filename)
        except (IOError, OSError):
            return None
    else:
        f = open(filename, 'rb')

    objects = None
    n = 0

    try:
        while True:

            try:
                chunk = pickle.load(f)

                if objects is None:
                    objects = chunk
                else:
                    objects += chunk

                n += 1

            except EOFError:
                break

            except Exception:
                print('Could not load chunk %d. Stopped.' % n)

                if force:
                    # keep whatever was read successfully so far
                    break

                raise

    finally:
        f.close()

    return objects
452 
def get_pdb(pdb_entry, dest='.', verbose_level=0):
    """Download a PDB entry by anonymous FTP and store it uncompressed.

    pdb_entry: PDB identifier; it is lower-cased for the remote file name.
    dest: target directory ('~' is expanded). The result is written to
        <dest>/<pdb_entry>.pdb.
    verbose_level: passed to ftplib's set_debuglevel().

    Raises IOError if the server refuses to deliver the file.
    """

    import ftplib
    import gzip
    import os
    import shutil

    url = 'ftp.ebi.ac.uk'
    path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
    filename_template = 'pdb%s.ent.gz'

    dest = os.path.expanduser(dest)

    filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
    remote_name = filename_template % pdb_entry.lower()

    ftp = ftplib.FTP(url)

    try:
        ftp.login()
        ftp.set_debuglevel(verbose_level)

        ftp.cwd(path)

        f = open(filename, 'wb')

        try:
            try:
                ftp.retrbinary('RETR %s' % remote_name, f.write)
            finally:
                f.close()

        except ftplib.error_perm:
            # do not leave an empty or partial download behind
            os.remove(filename)
            raise IOError('File %s not found on server' % filename)

        ftp.quit()

    finally:
        # closes the socket on every path, including after errors
        ftp.close()

    # Decompress in-process instead of shelling out to ``gunzip -f``: an
    # existing target is overwritten and the .gz file is removed afterwards.
    unpacked = filename[:-len('.gz')]

    src = gzip.open(filename, 'rb')
    try:
        dst = open(unpacked, 'wb')
        try:
            shutil.copyfileobj(src, dst)
        finally:
            dst.close()
    finally:
        src.close()

    os.remove(filename)
487 
def compile_index_list(chain, atom_names, residue_index_list=None):
    """Collect the ``index`` attribute of selected atoms of *chain*.

    chain: sequence of residues; each residue maps atom names to objects
        that carry an ``index`` attribute.
    atom_names: names to look up in every residue, or None to take all
        names of a residue in sorted order. Names missing from a residue
        are skipped.
    residue_index_list: positions in *chain* to visit; None means all.

    Returns (index_list, index_map): the atom indices in visiting order and
    a dict mapping each atom index to its position in index_list.
    """

    if residue_index_list is None:
        residue_index_list = range(len(chain))

    index_list = []
    index_map = {}

    for res_index in residue_index_list:

        residue = chain[res_index]

        if atom_names is None:
            wanted = sorted(residue.keys())
        else:
            wanted = atom_names

        for atom_name in wanted:

            if atom_name not in residue:
                continue

            atom_index = residue[atom_name].index
            index_map[atom_index] = len(index_list)
            index_list.append(atom_index)

    return index_list, index_map
516 
def get_coordinates(universe, E, indices=None, atom_names=('CA',),
                    residue_index_list=None, atom_index_list=None):
    """Return the selected atom coordinates for several ensemble members.

    For every i in *indices* (default: range(len(E))) the torsion angles
    E.torsion_angles[i] are applied to the polymer of *universe*, and the
    rows of universe.X selected by *atom_index_list* are copied.

    atom_index_list: explicit atom indices; if None they are compiled with
        compile_index_list() from *atom_names* and *residue_index_list*.

    Returns an array holding one coordinate block per entry of *indices*.
    """

    from numpy.oldnumeric import array, take

    if indices is None:
        indices = range(len(E))

    chain = universe.get_polymer()

    if atom_index_list is None:
        atom_index_list = compile_index_list(chain, atom_names,
                                             residue_index_list)[0]

    snapshots = []

    for member in indices:

        # setting the torsions updates universe.X for this member
        chain.set_torsions(E.torsion_angles[member], 1)

        snapshots.append(array(take(universe.X, atom_index_list)))

    return array(snapshots)
542 
def map_angles(angles, period=None):
    """
    maps angles into interval [-pi,pi]

    angles: scalar, array or sequence of angles.
    period: half-width of the target interval; defaults to pi, so pass 180.
        for angles given in degrees.

    Positive angles are wrapped with fmod(angles + period, 2*period) - period,
    all others with fmod(angles - period, 2*period) + period.
    """

    # Taken from numpy itself: numpy.oldnumeric no longer exists in current
    # NumPy releases.
    from numpy import asarray, fmod, greater, logical_not

    if period is None:
        from numpy import pi as period

    # accept plain Python sequences as well as arrays
    angles = asarray(angles)

    mask = greater(angles, 0.)

    return mask * (fmod(angles+period, 2*period)-period) + \
           logical_not(mask) * (fmod(angles-period, 2*period)+period)
557 
def remove_from_dict(d, items):
    """Delete every key listed in *items* from the dict *d*, in place.

    Keys that are not present in *d* are ignored. Returns None.
    """

    for key in items:
        d.pop(key, None)
563 
def myrange(a, b, n):
    """Return *n* evenly spaced values from *a* to *b*, both included.

    The result is a float array. Using linspace avoids three problems of
    the earlier arange-based code: floor division of the step for integer
    arguments on Python 2, a division by zero for n == 1, and a zero step
    for a == b.
    """

    from numpy import linspace

    return linspace(a, b, n)
573 
def indent(lines, prefix):
    """Join *lines* with newlines, hanging them behind *prefix*.

    The first line is preceded by *prefix*; every following line is
    preceded by as many blanks as str(prefix) is long. The list passed in
    is left unmodified. An empty list yields ''.
    """

    if not lines:
        return ''

    tag = ' ' * len(str(prefix))

    indented = [prefix + lines[0]]
    indented.extend(tag + line for line in lines[1:])

    return '\n'.join(indented)
582 
def make_block(s, length = 80, tol = 10):
    """Wrap the text *s* into a list of lines of about *length* characters.

    *s* is split at newlines and every part is wrapped separately with
    _make_block(); the resulting lines are returned in one flat list.
    """
    wrapped = []
    for paragraph in s.split('\n'):
        wrapped.extend(_make_block(paragraph, length, tol))

    return wrapped

def _strip_single_leading_space(line):
    """Remove one leading blank unless the line starts with several blanks."""
    if len(line) > 1 and line[0] == ' ' and line[1] != ' ':
        return line[1:]
    return line

def _make_block(s, length, tol):
    """Wrap the single paragraph *s*; see make_block().

    The text may break after a blank or after a '/'. A fragment that does
    not fit any more starts a new line; however, if the current line would
    be left more than *tol* characters short of *length*, it is first
    filled up with the leading characters of that fragment.
    """

    # Cut the text into fragments: pieces between '/' keep their slash, the
    # last piece of every blank-separated token gets a trailing blank.
    fragments = []
    for token in s.split(' '):
        pieces = token.split('/')
        fragments.extend([piece + '/' for piece in pieces[:-1]])
        fragments.append(pieces[-1] + ' ')

    lines = []
    current = ''

    for fragment in fragments:

        if len(current) + len(fragment) <= length:
            current += fragment
            continue

        room = length - len(current)
        if room > tol:
            # too much space would be wasted: split the fragment itself
            current += fragment[:room]
            fragment = fragment[room:]

        lines.append(_strip_single_leading_space(current))
        current = fragment

    # the last line loses its trailing character (the blank appended above)
    lines.append(_strip_single_leading_space(current[:-1]))

    return lines
634 
def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
               gzip=False, bin=True):
    """Dump() *x* to *filename*, retrying until no IOError occurs.

    If the first attempt raises IOError, one message is printed: the error
    itself when err_msg is None, otherwise err_msg, preceded by the error
    text if show_io_err is true. The dump is then repeated every *delay*
    minutes, without further messages, until it succeeds.
    """

    try:
        Dump(x, filename, gzip=gzip, bin=bin)

    except IOError as msg:

        import time

        if err_msg is None:
            report = 'IOError: %s' % str(msg)
        elif show_io_err:
            report = '%s. %s' % (str(msg), err_msg)
        else:
            report = err_msg

        print(report)

        pause = 60. * delay
        pending = True

        while pending:

            ## wait for ``delay`` minutes before the next attempt

            time.sleep(pause)

            try:
                Dump(x, filename, gzip=gzip, bin=bin)
            except IOError:
                continue

            pending = False
666 
def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
              gzip=False, mode='w', bin=True):
    """Dump *x* to *filename* via a temporary file in the same directory.

    The object is first written with _save_dump() (which retries on
    IOError) to a temporary file next to *filename*.

    mode: 'w' renames the temporary file to *filename*;
          'a' removes the temporary file and appends *x* to *filename*.
          Any other value raises ValueError before anything is written.

    err_msg, delay, show_io_err, gzip and bin are handed to _save_dump().
    """

    import os, tempfile

    # Validate first so an invalid mode leaves no stray temporary file.
    if mode not in ('w', 'a'):
        raise ValueError('Mode "%s" invalid.' % mode)

    path, _filename = os.path.split(filename)

    # Only the unique base name is taken from mktemp(); the file itself is
    # created by _save_dump() so that its retry logic covers I/O failures
    # in the target directory.
    temp_path, temp_filename = os.path.split(tempfile.mktemp())
    temp_filename = os.path.join(path, temp_filename)

    _save_dump(x, temp_filename, err_msg, delay, show_io_err,
               gzip, bin)

    ## if that worked, dump properly

    if mode == 'w':
        os.rename(temp_filename, filename)

    else:
        os.unlink(temp_filename)
        Dump(x, filename, mode='a', gzip=gzip, bin=bin)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:543
def put
if x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:186
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:178
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:209
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:305
def append
x must be a list and will be appended to the end of the pipe, dropping rightmost elements if necessary
Definition: utils.py:202
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:334