IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
utils.py
1 """@namespace IMP.isd.utils
2  Miscellaneous utilities.
3 """
4 
5 #
6 # The Inferential Structure Determination (ISD) software library
7 #
8 # Authors: Michael Habeck and Wolfgang Rieping
9 #
10 # Copyright (C) Michael Habeck and Wolfgang Rieping
11 #
12 # All rights reserved.
13 #
14 # NO WARRANTY. This library is provided 'as is' without warranty of any
15 # kind, expressed or implied, including, but not limited to the implied
16 # warranties of merchantability and fitness for a particular purpose or
17 # a warranty of non-infringement.
18 #
19 # Distribution of substantively modified versions of this module is
20 # prohibited without the explicit permission of the copyright holders.
21 #
22 
23 import atexit
24 import sys
25 import time
26 import os
27 import os.path
28 import socket
29 from threading import Thread
30 
31 debug = False
32 
33 code = {
34  'A': 'ALA',
35  'R': 'ARG',
36  'N': 'ASN',
37  'D': 'ASP',
38  'C': 'CYS',
39  'E': 'GLU',
40  'Q': 'GLN',
41  'G': 'GLY',
42  'H': 'HIS',
43  'I': 'ILE',
44  'L': 'LEU',
45  'K': 'LYS',
46  'M': 'MET',
47  'F': 'PHE',
48  'P': 'PRO',
49  'S': 'SER',
50  'T': 'THR',
51  'W': 'TRP',
52  'Y': 'TYR',
53  'V': 'VAL'
54 }
55 
56 
57 def average(x):
58  return sum(x) / float(len(x))
59 
60 
61 def atexit_register(*args):
62 
63  atexit.register(*args)
64 
65 
66 def atexit_unregister(func):
67 
68  exit_funcs = [x[0] for x in atexit._exithandlers]
69 
70  try:
71  i = exit_funcs.index(func)
72  except ValueError:
73  return
74 
75  atexit._exithandlers.pop(i)
76 
77 
78 class WatchDog(Thread):
79 
80  def __init__(self, timeout, debug=False, logfile=None):
81  """
82  timeout: in minutes.
83  """
84 
85  Thread.__init__(self)
86 
87  self.timeout = timeout * 60.
88  self.debug = debug
89  self._last_ping = None
90  self._stop = False
91 
92  if logfile is not None:
93  logfile = os.path.expanduser(logfile)
94 
95  self.logfile = logfile
96 
97  self.setDaemon(True)
98 
99  def stop(self):
100  self._stop = True
101 
102  def set(self, x):
103  "set the _last_ping variable of the WatchDog instance"
104 
105  if self.debug:
106  print('Watchdog: set(%s) called.' % str(x))
107 
108  self._last_ping = x
109 
110  def run(self):
111  """run the Watchdog thread, which sits in a loop sleeping for
112  timeout/4. at each iteration, and
113  if abs(time() - _last_ping) > timeout, exits.
114  """
115 
116  while not self._stop:
117 
118  if self._last_ping is not None:
119  delta = abs(self._last_ping - time.time())
120  else:
121  delta = None
122 
123  if self.debug:
124 
125  if delta is None:
126  val = 'N/A s'
127  else:
128  val = '%.0f s' % delta
129 
130  print('Watchdog: last life sign %s ago; timeout is %d min(s).'
131  % (val, self.timeout / 60.))
132 
133  if self._last_ping is not None and delta > self.timeout:
134 
135  s = 'No life sign for > %d minute(s)' % (self.timeout / 60.)
136 
137  print(s + ', exiting...')
138 
139  if self.logfile is not None:
140 
141  if os.path.exists(self.logfile):
142  mode = 'a'
143  else:
144  mode = 'w'
145 
146  try:
147  f = open(self.logfile, mode)
148  f.write(
149  s + '; host %s, %s\n' %
150  (socket.gethostname(), time.ctime()))
151  f.close()
152 
153  except IOError:
154  pass
155 
156  if not self.debug:
157  os._exit(0)
158  else:
159  print('Watchdog: keeping Python interpreter alive.')
160  self.stop()
161 
162  time.sleep(self.timeout / 4.)
163 
164 
165 class SpinWheel:
166 
167  symbols = ('-', '/', '|', '\')
168 
169  def __init__(self):
170  self.state = 0
171 
172  def update(self, s=''):
173  sys.stdout.write('\r%s%s' % (s, self.symbols[self.state]))
174  sys.stdout.flush()
175 
176  self.state = (self.state + 1) % len(self.symbols)
177 
178 
179 class Pipe:
180 
181  """implements a FIFO pipe that merges lists (see self.put)"""
182 
183  def __init__(self, length=-1):
184 
185  self.length = length
186  self.pipe = []
187 
188  def put(self, x):
189  """If x is subscriptable, insert its contents at the beginning of
190  the pipe. Else insert the element itself.
191  If the pipe is full, drop the oldest element.
192  """
193 
194  try:
195  x[0]
196  self.pipe = list(x) + self.pipe
197 
198  except TypeError:
199  self.pipe.insert(0, x)
200 
201  if self.length > 0 and len(self.pipe) > self.length:
202  self.pipe = self.pipe[:-1]
203 
204  def append(self, x):
205  """x must be a list and will be appended to the end of the pipe,
206  dropping rightmost elements if necessary
207  """
208 
209  self.pipe = (list(x) + self.pipe)[:self.length]
210 
211  def get(self):
212  """returns the oldest element, without popping it out of the pipe.
213  Popping occurs in the put() method
214  """
215  return self.pipe[-1]
216 
217  def __getitem__(self, index):
218  return self.pipe.__getitem__(index)
219 
220  def __len__(self):
221  return len(self.pipe)
222 
223  def __str__(self):
224  return str(self.pipe)
225 
226  def is_full(self):
227  return len(self.pipe) == self.length
228 
229  __repr__ = __str__
230 
231 
232 def load_pdb(filename):
233 
234  import os
235 
236  from Scientific.IO.PDB import Structure
237 
238  return Structure(os.path.expanduser(filename))
239 
240 
241 def copyfiles(src_path, dest_path, pattern=None, verbose=False):
242 
243  from glob import glob
244  from shutil import copyfile
245  import os
246 
247  if pattern is None:
248  pattern = '*'
249 
250  file_list = glob(os.path.join(src_path, pattern))
251 
252  for f in file_list:
253  copyfile(f, os.path.join(dest_path, os.path.basename(f)))
254 
255  if verbose:
256  print(f)
257 
258 
259 def touch(filename):
260 
261  try:
262  f = open(filename, 'w')
263  f.close()
264 
265  except IOError as error:
266  import os
267  if os.path.isdir(filename):
268  pass
269  else:
270  raise IOError(error)
271 
272 # Yannick
273 
274 
275 def read_sequence_file(filename, first_residue_number=1):
276  """read sequence of ONE chain, 1-letter or 3-letter, returns dict of
277  no:3-letter code. Fails on unknown amino acids.
278  """
279 
280  filename = os.path.abspath(filename)
281  try:
282  f = open(filename)
283  except IOError:
284  raise IOError('Could not open sequence file "%s".' % filename)
285  seq = f.read().upper()
286 
287  if seq.startswith('>'):
288  print("Detected FASTA 1-letter sequence")
289  pos = seq.find('\n')
290  # get rid of first line and get sequence in one line
291  seq = ''.join(seq[pos + 1:].split())
292  names = [code[i] for i in seq]
293  numbers = list(range(first_residue_number,
294  first_residue_number + len(seq)))
295  return dict(list(zip(numbers, names)))
296  else:
297  spl = seq.split()
298  for x in spl:
299  if x not in code.values():
300  print('Warning: unknown 3-letter code: %s' % x)
301  numbers = list(range(first_residue_number,
302  first_residue_number + len(spl)))
303  return dict(list(zip(numbers, spl)))
304 
305 # Yannick
306 
307 
308 def check_residue(a, b):
309  """checks whether residue codes a and b are the same, doing necessary
310  conversions"""
311  a = a.upper()
312  b = b.upper()
313  if len(a) == 1:
314  if a not in code:
315  print('Warning: unknown 1-letter code: %s' % a)
316  return False
317  a = code[a]
318  if len(b) == 1:
319  if b not in code:
320  print('Warning: unknown 1-letter code: %s' % b)
321  return False
322  b = code[b]
323  if len(a) != 3:
324  print('Unknown residue code %s' % a)
325  return False
326  if len(b) != 3:
327  print('Unknown residue code %s' % b)
328  return False
329  if a != b:
330  print('Residues %s and %s are not the same' % (a, b))
331  return False
332  else:
333  return True
334 
335 
336 def my_glob(x, do_touch=False):
337 
338  from glob import glob
339 
340  if do_touch:
341 
342  import os
343 
344  path, name = os.path.split(x)
345 
346  # os.system('touch %s' % path) #this is very inefficient
347  touch(path) # this is better (4x to 6x faster)
348 
349  return glob(x)
350 
351 
352 def Dump(this, filename, gzip=0, mode='w', bin=1):
353  """
354  Dump(this, filename, gzip = 0)
355  Supports also '~' or '~user'.
356  """
357 
358  import os
359  import pickle
360 
361  filename = os.path.expanduser(filename)
362 
363  if mode not in ['w', 'a']:
364  raise ValueError("mode has to be 'w' (write) or 'a' (append)")
365 
366  if gzip:
367  import gzip
368  f = gzip.GzipFile(filename, mode)
369  else:
370  f = open(filename, mode)
371 
372  pickle.dump(this, f, bin)
373 
374  f.close()
375 
376 
377 def Load(filename, gzip=0, force=0):
378  """
379  Load(filename, gzip=0, force=0)
380 
381  force: returns all objects that could be unpickled. Useful
382  when unpickling of sequential objects fails at some point.
383  """
384  import pickle
385  import os
386 
387  filename = os.path.expanduser(filename)
388 
389  if gzip:
390  import gzip
391  try:
392  f = gzip.GzipFile(filename)
393  except IOError:
394  return
395 
396  f = open(filename, 'rb')
397 
398  objects = None
399 
400  eof = 0
401  n = 0
402 
403  while not eof:
404 
405  try:
406  object = pickle.load(f)
407 
408  if objects is None:
409  objects = object
410 
411  else:
412  objects += object
413 
414  n += 1
415 
416  except EOFError:
417  eof = 1
418 
419  except Exception:
420  print('Could not load chunk %d. Stopped.' % n)
421 
422  if force:
423  eof = 1
424  else:
425  object = pickle.load(f)
426 
427  f.close()
428 
429  return objects
430 
431 
432 def get_pdb(pdb_entry, dest='.', verbose_level=0):
433 
434  import ftplib
435  import os
436 
437  url = 'ftp.ebi.ac.uk'
438  path = 'pub/databases/rcsb/pdb-remediated/data/structures/all/pdb'
439  filename_template = 'pdb%s.ent.gz'
440 
441  dest = os.path.expanduser(dest)
442 
443  ftp = ftplib.FTP(url)
444  ftp.login()
445  ftp.set_debuglevel(verbose_level)
446 
447  ftp.cwd(path)
448 
449  filename = os.path.join(dest, '%s.pdb.gz' % pdb_entry)
450 
451  f = open(filename, 'wb')
452 
453  try:
454  ftp.retrbinary('RETR %s' % filename_template % pdb_entry.lower(),
455  f.write)
456 
457  f.close()
458 
459  ftp.quit()
460 
461  except ftplib.error_perm:
462  raise IOError('File %s not found on server' % filename)
463 
464  os.system('gunzip -f %s' % filename)
465 
466 
467 def compile_index_list(chain, atom_names, residue_index_list=None):
468 
469  if residue_index_list is None:
470  residue_index_list = list(range(len(chain)))
471 
472  index_list = []
473 
474  names = atom_names
475 
476  index_map = {}
477 
478  i = 0
479 
480  for res_index in residue_index_list:
481 
482  if atom_names is None:
483  names = sorted(chain[res_index].keys())
484 
485  for n in names:
486 
487  if n in chain[res_index]:
488  index = chain[res_index][n].index
489  index_list.append(index)
490  index_map[index] = i
491  i += 1
492 
493  return index_list, index_map
494 
495 
496 def get_coordinates(universe, E, indices=None, atom_names=('CA',),
497  residue_index_list=None, atom_index_list=None):
498 
499  from numpy.oldnumeric import array, take
500 
501  if indices is None:
502  indices = list(range(len(E)))
503 
504  chain = universe.get_polymer()
505 
506  if atom_index_list is None:
507  atom_index_list, index_map = compile_index_list(chain, atom_names,
508  residue_index_list)
509 
510  coord = []
511 
512  for i in indices:
513 
514  chain.set_torsions(E.torsion_angles[i], 1)
515 
516  X = array(take(universe.X, atom_index_list))
517 
518  coord.append(X)
519 
520  return array(coord)
521 
522 
523 def map_angles(angles, period=None):
524  """
525  maps angles into interval [-pi,pi]
526  """
527 
528  from numpy.oldnumeric import fmod, greater, logical_not
529 
530  if period is None:
531  from numpy.oldnumeric import pi as period
532 
533  mask = greater(angles, 0.)
534 
535  return mask * (fmod(angles + period, 2 * period) - period) + \
536  logical_not(mask) * (fmod(angles - period, 2 * period) + period)
537 
538 
539 def remove_from_dict(d, items):
540 
541  for item in items:
542  if item in d:
543  del d[item]
544 
545 
546 def myrange(a, b, n):
547 
548  from numpy.oldnumeric import arange
549 
550  step = (b - a) / (n - 1)
551 
552  x = arange(a, b + step, step)
553 
554  return x[:n]
555 
556 
557 def indent(lines, prefix):
558 
559  tag = ' ' * len(str(prefix))
560 
561  lines[0] = prefix + lines[0]
562  lines = [lines[0]] + list(map(lambda s, t=tag: t + s, lines[1:]))
563 
564  return '\n'.join(lines)
565 
566 
567 def make_block(s, length=80, tol=10):
568  blocks = s.split('\n')
569  spl = []
570  for block in blocks:
571  spl += _make_block(block, length, tol)
572 
573  return spl
574 
575 
576 def _make_block(s, length, tol):
577 
578  spl = s.split(' ')
579  spl = [(w, ' ') for w in spl]
580 
581  words = []
582  for ll in spl:
583  g = ll[0].split('/')
584  g = [w + '/' for w in g]
585  g[-1] = g[-1][:-1] + ' '
586 
587  words += g
588 
589  spl = []
590  line = ''
591 
592  for i in range(len(words)):
593  word = words[i]
594 
595  if len(line + word) <= length:
596  line += word
597 
598  else:
599  if length - len(line) > tol:
600  m = length - len(line)
601  line += word[:m]
602  word = word[m:]
603 
604  if len(line) > 1 and line[0] == ' ' and \
605  line[1] != ' ':
606  line = line[1:]
607 
608  spl.append(line)
609  line = word
610 
611  line = line[:-1]
612  if len(line) > 1 and line[0] == ' ' and \
613  line[1] != ' ':
614  line = line[1:]
615 
616  spl.append(line)
617 
618  return spl
619 
620 
621 def _save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
622  gzip=False, bin=True):
623 
624  try:
625  Dump(x, filename, gzip=gzip, bin=bin)
626 
627  except IOError as msg:
628 
629  import time
630 
631  if err_msg is None:
632  print('IOError: %s' % str(msg))
633 
634  else:
635  if show_io_err:
636  print('%s. %s' % (str(msg), err_msg))
637  else:
638  print(err_msg)
639 
640  while True:
641 
642  # wait for 10 minutes
643 
644  time.sleep(60. * delay)
645 
646  try:
647  Dump(x, filename, gzip=gzip, bin=bin)
648  break
649 
650  except IOError:
651  continue
652 
653 
654 def save_dump(x, filename, err_msg=None, delay=10, show_io_err=True,
655  gzip=False, mode='w', bin=True):
656 
657  import os
658  import tempfile
659 
660  path, _filename = os.path.split(filename)
661 
662  temp_path, temp_filename = os.path.split(tempfile.mktemp())
663  temp_filename = os.path.join(path, temp_filename)
664 
665  _save_dump(x, temp_filename, err_msg, delay, show_io_err,
666  gzip, bin)
667 
668  # if that worked, dump properly
669 
670  if mode == 'w':
671  os.rename(temp_filename, filename)
672 
673  elif mode == 'a':
674  os.unlink(temp_filename)
675  Dump(x, filename, mode='a', gzip=gzip, bin=bin)
676 
677  else:
678  raise Exception('Mode "%s" invalid.' % mode)
def map_angles
maps angles into interval [-pi,pi]
Definition: utils.py:523
def put
If x is subscriptable, insert its contents at the beginning of the pipe.
Definition: utils.py:188
implements a FIFO pipe that merges lists (see self.put)
Definition: utils.py:179
def get
returns the oldest element, without popping it out of the pipe.
Definition: utils.py:211
def read_sequence_file
read sequence of ONE chain, 1-letter or 3-letter, returns dict of no:3-letter code.
Definition: utils.py:275
def append
x must be a list and will be appended to the end of the pipe, dropping rightmost elements if necessar...
Definition: utils.py:204
The general base class for IMP exceptions.
Definition: exception.h:48
def check_residue
checks whether residue codes a and b are the same, doing necessary conversions
Definition: utils.py:308