IMP  2.0.1
The Integrative Modeling Platform
idock.py
1 #!/usr/bin/env python
2 
3 import IMP
4 import IMP.saxs
5 import os
6 import re
7 import sys
8 import math
9 import shutil
10 import tempfile
11 import subprocess
12 import optparse
13 import inspect
14 
15 class TempDir(object):
16  """Make a temporary directory, and delete it when the object is destroyed"""
17  def __init__(self):
18  self.tmpdir = tempfile.mkdtemp()
19  def __del__(self):
20  shutil.rmtree(self.tmpdir, ignore_errors=True)
21 
22 
23 def _count_lines(filename):
24  """Count the number of lines in the file"""
25  wc = 0
26  for line in open(filename):
27  wc += 1
28  return wc
29 
30 def _count_fiber_dock_out(filename):
31  """Count the number of transformations in a FiberDock output file"""
32  wc = 0
33  header_found = False
34  for line in open(filename):
35  if '|' in line:
36  if header_found:
37  wc += 1
38  elif 'glob' in line:
39  header_found = True
40  return wc
41 
42 def _run_binary(path, binary, args, out_file=None):
43  if path:
44  binary = os.path.join(path, binary)
45  cmd = ' '.join([binary] + args)
46  if out_file:
47  stdout = open(out_file, 'w')
48  cmd += ' > ' + out_file
49  else:
50  stdout = sys.stdout
51  print cmd
52  p = subprocess.Popen([binary] + args, stdout=stdout, stderr=sys.stderr)
53  ret = p.wait()
54  if ret != 0:
55  raise OSError("subprocess failed with exit code %d" % ret)
56 
57 
58 class Scorer(object):
59  """Score transformations using a type of experimental data.
60  To add support for a new type of data, simply create a new Scorer
61  subclass and override its methods and variables."""
62  transformations_file = 'trans_pd'
63  # Number of transforms to be passed to FiberDock; more accurate scoring
64  # methods can get away with fewer transforms
65  transforms_needed = 5000
66  # If set to True, higher scores are considered better when calculating
67  # zscores
68  reverse_zscores = False
69  # A short string (< 8 characters) identifying the method
70  short_name = None
71 
72  @classmethod
73  def add_parser_options(cls, parser):
74  """Add command line options to the given parser"""
75  pass
76 
77  @classmethod
78  def check_options(cls, idock, parser):
79  """Check command line options for consistency, and create and return
80  a new instance if selected by these options"""
81  pass
82 
83  def __init__(self, idock, output_type):
84  self.receptor = idock.receptor
85  self.ligand = idock.ligand
86  self.output_file = idock.get_filename("%s.res" % output_type)
87  self.zscore_output_file = idock.get_filename("%sf.res" % output_type)
88 
89  def score(self, num_transforms):
90  if os.path.exists(self.output_file) \
91  and _count_lines(self.output_file) >= num_transforms:
92  print "Skipping %s for %s" % (str(self), self.receptor)
93  else:
94  self._run_score_binary()
95 
96  def recompute_zscore(self, transforms_file):
97  """Recompute z-scores for the new set of transformations
98  in transforms_file"""
99  d = TempDir()
100  tmp_tf = os.path.join(d.tmpdir, 'tmp')
101  solutions = []
102  num_re = re.compile('\d')
103  # Read all solutions
104  for line in open(self.output_file):
105  spl = line.split('|')
106  if len(spl) > 1 and num_re.search(spl[0]):
107  solutions.append(line)
108  # Extract only the solutions that are mentioned in transforms_file
109  fh = open(tmp_tf, 'w')
110  for line in open(transforms_file):
111  spl = line.split()
112  if len(spl) > 1:
113  fh.write(solutions[int(spl[0])-1])
114  fh.close()
115  # Recompute z-scores for the new set
116  args = [tmp_tf]
117  if self.reverse_zscores:
118  args.append('1')
119  _run_binary(None, 'recompute_zscore', args,
120  out_file=self.zscore_output_file)
121 
122 
123 class NMRScorer(Scorer):
124  """Score transformations using NMR residue type content"""
125  short_name = 'nmr_rtc'
126 
127  @classmethod
128  def add_parser_options(cls, parser):
129  g = optparse.OptionGroup(parser, "NMR-RTC")
130  g.add_option('--receptor_rtc', metavar='FILE',
131  help="File name of the receptor NMR residue type content")
132  g.add_option('--ligand_rtc', metavar='FILE',
133  help="File name of the ligand NMR residue type content")
134  parser.add_option_group(g)
135 
136  @classmethod
137  def check_options(cls, idock, parser):
138  if idock.opts.receptor_rtc or idock.opts.ligand_rtc:
139  return cls(idock)
140 
141  def __init__(self, idock):
142  Scorer.__init__(self, idock, "nmr_rtc_score")
143  self.receptor_rtc = idock.opts.receptor_rtc
144  self.ligand_rtc = idock.opts.ligand_rtc
145  if idock.opts.type == 'AA':
146  self.receptor_rtc, self.ligand_rtc = self.ligand_rtc, '-'
147 
148  def __str__(self):
149  return "NMR score"
150 
151  def _run_score_binary(self):
152  _run_binary(None, "nmr_rtc_score",
153  [self.receptor, self.ligand, self.transformations_file,
154  self.receptor_rtc, self.ligand_rtc,
155  '-o', self.output_file])
156 
157 
158 class SAXSScorer(Scorer):
159  """Score transformations using SAXS (rg + chi)"""
160  short_name = 'saxs'
161 
162  @classmethod
163  def add_parser_options(cls, parser):
164  g = optparse.OptionGroup(parser, "SAXS")
165  g.add_option('--saxs', metavar='FILE', dest='saxs_file',
166  help="File name of the complex SAXS profile")
167  g.add_option('--saxs_receptor_pdb', metavar='FILE',
168  help='Additional receptor structure for SAXS scoring '
169  'with modeled missing atoms/residues. '
170  'This structure should be aligned to the '
171  'input receptor!')
172  g.add_option('--saxs_ligand_pdb', metavar='FILE',
173  help='Additional ligand structure for SAXS scoring '
174  'with modeled missing atoms/residues. '
175  'This structure should be aligned to the '
176  'input ligand!')
177  parser.add_option_group(g)
178 
179  @classmethod
180  def check_options(cls, idock, parser):
181  if idock.opts.saxs_file:
182  return cls(idock)
183 
184  def __init__(self, idock):
185  Scorer.__init__(self, idock, "saxs_score")
186  self.saxs_file = idock.opts.saxs_file
187  self.saxs_receptor = idock.opts.saxs_receptor_pdb or idock.receptor
188  self.saxs_ligand = idock.opts.saxs_ligand_pdb or idock.ligand
189 
190  def __str__(self):
191  return "SAXS score"
192 
193  def _run_score_binary(self):
194  _run_binary(None, "saxs_score",
195  [self.saxs_receptor, self.saxs_ligand,
196  self.transformations_file, self.saxs_file,
197  '-o', self.output_file])
198 
199 
200 class EM2DScorer(Scorer):
201  """Score transformations using EM2D"""
202  short_name = 'em2d'
203 
204  @classmethod
205  def add_parser_options(cls, parser):
206  g = optparse.OptionGroup(parser, "EM2D")
207  g.add_option('--em2d', metavar='FILE', action='append', default=[],
208  dest='class_averages',
209  help="File name of a complex 2D class average in PGM "
210  "format. This option can be repeated to use "
211  "multiple class averages (up to 5 in total)")
212  g.add_option('--pixel_size', type='float', default=0.,
213  help='Pixel size for EM2D images')
214  parser.add_option_group(g)
215 
216  @classmethod
217  def check_options(cls, idock, parser):
218  if idock.opts.class_averages:
219  if idock.opts.pixel_size <= 0.:
220  parser.error("please specify pixel size for 2D images with "
221  "--pixel_size option")
222  else:
223  return cls(idock)
224 
225  def __init__(self, idock):
226  Scorer.__init__(self, idock, "em2d_score")
227  self.class_averages = idock.opts.class_averages
228  self.pixel_size = idock.opts.pixel_size
229 
230  def __str__(self):
231  return "EM2D score"
232 
233  def _run_score_binary(self):
234  _run_binary(None, "em2d_score",
235  [self.receptor, self.ligand, self.transformations_file] \
236  + self.class_averages \
237  + ['-o', self.output_file, '-n', '200', '-s',
238  str(self.pixel_size)])
239 
240 
241 class EM3DScorer(Scorer):
242  """Score transformations using EM3D"""
243  short_name = 'em3d'
244  transforms_needed = 1000
245  reverse_zscores = True
246 
247  @classmethod
248  def add_parser_options(cls, parser):
249  g = optparse.OptionGroup(parser, "EM3D")
250  g.add_option('--em3d', metavar='FILE', dest='map_file',
251  help="File name of the complex density map in mrc format")
252  parser.add_option_group(g)
253 
254  @classmethod
255  def check_options(cls, idock, parser):
256  if idock.opts.map_file:
257  return cls(idock)
258 
259  def __init__(self, idock):
260  Scorer.__init__(self, idock, "em3d_score")
261  self.map_file = idock.opts.map_file
262 
263  def __str__(self):
264  return "EM3D score"
265 
266  def _run_score_binary(self):
267  _run_binary(None, "em3d_score",
268  [self.receptor, self.ligand, self.transformations_file,
269  self.map_file, '-o', self.output_file, '-s'])
270 
271 
272 class CXMSScorer(Scorer):
273  """Score transformations using CXMS"""
274  short_name = 'cxms'
275  transforms_needed = 2000
276  reverse_zscores = True
277 
278  @classmethod
279  def add_parser_options(cls, parser):
280  g = optparse.OptionGroup(parser, "CXMS")
281  g.add_option('--cxms', metavar='FILE', dest='cross_links_file',
282  help="File name of the cross links file")
283  parser.add_option_group(g)
284 
285  @classmethod
286  def check_options(cls, idock, parser):
287  if idock.opts.cross_links_file:
288  return cls(idock)
289 
290  def __init__(self, idock):
291  Scorer.__init__(self, idock, "cxms_score")
292  self.cross_links_file = idock.opts.cross_links_file
293 
294  def __str__(self):
295  return "CXMS score"
296 
297  def _run_score_binary(self):
298  _run_binary(None, "cross_links_score",
299  [self.receptor, self.ligand, self.transformations_file,
300  self.cross_links_file, '-o', self.output_file])
301 
302 
303 class IDock(object):
304  """Handle all stages of the integrative docking protocol"""
305 
306  # Get list of all Scorer subclasses
307  _all_scorers = [x[1] for x in inspect.getmembers(sys.modules[__name__],
308  lambda x: inspect.isclass(x) \
309  and issubclass(x, Scorer) and x is not Scorer)]
310 
311  def parse_args(self):
312  """Parse command line options, and return all applicable Scorers"""
313  data_types = [x.short_name.upper() for x in self._all_scorers]
314  data_types = ", ".join(data_types[:-1]) + " and/or " + data_types[-1]
315  epilog = """Note that in order to run this application, you must also
316 have PatchDock and FiberDock (available from
317 http://bioinfo3d.cs.tau.ac.il/FiberDock/) and reduce (available from
318 http://kinemage.biochem.duke.edu/software/reduce.php)
319 installed. See the --patch-dock and --fiber-dock parameters to tell idock
320 where the PatchDock and FiberDock programs are installed. idock expects to
321 find 'reduce' in the system path."""
322  parser = IMP.OptionParser(usage="%prog [options] <receptor_pdb> "
323  "<ligand_pdb>",
324  description="Integrative pairwise docking "
325  "using %s data" % data_types,
326  epilog=epilog,
327  imp_module=IMP.saxs)
328  choices=['EI', 'AA', 'other']
329  parser.add_option('--complex_type', metavar='TYPE', type='choice',
330  dest="type", choices=choices, default='other',
331  help='/'.join(choices) + '; use this order for '
332  'receptor-ligand: '
333  'antibody-antigen, enzyme-inhibitor')
334  parser.add_option('--patch_dock', metavar='DIR',
335  default=os.environ.get('PATCH_DOCK_HOME', None),
336  help='Directory where PatchDock tools are installed. '
337  'If not specified, the value of the '
338  'PATCH_DOCK_HOME environment variable is used '
339  'if set, otherwise the tools are assumed to be '
340  'in the default path.')
341  parser.add_option('--fiber_dock', metavar='DIR',
342  default=os.environ.get('FIBER_DOCK_HOME', None),
343  help='Directory where FiberDock tools are installed. '
344  'If not specified, the value of the '
345  'FIBER_DOCK_HOME environment variable is used '
346  'if set, otherwise the tools are assumed to be '
347  'in the default path.')
348  parser.add_option('--prefix', default='',
349  help='Add prefix string (separated by an underscore) '
350  'to filenames generated by the current run')
351  parser.add_option('--precision', type='choice', choices=['1', '2', '3'],
352  default='1',
353  help='Sampling precision for rigid docking: '
354  '1-normal, 2-medium, 3-high. The higher the '
355  'precision, the higher are the run times')
356  for s in self._all_scorers:
357  s.add_parser_options(parser)
358 
359  opts, args = parser.parse_args()
360  if len(args) != 2:
361  parser.error("incorrect number of arguments")
362  opts.precision = int(opts.precision)
363  if opts.prefix:
364  opts.prefix += '_'
365 
366  self.opts = opts
367  self.receptor, self.ligand = args
368  scorers = self.get_scorers(parser)
369 
370  if len(scorers) == 0:
371  parser.error("please provide %s experimental data" % data_types)
372  return scorers
373 
374  def get_scorers(self, parser):
375  """Get all scorers that are applicable to the command line options"""
376  scorers = []
377  for s in self._all_scorers:
378  si = s.check_options(self, parser)
379  if si:
380  scorers.append(si)
381  return scorers
382 
383  def run_patch_dock_binary(self, binary, args):
384  """Run a binary that is part of the PatchDock distribution"""
385  _run_binary(self.opts.patch_dock, binary, args)
386 
387  def run_fiber_dock_binary(self, binary, args):
388  """Run a binary that is part of the FiberDock distribution"""
389  _run_binary(self.opts.fiber_dock, binary, args)
390 
391  def make_patch_dock_surfaces(self):
392  """Make molecular surfaces for PatchDock"""
393  self.run_patch_dock_binary('buildMS.pl', [self.receptor, self.ligand])
394 
395  def make_patch_dock_parameters(self):
396  """Make parameter file for PatchDock"""
397  if self.opts.precision == 1:
398  rmsd = '4.0'
399  else:
400  rmsd = '2.0'
401  if self.opts.precision == 3:
402  script = 'buildParamsFine.pl'
403  else:
404  script = 'buildParams.pl'
405  self.run_patch_dock_binary(script, [self.receptor, self.ligand, rmsd,
406  self.opts.type])
407 
408  def get_filename(self, fn):
409  """Get a filename, with user-defined prefix if given"""
410  return self.opts.prefix + fn
411 
412  def get_all_scores_filename(self, scorers, prefix, suffix):
413  """Get a filename containing the names of all scores used"""
414  return self.get_filename(prefix \
415  + '_'.join([x.short_name for x in scorers]) + suffix)
416 
417  def do_patch_dock_docking(self):
418  """Do PatchDock docking, using previously generated surfaces
419  and parameter files"""
420  out_file = self.get_filename('docking.res')
421  # Skip if PatchDock output file exists and contains transformations
422  # (not just the header containing parameter information)
423  if os.path.exists(out_file) and _count_lines(out_file) > 36:
424  print "Skipping PatchDock for %s" % self.receptor
425  else:
426  self.run_patch_dock_binary('patch_dock.Linux',
427  ['params.txt', out_file])
428 
429  def make_transformation_file(self):
430  """Extract transformation image from PatchDock output file"""
431  out_file = self.get_filename('docking.res')
432  num_re = re.compile('\d')
433  num_transforms = 0
434  fout = open('trans_pd', 'w')
435  for line in open(out_file):
436  fields = line.split('|')
437  if len(fields) > 1 and num_re.search(fields[0]):
438  num_transforms += 1
439  print >> fout, int(fields[0]), fields[-1].strip(' \r\n')
440  if self.opts.precision == 1 and num_transforms >= 5000:
441  break
442  return num_transforms
443 
444  def run_patch_dock(self):
445  """Run PatchDock on the ligand and receptor"""
446  self.make_patch_dock_surfaces()
447  self.make_patch_dock_parameters()
448  self.do_patch_dock_docking()
449  num_transforms = self.make_transformation_file()
450  # Swap ligand/receptor if we're doing AA
451  if self.opts.type == 'AA':
452  self.ligand, self.receptor = self.receptor, self.ligand
453  self.opts.saxs_receptor_pdb, self.opts.saxs_ligand_pdb = \
454  self.opts.saxs_ligand_pdb, self.opts.saxs_receptor_pdb
455  return num_transforms
456 
457  def get_filtered_scores(self, scorers):
458  """Get scores that were filtered by an experimental method"""
459  # If only one score, simply extract from its file (note we extract the
460  # z-score, not the raw score, since the combined score is the weighted
461  # sum of z-scores, not raw scores)
462  if len(scorers) == 1:
463  out_fh = open('trans_for_cluster', 'w')
464  in_fh = open(scorers[0].output_file)
465  for line in in_fh:
466  spl = line.split('|')
467  if '+' in line and '#' not in line and len(spl) > 1:
468  out_fh.write("%s %s %s" % (spl[0], spl[3], spl[-1]))
469  else:
470  out_file = self.get_all_scores_filename(scorers, 'combined_',
471  '.res')
472  args = []
473  for s in scorers:
474  args.extend([s.output_file, '1.0'])
475  _run_binary(None, 'combine_scores', args, out_file=out_file)
476  out_fh = open('trans_for_cluster', 'w')
477  in_fh = open(out_file)
478  for line in in_fh:
479  spl = line.split('|')
480  if '#' not in line and len(spl) > 1:
481  out_fh.write("%s %s %s" % (spl[0], spl[1], spl[-1]))
482 
483  def get_clustered_transforms(self, scorers):
484  """Cluster transformations with PatchDock"""
485  out_file = self.get_all_scores_filename(scorers, 'clustered_', '.res')
486  self.run_patch_dock_binary('interface_cluster.linux',
487  [self.receptor + '.ms', self.ligand,
488  'trans_for_cluster', '4.0', out_file])
489  return out_file
490 
491  def get_transforms_for_fiber_dock(self, scorers, transforms_file):
492  """Convert PatchDock-clustered transforms into FiberDock input"""
493  trans_for_fd = self.get_all_scores_filename(scorers, 'trans_for_fd_',
494  '')
495  transforms_needed = min([s.transforms_needed for s in scorers])
496  fh_in = open(transforms_file)
497  fh_out = open(trans_for_fd, 'w')
498  for line in fh_in:
499  spl = line.split('|')
500  if len(spl) > 1:
501  print >> fh_out, spl[0] + " " + spl[4].rstrip('\r\n')
502  transforms_needed -= 1
503  if transforms_needed <= 0:
504  break
505  return trans_for_fd
506 
507  def add_hydrogens(self, in_pdb):
508  """Run reduce to add hydrogens to in_pdb; return the name of the new
509  PDB with hydrogens"""
510  out_pdb = in_pdb + '.HB'
511  _run_binary(None, 'reduce',
512  ['-OH', '-HIS', '-NOADJ', '-NOROTMET', in_pdb],
513  out_file=out_pdb)
514  return out_pdb
515 
516  def make_fiber_dock_parameters(self, scorers, receptorH, ligandH,
517  trans_for_fd):
518  params = self.get_all_scores_filename(scorers, 'params_fd_', '.txt')
519  fd_out = self.get_all_scores_filename(scorers, 'fd_res_', '')
520  self.run_fiber_dock_binary('buildFiberDockParams.pl',
521  [receptorH, ligandH, 'U', 'U', self.opts.type, trans_for_fd,
522  fd_out, '0', '50', '0.8', '0', 'glpk', params])
523  return params, fd_out + '.ref'
524 
525  def do_fiber_dock_docking(self, trans_for_fd, params, fd_out):
526  if os.path.exists(fd_out) \
527  and _count_fiber_dock_out(fd_out) == _count_lines(trans_for_fd):
528  print "Skipping FiberDock for %s" % self.receptor
529  return
530  else:
531  self.run_fiber_dock_binary('FiberDock', [params])
532 
533  def convert_fiber_dock_to_score(self, scorers, fd_out):
534  """Convert FiberDock or FireDock output file to the same format
535  as the scorers"""
536  out = self.get_all_scores_filename(scorers, 'fd_res_', '.res')
537  solutions = []
538  fire_dock = False
539  num_re = re.compile('\d')
540  # Extract solutions
541  for line in open(fd_out):
542  spl = line.split('|')
543  if len(spl) > 1 and num_re.search(spl[0]):
544  num = int(spl[0])
545  if fire_dock:
546  solutions.append((num, float(spl[5]),
547  spl[-1].rstrip('\r\n')))
548  else:
549  solutions.append((num, float(spl[1]), spl[-3]))
550  elif len(spl) > 5 and 'glob' in spl[5]:
551  fire_dock = True
552  # Compute Z-score
553  average = 0.
554  stddev = 0.
555  for solution in solutions:
556  score = solution[1]
557  average += score
558  stddev += (score * score)
559  average /= len(solutions)
560  stddev = math.sqrt(stddev / len(solutions) - (average * average))
561 
562  # Output in new format
563  fh = open(out, 'w')
564  print >> fh, " # | Score | filt| ZScore | Transformation"
565  for solution in solutions:
566  num, score, trans = solution
567  zscore = (score - average) / stddev
568  print >> fh, " %4d | %5.3f | + | %5.3f | %s" \
569  % (num, score, zscore, trans)
570  return out
571 
572  def run_fiber_dock(self, scorers, transforms_file):
573  trans_for_fd = self.get_transforms_for_fiber_dock(scorers,
574  transforms_file)
575  receptorH = self.add_hydrogens(self.receptor)
576  ligandH = self.add_hydrogens(self.ligand)
577  params, fd_out = self.make_fiber_dock_parameters(scorers, receptorH,
578  ligandH, trans_for_fd)
579  self.do_fiber_dock_docking(trans_for_fd, params, fd_out)
580  return trans_for_fd, self.convert_fiber_dock_to_score(scorers, fd_out)
581 
582  def combine_final_scores(self, scorers, fd_out):
583  """Combine scores for the final set with those from FireDock"""
584  out_file = self.get_filename('combined_final.res')
585  args = []
586  for s in scorers:
587  args.extend([s.zscore_output_file, '1.0'])
588  args.extend([fd_out, '1.0'])
589  _run_binary(None, 'combine_scores', args, out_file=out_file)
590  return out_file
591 
592  def write_results(self, scorers, comb_final):
593  """Write final results file"""
594  out_fname = self.get_all_scores_filename(scorers, 'results_', '.txt')
595  # Get all solutions
596  solutions = []
597  for line in open(comb_final):
598  spl = line.split('|')
599  if len(spl) > 1 and 'Score' not in line:
600  solutions.append(spl)
601  # Sort by z-score
602  solutions.sort(key=lambda x: float(x[3]))
603  # Write results file
604  fh = open(out_fname, 'w')
605  print >> fh, "receptorPdb Str " + self.receptor
606  print >> fh, "ligandPdb Str " + self.ligand
607  print >> fh, " # | Score | filt| ZScore |" \
608  + "|".join(["%-8s| Zscore " % s.short_name for s in scorers]) \
609  + "| Energy | Zscore | Transformation"
610  for i, solution in enumerate(solutions):
611  fh.write("%6d | " % (i+1) + "|".join(solution[1:]))
612  return out_fname
613 
614  def main(self):
615  """Run the entire protocol"""
616  scorers = self.parse_args()
617  num_transforms = self.run_patch_dock()
618  for scorer in scorers:
619  scorer.score(num_transforms)
620  self.get_filtered_scores(scorers)
621  transforms_file = self.get_clustered_transforms(scorers)
622  trans_for_fd, fd_out = self.run_fiber_dock(scorers, transforms_file)
623  for scorer in scorers:
624  scorer.recompute_zscore(trans_for_fd)
625  fh = self.combine_final_scores(scorers, fd_out)
626  self.write_results(scorers, fh)
627 
628 
629 if __name__ == "__main__":
630  dock = IDock()
631  dock.main()