15 class TempDir(object):
16 """Make a temporary directory, and delete it when the object is destroyed"""
18 self.tmpdir = tempfile.mkdtemp()
20 shutil.rmtree(self.tmpdir, ignore_errors=
True)
23 def _count_lines(filename):
24 """Count the number of lines in the file"""
26 for line
in open(filename):
30 def _count_fiber_dock_out(filename):
31 """Count the number of transformations in a FiberDock output file"""
34 for line
in open(filename):
42 def _run_binary(path, binary, args, out_file=None):
44 binary = os.path.join(path, binary)
45 cmd =
' '.join([binary] + args)
47 stdout = open(out_file,
'w')
48 cmd +=
' > ' + out_file
52 p = subprocess.Popen([binary] + args, stdout=stdout, stderr=sys.stderr)
55 raise OSError(
"subprocess failed with exit code %d" % ret)
59 """Score transformations using a type of experimental data.
60 To add support for a new type of data, simply create a new Scorer
61 subclass and override its methods and variables."""
62 transformations_file =
'trans_pd'
65 transforms_needed = 5000
68 reverse_zscores =
False
73 def add_parser_options(cls, parser):
74 """Add command line options to the given parser"""
78 def check_options(cls, idock, parser):
79 """Check command line options for consistency, and create and return
80 a new instance if selected by these options"""
83 def __init__(self, idock, output_type):
84 self.receptor = idock.receptor
85 self.ligand = idock.ligand
86 self.output_file = idock.get_filename(
"%s.res" % output_type)
87 self.zscore_output_file = idock.get_filename(
"%sf.res" % output_type)
89 def score(self, num_transforms):
90 if os.path.exists(self.output_file) \
91 and _count_lines(self.output_file) >= num_transforms:
92 print "Skipping %s for %s" % (str(self), self.receptor)
94 self._run_score_binary()
96 def recompute_zscore(self, transforms_file):
97 """Recompute z-scores for the new set of transformations
100 tmp_tf = os.path.join(d.tmpdir,
'tmp')
102 num_re = re.compile(
'\d')
104 for line
in open(self.output_file):
105 spl = line.split(
'|')
106 if len(spl) > 1
and num_re.search(spl[0]):
107 solutions.append(line)
109 fh = open(tmp_tf,
'w')
110 for line
in open(transforms_file):
113 fh.write(solutions[int(spl[0])-1])
117 if self.reverse_zscores:
119 _run_binary(
None,
'recompute_zscore', args,
120 out_file=self.zscore_output_file)
123 class NMRScorer(Scorer):
124 """Score transformations using NMR residue type content"""
125 short_name =
'nmr_rtc'
128 def add_parser_options(cls, parser):
129 g = optparse.OptionGroup(parser,
"NMR-RTC")
130 g.add_option(
'--receptor_rtc', metavar=
'FILE',
131 help=
"File name of the receptor NMR residue type content")
132 g.add_option(
'--ligand_rtc', metavar=
'FILE',
133 help=
"File name of the ligand NMR residue type content")
134 parser.add_option_group(g)
137 def check_options(cls, idock, parser):
138 if idock.opts.receptor_rtc
or idock.opts.ligand_rtc:
141 def __init__(self, idock):
142 Scorer.__init__(self, idock,
"nmr_rtc_score")
143 self.receptor_rtc = idock.opts.receptor_rtc
144 self.ligand_rtc = idock.opts.ligand_rtc
145 if idock.opts.type ==
'AA':
146 self.receptor_rtc, self.ligand_rtc = self.ligand_rtc,
'-'
151 def _run_score_binary(self):
152 _run_binary(
None,
"nmr_rtc_score",
153 [self.receptor, self.ligand, self.transformations_file,
154 self.receptor_rtc, self.ligand_rtc,
155 '-o', self.output_file])
158 class SAXSScorer(Scorer):
159 """Score transformations using SAXS (rg + chi)"""
163 def add_parser_options(cls, parser):
164 g = optparse.OptionGroup(parser,
"SAXS")
165 g.add_option(
'--saxs', metavar=
'FILE', dest=
'saxs_file',
166 help=
"File name of the complex SAXS profile")
167 g.add_option(
'--saxs_receptor_pdb', metavar=
'FILE',
168 help=
'Additional receptor structure for SAXS scoring '
169 'with modeled missing atoms/residues. '
170 'This structure should be aligned to the '
172 g.add_option(
'--saxs_ligand_pdb', metavar=
'FILE',
173 help=
'Additional ligand structure for SAXS scoring '
174 'with modeled missing atoms/residues. '
175 'This structure should be aligned to the '
177 parser.add_option_group(g)
180 def check_options(cls, idock, parser):
181 if idock.opts.saxs_file:
184 def __init__(self, idock):
185 Scorer.__init__(self, idock,
"saxs_score")
186 self.saxs_file = idock.opts.saxs_file
187 self.saxs_receptor = idock.opts.saxs_receptor_pdb
or idock.receptor
188 self.saxs_ligand = idock.opts.saxs_ligand_pdb
or idock.ligand
193 def _run_score_binary(self):
194 _run_binary(
None,
"saxs_score",
195 [self.saxs_receptor, self.saxs_ligand,
196 self.transformations_file, self.saxs_file,
197 '-o', self.output_file])
200 class EM2DScorer(Scorer):
201 """Score transformations using EM2D"""
205 def add_parser_options(cls, parser):
206 g = optparse.OptionGroup(parser,
"EM2D")
207 g.add_option(
'--em2d', metavar=
'FILE', action=
'append', default=[],
208 dest=
'class_averages',
209 help=
"File name of a complex 2D class average in PGM "
210 "format. This option can be repeated to use "
211 "multiple class averages (up to 5 in total)")
212 g.add_option(
'--pixel_size', type=
'float', default=0.,
213 help=
'Pixel size for EM2D images')
214 parser.add_option_group(g)
217 def check_options(cls, idock, parser):
218 if idock.opts.class_averages:
219 if idock.opts.pixel_size <= 0.:
220 parser.error(
"please specify pixel size for 2D images with "
221 "--pixel_size option")
225 def __init__(self, idock):
226 Scorer.__init__(self, idock,
"em2d_score")
227 self.class_averages = idock.opts.class_averages
228 self.pixel_size = idock.opts.pixel_size
233 def _run_score_binary(self):
234 _run_binary(
None,
"em2d_score",
235 [self.receptor, self.ligand, self.transformations_file] \
236 + self.class_averages \
237 + [
'-o', self.output_file,
'-n',
'200',
'-s',
238 str(self.pixel_size)])
241 class EM3DScorer(Scorer):
242 """Score transformations using EM3D"""
244 transforms_needed = 1000
245 reverse_zscores =
True
248 def add_parser_options(cls, parser):
249 g = optparse.OptionGroup(parser,
"EM3D")
250 g.add_option(
'--em3d', metavar=
'FILE', dest=
'map_file',
251 help=
"File name of the complex density map in mrc format")
252 parser.add_option_group(g)
255 def check_options(cls, idock, parser):
256 if idock.opts.map_file:
259 def __init__(self, idock):
260 Scorer.__init__(self, idock,
"em3d_score")
261 self.map_file = idock.opts.map_file
266 def _run_score_binary(self):
267 _run_binary(
None,
"em3d_score",
268 [self.receptor, self.ligand, self.transformations_file,
269 self.map_file,
'-o', self.output_file,
'-s'])
272 class CXMSScorer(Scorer):
273 """Score transformations using CXMS"""
275 transforms_needed = 2000
276 reverse_zscores =
True
279 def add_parser_options(cls, parser):
280 g = optparse.OptionGroup(parser,
"CXMS")
281 g.add_option(
'--cxms', metavar=
'FILE', dest=
'cross_links_file',
282 help=
"File name of the cross links file")
283 parser.add_option_group(g)
286 def check_options(cls, idock, parser):
287 if idock.opts.cross_links_file:
290 def __init__(self, idock):
291 Scorer.__init__(self, idock,
"cxms_score")
292 self.cross_links_file = idock.opts.cross_links_file
297 def _run_score_binary(self):
298 _run_binary(
None,
"cross_links_score",
299 [self.receptor, self.ligand, self.transformations_file,
300 self.cross_links_file,
'-o', self.output_file])
304 """Handle all stages of the integrative docking protocol"""
307 _all_scorers = [x[1]
for x
in inspect.getmembers(sys.modules[__name__],
308 lambda x: inspect.isclass(x) \
309 and issubclass(x, Scorer)
and x
is not Scorer)]
311 def parse_args(self):
312 """Parse command line options, and return all applicable Scorers"""
313 data_types = [x.short_name.upper()
for x
in self._all_scorers]
314 data_types =
", ".join(data_types[:-1]) +
" and/or " + data_types[-1]
315 epilog =
"""Note that in order to run this application, you must also
316 have PatchDock and FiberDock (available from
317 http://bioinfo3d.cs.tau.ac.il/FiberDock/) and reduce (available from
318 http://kinemage.biochem.duke.edu/software/reduce.php)
319 installed. See the --patch-dock and --fiber-dock parameters to tell idock
320 where the PatchDock and FiberDock programs are installed. idock expects to
321 find 'reduce' in the system path."""
324 description=
"Integrative pairwise docking "
325 "using %s data" % data_types,
328 choices=[
'EI',
'AA',
'other']
329 parser.add_option(
'--complex_type', metavar=
'TYPE', type=
'choice',
330 dest=
"type", choices=choices, default=
'other',
331 help=
'/'.join(choices) +
'; use this order for '
333 'antibody-antigen, enzyme-inhibitor')
334 parser.add_option(
'--patch_dock', metavar=
'DIR',
335 default=os.environ.get(
'PATCH_DOCK_HOME',
None),
336 help=
'Directory where PatchDock tools are installed. '
337 'If not specified, the value of the '
338 'PATCH_DOCK_HOME environment variable is used '
339 'if set, otherwise the tools are assumed to be '
340 'in the default path.')
341 parser.add_option(
'--fiber_dock', metavar=
'DIR',
342 default=os.environ.get(
'FIBER_DOCK_HOME',
None),
343 help=
'Directory where FiberDock tools are installed. '
344 'If not specified, the value of the '
345 'FIBER_DOCK_HOME environment variable is used '
346 'if set, otherwise the tools are assumed to be '
347 'in the default path.')
348 parser.add_option(
'--prefix', default=
'',
349 help=
'Add prefix string (separated by an underscore) '
350 'to filenames generated by the current run')
351 parser.add_option(
'--precision', type=
'choice', choices=[
'1',
'2',
'3'],
353 help=
'Sampling precision for rigid docking: '
354 '1-normal, 2-medium, 3-high. The higher the '
355 'precision, the higher are the run times')
356 for s
in self._all_scorers:
357 s.add_parser_options(parser)
359 opts, args = parser.parse_args()
361 parser.error(
"incorrect number of arguments")
362 opts.precision = int(opts.precision)
367 self.receptor, self.ligand = args
368 scorers = self.get_scorers(parser)
370 if len(scorers) == 0:
371 parser.error(
"please provide %s experimental data" % data_types)
374 def get_scorers(self, parser):
375 """Get all scorers that are applicable to the command line options"""
377 for s
in self._all_scorers:
378 si = s.check_options(self, parser)
383 def run_patch_dock_binary(self, binary, args):
384 """Run a binary that is part of the PatchDock distribution"""
385 _run_binary(self.opts.patch_dock, binary, args)
387 def run_fiber_dock_binary(self, binary, args):
388 """Run a binary that is part of the FiberDock distribution"""
389 _run_binary(self.opts.fiber_dock, binary, args)
391 def make_patch_dock_surfaces(self):
392 """Make molecular surfaces for PatchDock"""
393 self.run_patch_dock_binary(
'buildMS.pl', [self.receptor, self.ligand])
395 def make_patch_dock_parameters(self):
396 """Make parameter file for PatchDock"""
397 if self.opts.precision == 1:
401 if self.opts.precision == 3:
402 script =
'buildParamsFine.pl'
404 script =
'buildParams.pl'
405 self.run_patch_dock_binary(script, [self.receptor, self.ligand, rmsd,
408 def get_filename(self, fn):
409 """Get a filename, with user-defined prefix if given"""
410 return self.opts.prefix + fn
412 def get_all_scores_filename(self, scorers, prefix, suffix):
413 """Get a filename containing the names of all scores used"""
414 return self.get_filename(prefix \
415 +
'_'.join([x.short_name
for x
in scorers]) + suffix)
417 def do_patch_dock_docking(self):
418 """Do PatchDock docking, using previously generated surfaces
419 and parameter files"""
420 out_file = self.get_filename(
'docking.res')
423 if os.path.exists(out_file)
and _count_lines(out_file) > 36:
424 print "Skipping PatchDock for %s" % self.receptor
426 self.run_patch_dock_binary(
'patch_dock.Linux',
427 [
'params.txt', out_file])
429 def make_transformation_file(self):
430 """Extract transformation image from PatchDock output file"""
431 out_file = self.get_filename(
'docking.res')
432 num_re = re.compile(
'\d')
434 fout = open(
'trans_pd',
'w')
435 for line
in open(out_file):
436 fields = line.split(
'|')
437 if len(fields) > 1
and num_re.search(fields[0]):
439 print >> fout, int(fields[0]), fields[-1].strip(
' \r\n')
440 if self.opts.precision == 1
and num_transforms >= 5000:
442 return num_transforms
444 def run_patch_dock(self):
445 """Run PatchDock on the ligand and receptor"""
446 self.make_patch_dock_surfaces()
447 self.make_patch_dock_parameters()
448 self.do_patch_dock_docking()
449 num_transforms = self.make_transformation_file()
451 if self.opts.type ==
'AA':
452 self.ligand, self.receptor = self.receptor, self.ligand
453 self.opts.saxs_receptor_pdb, self.opts.saxs_ligand_pdb = \
454 self.opts.saxs_ligand_pdb, self.opts.saxs_receptor_pdb
455 return num_transforms
457 def get_filtered_scores(self, scorers):
458 """Get scores that were filtered by an experimental method"""
462 if len(scorers) == 1:
463 out_fh = open(
'trans_for_cluster',
'w')
464 in_fh = open(scorers[0].output_file)
466 spl = line.split(
'|')
467 if '+' in line
and '#' not in line
and len(spl) > 1:
468 out_fh.write(
"%s %s %s" % (spl[0], spl[3], spl[-1]))
470 out_file = self.get_all_scores_filename(scorers,
'combined_',
474 args.extend([s.output_file,
'1.0'])
475 _run_binary(
None,
'combine_scores', args, out_file=out_file)
476 out_fh = open(
'trans_for_cluster',
'w')
477 in_fh = open(out_file)
479 spl = line.split(
'|')
480 if '#' not in line
and len(spl) > 1:
481 out_fh.write(
"%s %s %s" % (spl[0], spl[1], spl[-1]))
483 def get_clustered_transforms(self, scorers):
484 """Cluster transformations with PatchDock"""
485 out_file = self.get_all_scores_filename(scorers,
'clustered_',
'.res')
486 self.run_patch_dock_binary(
'interface_cluster.linux',
487 [self.receptor +
'.ms', self.ligand,
488 'trans_for_cluster',
'4.0', out_file])
491 def get_transforms_for_fiber_dock(self, scorers, transforms_file):
492 """Convert PatchDock-clustered transforms into FiberDock input"""
493 trans_for_fd = self.get_all_scores_filename(scorers,
'trans_for_fd_',
495 transforms_needed = min([s.transforms_needed
for s
in scorers])
496 fh_in = open(transforms_file)
497 fh_out = open(trans_for_fd,
'w')
499 spl = line.split(
'|')
501 print >> fh_out, spl[0] +
" " + spl[4].rstrip(
'\r\n')
502 transforms_needed -= 1
503 if transforms_needed <= 0:
507 def add_hydrogens(self, in_pdb):
508 """Run reduce to add hydrogens to in_pdb; return the name of the new
509 PDB with hydrogens"""
510 out_pdb = in_pdb +
'.HB'
511 _run_binary(
None,
'reduce',
512 [
'-OH',
'-HIS',
'-NOADJ',
'-NOROTMET', in_pdb],
516 def make_fiber_dock_parameters(self, scorers, receptorH, ligandH,
518 params = self.get_all_scores_filename(scorers,
'params_fd_',
'.txt')
519 fd_out = self.get_all_scores_filename(scorers,
'fd_res_',
'')
520 self.run_fiber_dock_binary(
'buildFiberDockParams.pl',
521 [receptorH, ligandH,
'U', 'U', self.opts.type, trans_for_fd,
522 fd_out, '0',
'50',
'0.8',
'0',
'glpk', params])
523 return params, fd_out +
'.ref'
525 def do_fiber_dock_docking(self, trans_for_fd, params, fd_out):
526 if os.path.exists(fd_out) \
527 and _count_fiber_dock_out(fd_out) == _count_lines(trans_for_fd):
528 print "Skipping FiberDock for %s" % self.receptor
531 self.run_fiber_dock_binary(
'FiberDock', [params])
533 def convert_fiber_dock_to_score(self, scorers, fd_out):
534 """Convert FiberDock or FireDock output file to the same format
536 out = self.get_all_scores_filename(scorers,
'fd_res_',
'.res')
539 num_re = re.compile(
'\d')
541 for line
in open(fd_out):
542 spl = line.split(
'|')
543 if len(spl) > 1
and num_re.search(spl[0]):
546 solutions.append((num, float(spl[5]),
547 spl[-1].rstrip(
'\r\n')))
549 solutions.append((num, float(spl[1]), spl[-3]))
550 elif len(spl) > 5
and 'glob' in spl[5]:
555 for solution
in solutions:
558 stddev += (score * score)
559 average /= len(solutions)
560 stddev = math.sqrt(stddev / len(solutions) - (average * average))
564 print >> fh,
" # | Score | filt| ZScore | Transformation"
565 for solution
in solutions:
566 num, score, trans = solution
567 zscore = (score - average) / stddev
568 print >> fh,
" %4d | %5.3f | + | %5.3f | %s" \
569 % (num, score, zscore, trans)
572 def run_fiber_dock(self, scorers, transforms_file):
573 trans_for_fd = self.get_transforms_for_fiber_dock(scorers,
575 receptorH = self.add_hydrogens(self.receptor)
576 ligandH = self.add_hydrogens(self.ligand)
577 params, fd_out = self.make_fiber_dock_parameters(scorers, receptorH,
578 ligandH, trans_for_fd)
579 self.do_fiber_dock_docking(trans_for_fd, params, fd_out)
580 return trans_for_fd, self.convert_fiber_dock_to_score(scorers, fd_out)
582 def combine_final_scores(self, scorers, fd_out):
583 """Combine scores for the final set with those from FireDock"""
584 out_file = self.get_filename(
'combined_final.res')
587 args.extend([s.zscore_output_file,
'1.0'])
588 args.extend([fd_out,
'1.0'])
589 _run_binary(
None,
'combine_scores', args, out_file=out_file)
592 def write_results(self, scorers, comb_final):
593 """Write final results file"""
594 out_fname = self.get_all_scores_filename(scorers,
'results_',
'.txt')
597 for line
in open(comb_final):
598 spl = line.split(
'|')
599 if len(spl) > 1
and 'Score' not in line:
600 solutions.append(spl)
602 solutions.sort(key=
lambda x: float(x[3]))
604 fh = open(out_fname,
'w')
605 print >> fh,
"receptorPdb Str " + self.receptor
606 print >> fh,
"ligandPdb Str " + self.ligand
607 print >> fh,
" # | Score | filt| ZScore |" \
608 +
"|".join([
"%-8s| Zscore " % s.short_name
for s
in scorers]) \
609 +
"| Energy | Zscore | Transformation"
610 for i, solution
in enumerate(solutions):
611 fh.write(
"%6d | " % (i+1) +
"|".join(solution[1:]))
615 """Run the entire protocol"""
616 scorers = self.parse_args()
617 num_transforms = self.run_patch_dock()
618 for scorer
in scorers:
619 scorer.score(num_transforms)
620 self.get_filtered_scores(scorers)
621 transforms_file = self.get_clustered_transforms(scorers)
622 trans_for_fd, fd_out = self.run_fiber_dock(scorers, transforms_file)
623 for scorer
in scorers:
624 scorer.recompute_zscore(trans_for_fd)
625 fh = self.combine_final_scores(scorers, fd_out)
626 self.write_results(scorers, fh)
629 if __name__ ==
"__main__":