1 """@namespace IMP.pmi.mmcif
2 @brief Support for the mmCIF file format.
4 IMP has basic support for writing out files in mmCIF format, for
5 deposition in [PDB-dev](https://pdb-dev.rcsb.rutgers.edu/).
6 mmCIF files are currently generated by creating an
7 IMP.pmi.mmcif.ProtocolOutput class, and attaching it to an
8 IMP.pmi.representation.Representation object, after which any
9 generated models and metadata are collected and output as mmCIF.
12 from __future__
import print_function
34 if sys.version_info[0] >= 3:
39 def _assign_id(obj, seen_objs, obj_by_id):
40 """Assign a unique ID to obj, and track all ids in obj_by_id."""
41 if obj
not in seen_objs:
42 if not hasattr(obj,
'id'):
44 obj.id = len(obj_by_id)
45 seen_objs[obj] = obj.id
47 obj.id = seen_objs[obj]
49 class _LineWriter(object):
50 def __init__(self, writer, line_len=80):
52 self.line_len = line_len
55 if isinstance(val, str)
and '\n' in val:
56 self.writer.fh.write(
"\n;")
57 self.writer.fh.write(val)
58 if not val.endswith(
'\n'):
59 self.writer.fh.write(
"\n")
60 self.writer.fh.write(
";\n")
63 val = self.writer._repr(val)
65 if self.column + len(val) + 1 > self.line_len:
66 self.writer.fh.write(
"\n")
69 self.writer.fh.write(
" ")
71 self.writer.fh.write(val)
72 self.column += len(val)
75 class _CifCategoryWriter(object):
76 def __init__(self, writer, category):
78 self.category = category
79 def write(self, **kwargs):
80 self.writer._write(self.category, kwargs)
83 def __exit__(self, exc_type, exc_value, traceback):
87 class _CifLoopWriter(object):
88 def __init__(self, writer, category, keys):
90 self.category = category
93 self.python_keys = [k.replace(
'[',
'').replace(
']',
'')
for k
in keys]
94 self._empty_loop =
True
95 def write(self, **kwargs):
100 f.write(
"%s.%s\n" % (self.category, k))
101 self._empty_loop =
False
102 l = _LineWriter(self.writer)
103 for k
in self.python_keys:
104 l.write(kwargs.get(k, self.writer.omitted))
105 self.writer.fh.write(
"\n")
108 def __exit__(self, exc_type, exc_value, traceback):
109 if not self._empty_loop:
110 self.writer.fh.write(
"#\n")
113 class _CifWriter(object):
116 _boolmap = {
False:
'NO',
True:
'YES'}
118 def __init__(self, fh):
120 def category(self, category):
121 return _CifCategoryWriter(self, category)
122 def loop(self, category, keys):
123 return _CifLoopWriter(self, category, keys)
124 def write_comment(self, comment):
125 for line
in textwrap.wrap(comment, 78):
126 self.fh.write(
'# ' + line +
'\n')
127 def _write(self, category, kwargs):
129 self.fh.write(
"%s.%s %s\n" % (category, key,
130 self._repr(kwargs[key])))
131 def _repr(self, obj):
132 if isinstance(obj, str)
and '"' not in obj \
133 and "'" not in obj
and " " not in obj:
135 elif isinstance(obj, float):
137 elif isinstance(obj, bool):
138 return self._boolmap[obj]
141 elif isinstance(obj, _long_type):
def _get_by_residue(p):
    """Determine whether the given particle represents a specific residue
       or a more coarse-grained object."""
    # NOTE(review): body reconstructed from elided lines — confirm against
    # upstream; a particle is per-residue if set up as a Residue or as an
    # atom within a residue
    return IMP.atom.Residue.get_is_setup(p) or IMP.atom.Atom.get_is_setup(p)
class _ComponentMapper(object):
    """Map a Particle to a component name"""
    def __init__(self, prot):
        # Reuse PMI's Output machinery to map particles to components
        self.o = IMP.pmi.output.Output()
        self.prot = prot
        self.name = 'cif-output'
        self.o.dictionary_pdbs[self.name] = self.prot
        self.o._init_dictchain(self.name, self.prot)

    def __getitem__(self, p):
        protname, is_a_bead = self.o.get_prot_name_from_particle(self.name, p)
        return protname
class _AsymIDMapper(object):
    """Map a Particle to an asym_id (chain ID)"""
    def __init__(self, simo, prot):
        self.simo = simo
        self._cm = _ComponentMapper(prot)

    def __getitem__(self, p):
        # First map the particle to a component, then to that
        # component's chain
        protname = self._cm[p]
        return self.simo._get_chain_for_component(protname, self._cm.o)
176 class _Dumper(object):
177 """Base class for helpers to dump output to mmCIF"""
178 def __init__(self, simo):
179 self.simo = weakref.proxy(simo)
184 def finalize_metadata(self):
class _EntryDumper(_Dumper):
    """Write the mmCIF data_ header and _entry category."""
    def dump(self, writer):
        entry_id = 'imp_model'
        # Write the CIF data header (so this dumper should always be first)
        writer.fh.write("data_%s\n" % entry_id)
        with writer.category("_entry") as l:
            l.write(id=entry_id)
class _SoftwareDumper(_Dumper):
    """Populate the mmCIF _software category with IMP/PMI plus any
       extra modeling software (MODELLER, Phyre2) that was used."""
    def __init__(self, simo):
        super(_SoftwareDumper, self).__init__(simo)
        self.modeller_used = self.phyre2_used = False
        self.software = [
            IMP.pmi.metadata.Software(
                name="Integrative Modeling Platform (IMP)",
                version=IMP.__version__,
                classification="integrative model building",
                description="integrative model building",
                url='https://integrativemodeling.org'),
            IMP.pmi.metadata.Software(
                name="IMP PMI module",
                version=IMP.pmi.__version__,
                classification="integrative model building",
                description="integrative model building",
                url='https://integrativemodeling.org')]

    def set_modeller_used(self, version, date):
        # Only record MODELLER once
        if self.modeller_used:
            return
        self.modeller_used = True
        self.software.append(IMP.pmi.metadata.Software(
            name='MODELLER', classification='comparative modeling',
            description='Comparative modeling by satisfaction '
                        'of spatial restraints, build ' + date,
            url='https://salilab.org/modeller/',
            version=version))

    def set_phyre2_used(self):
        # Only record Phyre2 once
        if self.phyre2_used:
            return
        self.phyre2_used = True
        self.software.append(IMP.pmi.metadata.Software(
            name='Phyre2', classification='protein homology modeling',
            description='Protein Homology/analogY Recognition '
                        'Engine V 2.0',
            version='2.0', url='http://www.sbg.bio.ic.ac.uk/~phyre2/'))

    def dump(self, writer):
        ordinal = 1
        with writer.loop("_software",
                         ["pdbx_ordinal", "name", "classification", "version",
                          "type", "location"]) as l:
            for m in itertools.chain(self.software, self.simo._metadata):
                # Only Software objects from the metadata list belong here
                if isinstance(m, IMP.pmi.metadata.Software):
                    l.write(pdbx_ordinal=ordinal, name=m.name,
                            classification=m.classification, version=m.version,
                            type=m.type, location=m.url)
                    ordinal += 1
class _AuditAuthorDumper(_Dumper):
    """Populate the mmCIF audit_author category (a list of the people that
       authored this mmCIF file; here we assume that's just the authors of
       any associated publications)"""
    def dump(self, writer):
        citations = [m for m in self.simo._metadata
                     if isinstance(m, IMP.pmi.metadata.Citation)]
        seen_authors = {}
        with writer.loop("_audit_author",
                         ["name", "pdbx_ordinal"]) as l:
            ordinal = 1
            for n, c in enumerate(citations):
                for a in c.authors:
                    # Write each author only once, in first-seen order
                    if a not in seen_authors:
                        seen_authors[a] = None
                        l.write(name=a, pdbx_ordinal=ordinal)
                        ordinal += 1
class _CitationDumper(_Dumper):
    """Populate the mmCIF _citation and _citation_author categories from
       any Citation metadata objects."""
    def dump(self, writer):
        citations = [m for m in self.simo._metadata
                     if isinstance(m, IMP.pmi.metadata.Citation)]
        with writer.loop("_citation",
                         ["id", "title", "journal_abbrev", "journal_volume",
                          "page_first", "page_last", "year",
                          "pdbx_database_id_PubMed",
                          "pdbx_database_id_DOI"]) as l:
            for n, c in enumerate(citations):
                # page_range may be a single page or a (first, last) pair
                if isinstance(c.page_range, (tuple, list)):
                    page_first, page_last = c.page_range
                else:
                    page_first = c.page_range
                    page_last = _CifWriter.omitted
                l.write(id=n+1, title=c.title, journal_abbrev=c.journal,
                        journal_volume=c.volume, page_first=page_first,
                        page_last=page_last, year=c.year,
                        pdbx_database_id_PubMed=c.pmid,
                        pdbx_database_id_DOI=c.doi)

        with writer.loop("_citation_author",
                         ["citation_id", "name", "ordinal"]) as l:
            ordinal = 1
            for n, c in enumerate(citations):
                for a in c.authors:
                    l.write(citation_id=n+1, name=a, ordinal=ordinal)
                    ordinal += 1
class _EntityDumper(_Dumper):
    """Populate the mmCIF _entity category, one row per unique sequence."""
    # todo: currently only polymer (protein) entities are supported
    def dump(self, writer):
        with writer.loop("_entity",
                         ["id", "type", "src_method", "pdbx_description",
                          "formula_weight", "pdbx_number_of_molecules",
                          "details"]) as l:
            for entity in self.simo.entities.get_all():
                l.write(id=entity.id, type='polymer', src_method='man',
                        pdbx_description=entity.first_component,
                        formula_weight=writer.unknown,
                        pdbx_number_of_molecules=1, details=writer.unknown)
class _EntityPolyDumper(_Dumper):
    """Populate the mmCIF _entity_poly category (polymer sequences)."""
    # todo: currently only amino acid sequences are supported
    def __init__(self, simo):
        super(_EntityPolyDumper, self).__init__(simo)
        self.output = IMP.pmi.output.Output()

    def dump(self, writer):
        with writer.loop("_entity_poly",
                         ["entity_id", "type", "nstd_linkage",
                          "nstd_monomer", "pdbx_strand_id",
                          "pdbx_seq_one_letter_code",
                          "pdbx_seq_one_letter_code_can"]) as l:
            for entity in self.simo.entities.get_all():
                seq = entity.sequence
                # Split into lines of 70 characters for tidier mmCIF output
                seq = "\n".join(seq[i:i+70] for i in range(0, len(seq), 70))
                name = entity.first_component
                chain_id = self.simo._get_chain_for_component(name, self.output)
                l.write(entity_id=entity.id, type='polypeptide(L)',
                        nstd_linkage='no', nstd_monomer='no',
                        pdbx_strand_id=chain_id,
                        pdbx_seq_one_letter_code=seq,
                        pdbx_seq_one_letter_code_can=seq)
class _ChemCompDumper(_Dumper):
    """Populate the mmCIF _chem_comp category with every residue type
       used by any entity's sequence."""
    def dump(self, writer):
        seen = {}
        # The 20 standard amino acids, which get 'L-peptide linking' type
        std = dict.fromkeys(('ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS',
                             'ILE', 'LYS', 'LEU', 'MET', 'ASN', 'PRO', 'GLN',
                             'ARG', 'SER', 'THR', 'VAL', 'TRP', 'TYR'))
        with writer.loop("_chem_comp", ["id", "type"]) as l:
            for entity in self.simo.entities.get_all():
                seq = entity.sequence
                for num, one_letter_code in enumerate(seq):
                    restyp = IMP.atom.get_residue_type(one_letter_code)
                    resid = restyp.get_string()
                    # Write each residue type only once
                    if resid not in seen:
                        seen[resid] = None
                        l.write(id=resid,
                                type='L-peptide linking' if resid in std
                                     else 'other')
class _EntityPolySeqDumper(_Dumper):
    """Populate the mmCIF _entity_poly_seq category (one row per residue
       of each entity's sequence)."""
    def dump(self, writer):
        with writer.loop("_entity_poly_seq",
                         ["entity_id", "num", "mon_id", "hetero"]) as l:
            for entity in self.simo.entities.get_all():
                seq = entity.sequence
                for num, one_letter_code in enumerate(seq):
                    restyp = IMP.atom.get_residue_type(one_letter_code)
                    # seq_id numbering is 1-based
                    l.write(entity_id=entity.id, num=num + 1,
                            mon_id=restyp.get_string(),
                            hetero=_CifWriter.omitted)
class _StructAsymDumper(_Dumper):
    """Populate the mmCIF _struct_asym category (one row per chain)."""
    def __init__(self, simo):
        super(_StructAsymDumper, self).__init__(simo)
        self.output = IMP.pmi.output.Output()

    def dump(self, writer):
        with writer.loop("_struct_asym",
                         ["id", "entity_id", "details"]) as l:
            for comp in self.simo.all_modeled_components:
                entity = self.simo.entities[comp]
                chain_id = self.simo._get_chain_for_component(comp, self.output)
                l.write(id=chain_id,
                        entity_id=entity.id,
                        details=comp)
class _PDBFragment(object):
    """Record details about part of a PDB file used as input
       for a component."""
    primitive = 'atomistic'
    granularity = 'by-residue'
    num = _CifWriter.omitted

    def __init__(self, state, component, start, end, offset, pdbname,
                 chain, hier):
        self.component, self.start, self.end, self.offset, self.pdbname \
              = component, start, end, offset, pdbname
        self.state, self.chain, self.hier = state, chain, hier
        # NOTE(review): reconstructed — upstream also reads the named chain
        # from the PDB file here to make the starting model available;
        # confirm selector details against the original source
        sel = IMP.atom.NonWaterNonHydrogenPDBSelector() \
              & IMP.atom.ChainPDBSelector(chain)
        self.starting_hier = IMP.atom.read_pdb(pdbname, state.m, sel)

    def combine(self, other):
        # PDB fragments are never merged with their neighbors
        pass
399 class _BeadsFragment(object):
400 """Record details about beads used to represent part of a component."""
402 granularity =
'by-feature'
404 def __init__(self, state, component, start, end, num, hier):
405 self.state, self.component, self.start, self.end, self.num, self.hier \
406 = state, component, start, end, num, hier
408 def combine(self, other):
410 if type(other) == type(self)
and other.start == self.end + 1:
412 self.num += other.num
class _StartingModel(object):
    """Record details about an input model (e.g. comparative modeling
       template) used for a component."""

    # Defaults; overwritten once the model's source is determined
    source = _CifWriter.unknown
    db_name = _CifWriter.unknown
    db_code = _CifWriter.unknown
    sequence_identity = _CifWriter.unknown

    def __init__(self, fragment):
        self.fragments = [fragment]
class _ModelRepresentationDumper(_Dumper):
    """Populate the mmCIF _ihm_model_representation category, describing
       how each component is represented (atomic/bead, rigid/flexible)."""
    def __init__(self, simo):
        super(_ModelRepresentationDumper, self).__init__(simo)
        # dict of fragments, ordered by component name and then state
        self.fragments = OrderedDict()
        self.output = IMP.pmi.output.Output()

    def add_fragment(self, state, fragment):
        """Add a model fragment."""
        comp = fragment.component
        if comp not in self.fragments:
            self.fragments[comp] = OrderedDict()
        if state not in self.fragments[comp]:
            self.fragments[comp][state] = []
        fragments = self.fragments[comp][state]
        # Merge with the previous fragment if contiguous and compatible
        if len(fragments) == 0 or not fragments[-1].combine(fragment):
            fragments.append(fragment)

    def get_model_mode(self, fragment):
        """Determine the model_mode for a given fragment ('rigid' or
           'flexible')"""
        if hasattr(fragment, 'pdbname'):
            leaves = IMP.atom.get_leaves(fragment.hier)
        else:
            leaves = [fragment.hier]
        # Assume all leaves are set up as rigid/flexible in the same way
        if IMP.core.RigidMember.get_is_setup(leaves[0]):
            return 'rigid'
        else:
            return 'flexible'

    def dump(self, writer):
        ordinal_id = 1
        segment_id = 1
        with writer.loop("_ihm_model_representation",
                         ["ordinal_id", "representation_id",
                          "segment_id", "entity_id", "entity_description",
                          "entity_asym_id",
                          "seq_id_begin", "seq_id_end",
                          "model_object_primitive", "starting_model_id",
                          "model_mode", "model_granularity",
                          "model_object_count"]) as l:
            for comp, statefrag in self.fragments.items():
                # Assume representation is the same in all states, so just
                # use the first state's fragments
                state = list(statefrag.keys())[0]
                chain_id = self.simo._get_chain_for_component(comp, self.output)
                for f in statefrag[state]:
                    entity = self.simo.entities[f.component]
                    starting_model_id = _CifWriter.omitted
                    if hasattr(f, 'pdbname'):
                        starting_model_id \
                              = self.starting_model[state, comp, f.pdbname].name
                    # todo: handle multiple representations
                    l.write(ordinal_id=ordinal_id,
                            representation_id=1,
                            segment_id=segment_id,
                            entity_id=entity.id,
                            entity_description=entity.description,
                            entity_asym_id=chain_id,
                            seq_id_begin=f.start,
                            seq_id_end=f.end,
                            model_object_primitive=f.primitive,
                            starting_model_id=starting_model_id,
                            model_mode=self.get_model_mode(f),
                            model_granularity=f.granularity,
                            model_object_count=f.num)
                    ordinal_id += 1
                    segment_id += 1
494 class _PDBSource(object):
495 """An experimental PDB file used as part of a starting model"""
496 source =
'experimental model'
498 sequence_identity = 100.0
500 def __init__(self, model, db_code, chain_id, metadata):
501 self.db_code = db_code
502 self.chain_id = chain_id
503 self.metadata = metadata
505 def get_seq_id_range(self, model):
507 return (model.seq_id_begin, model.seq_id_end)
class _TemplateSource(object):
    """A PDB file used as a template for a comparative starting model"""
    source = 'comparative model'
    db_name = db_code = _CifWriter.omitted
    tm_db_name = 'PDB'
    # Right now, assume PMI alignments are always rigid; estimate of
    # sequence identity is out of 100 (denominator 1)
    sequence_identity_denominator = 1

    def __init__(self, tm_code, tm_seq_id_begin, tm_seq_id_end, seq_id_begin,
                 chain_id, seq_id_end, seq_id, model):
        # A 5-character code of the form 1abcX names a real PDB structure
        # plus chain; otherwise the code must be resolved later
        if len(tm_code) == 5:
            self._orig_tm_code = None
            self.tm_db_code = tm_code[:4].upper()
            self.tm_chain_id = tm_code[4]
        else:
            # Otherwise, will need to look up in TEMPLATE PATH remarks
            self._orig_tm_code = tm_code
            self.tm_db_code = _CifWriter.omitted
            self.tm_chain_id = tm_code[-1]
        self.sequence_identity = seq_id
        self.tm_seq_id_begin = tm_seq_id_begin
        self.tm_seq_id_end = tm_seq_id_end
        self.chain_id = chain_id
        self._seq_id_begin, self._seq_id_end = seq_id_begin, seq_id_end

    def get_seq_id_range(self, model):
        # The template may cover more than the current starting model
        seq_id_begin = max(model.seq_id_begin, self._seq_id_begin)
        seq_id_end = min(model.seq_id_end, self._seq_id_end)
        return (seq_id_begin, seq_id_end)
class _UnknownSource(object):
    """Part of a starting model from an unknown source"""
    db_code = _CifWriter.unknown
    db_name = _CifWriter.unknown
    chain_id = _CifWriter.unknown
    sequence_identity = _CifWriter.unknown
    # Map dataset types to starting model source names
    _source_map = {'Comparative model': 'comparative model',
                   'Experimental model': 'experimental model'}

    def __init__(self, model, chain):
        self.source = self._source_map[model.dataset._data_type]
        self.chain_id = chain

    def get_seq_id_range(self, model):
        return (model.seq_id_begin, model.seq_id_end)
class _DatasetGroup(object):
    """A group of datasets"""
    def __init__(self, datasets):
        self._datasets = list(datasets)

    def finalize(self):
        """Get final datasets for each restraint and remove duplicates"""
        final_datasets = OrderedDict()
        for ds in self._datasets:
            # Resolve _RestraintDataset wrappers to their actual dataset
            if isinstance(ds, _RestraintDataset):
                d = ds.dataset
            else:
                d = ds
            final_datasets[d] = None
        self._datasets = final_datasets.keys()
577 class _ExternalReference(object):
578 """A single externally-referenced file"""
579 def __init__(self, location, content_type):
580 self.location, self.content_type = location, content_type
583 def __set_id(self, i):
585 id = property(
lambda x: x.location.id, __set_id)
586 file_size = property(
lambda x: x.location.file_size)
588 def __eq__(self, other):
589 return self.location == other.location
591 return hash(self.location)
594 class _ExternalReferenceDumper(_Dumper):
595 """Output information on externally referenced files
596 (i.e. anything that refers to a Location that isn't
597 a DatabaseLocation)."""
599 INPUT_DATA =
"Input data or restraints"
600 MODELING_OUTPUT =
"Modeling or post-processing output"
601 WORKFLOW =
"Modeling workflow or script"
603 class _LocalFiles(object):
604 reference_provider = _CifWriter.omitted
605 reference_type =
'Supplementary Files'
606 reference = _CifWriter.omitted
608 associated_url = _CifWriter.omitted
610 def __init__(self, top_directory):
611 self.top_directory = top_directory
613 def _get_full_path(self, path):
614 return os.path.relpath(path, start=self.top_directory)
616 class _Repository(object):
617 reference_provider = _CifWriter.omitted
618 reference_type =
'DOI'
620 associated_url = _CifWriter.omitted
622 def __init__(self, repo):
624 self.reference = repo.doi
625 if 'zenodo' in self.reference:
626 self.reference_provider =
'Zenodo'
628 self.associated_url = repo.url
629 if repo.url.endswith(
".zip"):
630 self.refers_to =
'Archive'
632 self.refers_to =
'File'
634 def __init__(self, simo):
635 super(_ExternalReferenceDumper, self).__init__(simo)
638 def add(self, location, content_type):
639 """Add a new location.
640 Note that ids are assigned later."""
641 self._refs.append(_ExternalReference(location, content_type))
644 def finalize_metadata(self):
645 """Register locations for any metadata and add the main script"""
647 details=
"The main integrative modeling script")
649 self._workflow = [main_script] \
650 + [m
for m
in self.simo._metadata
652 for w
in self._workflow:
653 self.add(w.location, self.WORKFLOW)
655 def finalize_after_datasets(self):
656 """Note that this must happen *after* DatasetDumper.finalize()"""
659 self._refs = [x
for x
in self._refs
660 if not isinstance(x.location,
667 self._repo_by_id = []
669 self._local_files = self._LocalFiles(self.simo._working_directory)
672 self.simo._update_location(r.location)
674 _assign_id(r, seen_refs, self._ref_by_id)
676 _assign_id(r.location.repo
or self._local_files, seen_repos,
679 def dump(self, writer):
680 self.dump_repos(writer)
681 self.dump_refs(writer)
683 def dump_repos(self, writer):
685 return r
if isinstance(r, self._LocalFiles)
else self._Repository(r)
686 with writer.loop(
"_ihm_external_reference_info",
687 [
"reference_id",
"reference_provider",
688 "reference_type",
"reference",
"refers_to",
689 "associated_url"])
as l:
690 for repo
in [map_repo(r)
for r
in self._repo_by_id]:
691 l.write(reference_id=repo.id,
692 reference_provider=repo.reference_provider,
693 reference_type=repo.reference_type,
694 reference=repo.reference, refers_to=repo.refers_to,
695 associated_url=repo.associated_url)
697 def dump_refs(self, writer):
698 with writer.loop(
"_ihm_external_files",
699 [
"id",
"reference_id",
"file_path",
"content_type",
700 "file_size_bytes",
"details"])
as l:
701 for r
in self._ref_by_id:
703 repo = loc.repo
or self._local_files
704 file_path=self._posix_path(repo._get_full_path(loc.path))
705 if r.file_size
is None:
706 file_size = _CifWriter.omitted
708 file_size = r.file_size
709 l.write(id=loc.id, reference_id=repo.id,
711 content_type=r.content_type,
712 file_size_bytes=file_size,
713 details=loc.details
or _CifWriter.omitted)
717 def _posix_path(self, path):
720 def _posix_path(self, path):
721 return path.replace(os.sep,
'/')
724 class _DatasetDumper(_Dumper):
725 def __init__(self, simo):
726 super(_DatasetDumper, self).__init__(simo)
728 self._datasets_by_state = {}
729 self._dataset_groups = []
731 def get_all_group(self, state):
732 """Get a _DatasetGroup encompassing all datasets so far in this state"""
733 g = _DatasetGroup(self._datasets_by_state.get(state, []))
734 self._dataset_groups.append(g)
737 def add(self, state, dataset):
738 """Add a new dataset.
739 Note that ids are assigned later."""
740 self._datasets.append(dataset)
741 if state
not in self._datasets_by_state:
742 self._datasets_by_state[state] = []
743 self._datasets_by_state[state].append(dataset)
749 self._dataset_by_id = []
750 for d
in self._flatten_dataset(self._datasets):
753 self.simo.extref_dump.add(d.location,
754 _ExternalReferenceDumper.INPUT_DATA)
755 _assign_id(d, seen_datasets, self._dataset_by_id)
759 self._dataset_group_by_id = []
760 for g
in self._dataset_groups:
762 ids = tuple(sorted(d.id
for d
in g._datasets))
763 if ids
not in seen_group_ids:
764 self._dataset_group_by_id.append(g)
765 g.id = len(self._dataset_group_by_id)
766 seen_group_ids[ids] = g
768 g.id = seen_group_ids[ids].id
770 self.simo.extref_dump.finalize_after_datasets()
772 def _flatten_dataset(self, d):
773 if isinstance(d, list):
775 for x
in self._flatten_dataset(p):
777 elif isinstance(d, _RestraintDataset):
778 for x
in self._flatten_dataset(d.dataset):
781 for p
in d._parents.keys():
782 for x
in self._flatten_dataset(p):
786 def dump(self, writer):
787 with writer.loop(
"_ihm_dataset_list",
788 [
"id",
"data_type",
"database_hosted"])
as l:
789 for d
in self._dataset_by_id:
790 l.write(id=d.id, data_type=d._data_type,
791 database_hosted=isinstance(d.location,
793 self.dump_groups(writer)
794 self.dump_other((d
for d
in self._dataset_by_id
795 if not isinstance(d.location,
798 self.dump_rel_dbs((d
for d
in self._dataset_by_id
799 if isinstance(d.location,
802 self.dump_related(writer)
804 def dump_groups(self, writer):
806 with writer.loop(
"_ihm_dataset_group",
807 [
"ordinal_id",
"group_id",
"dataset_list_id"])
as l:
808 for g
in self._dataset_group_by_id:
809 for d
in g._datasets:
810 l.write(ordinal_id=ordinal, group_id=g.id,
811 dataset_list_id=d.id)
814 def dump_related(self, writer):
816 with writer.loop(
"_ihm_related_datasets",
817 [
"ordinal_id",
"dataset_list_id_derived",
818 "dataset_list_id_primary"])
as l:
819 for derived
in self._dataset_by_id:
820 for parent
in sorted(derived._parents.keys(),
822 l.write(ordinal_id=ordinal,
823 dataset_list_id_derived=derived.id,
824 dataset_list_id_primary=parent.id)
827 def dump_rel_dbs(self, datasets, writer):
829 with writer.loop(
"_ihm_dataset_related_db_reference",
830 [
"id",
"dataset_list_id",
"db_name",
831 "accession_code",
"version",
"details"])
as l:
833 l.write(id=ordinal, dataset_list_id=d.id,
834 db_name=d.location.db_name,
835 accession_code=d.location.access_code,
836 version=d.location.version
if d.location.version
837 else _CifWriter.omitted,
838 details=d.location.details
if d.location.details
839 else _CifWriter.omitted)
842 def dump_other(self, datasets, writer):
844 with writer.loop(
"_ihm_dataset_external_reference",
845 [
"id",
"dataset_list_id",
"file_id"])
as l:
847 l.write(id=ordinal, dataset_list_id=d.id, file_id=d.location.id)
851 class _CrossLinkGroup(object):
852 """Group common information for a set of cross links"""
853 def __init__(self, pmi_restraint, rdataset):
854 self.pmi_restraint, self.rdataset = pmi_restraint, rdataset
855 self.label = self.pmi_restraint.label
858 class _ExperimentalCrossLink(object):
859 def __init__(self, r1, c1, r2, c2, length, group):
860 self.r1, self.c1, self.r2, self.c2 = r1, c1, r2, c2
861 self.length, self.group = length, group
863 class _CrossLink(object):
864 def __init__(self, state, ex_xl, p1, p2, sigma1, sigma2, psi):
866 self.ex_xl, self.sigma1, self.sigma2 = ex_xl, sigma1, sigma2
867 self.p1, self.p2 = p1, p2
def get_asym_mapper_for_state(simo, state, asym_map):
    """Get (or lazily create and cache) the _AsymIDMapper for a state."""
    asym = asym_map.get(state, None)
    if asym is None:
        asym = _AsymIDMapper(simo, state.prot)
        asym_map[state] = asym
    return asym
877 class _CrossLinkDumper(_Dumper):
878 def __init__(self, simo):
879 super(_CrossLinkDumper, self).__init__(simo)
880 self.cross_links = []
881 self.exp_cross_links = []
883 def add_experimental(self, cross_link):
884 self.exp_cross_links.append(cross_link)
886 def add(self, cross_link):
887 self.cross_links.append(cross_link)
889 def dump(self, writer):
890 self.dump_list(writer)
891 self.dump_restraint(writer)
892 self.dump_results(writer)
894 def dump_list(self, writer):
895 seen_cross_links = {}
896 with writer.loop(
"_ihm_cross_link_list",
897 [
"id",
"group_id",
"entity_description_1",
898 "entity_id_1",
"seq_id_1",
"comp_id_1",
899 "entity_description_2",
900 "entity_id_2",
"seq_id_2",
"comp_id_2",
"type",
901 "dataset_list_id"])
as l:
903 for xl
in self.exp_cross_links:
905 sig = (xl.c1, xl.c2, xl.r1, xl.r2, xl.group.label)
906 if sig
in seen_cross_links:
907 xl.id = seen_cross_links[sig]
909 entity1 = self.simo.entities[xl.c1]
910 entity2 = self.simo.entities[xl.c2]
911 seq1 = entity1.sequence
912 seq2 = entity2.sequence
917 seen_cross_links[sig] = xl_id
919 l.write(id=xl.id, group_id=xl.id,
920 entity_description_1=entity1.description,
921 entity_id_1=entity1.id,
923 comp_id_1=rt1.get_string(),
924 entity_description_2=entity2.description,
925 entity_id_2=entity2.id,
927 comp_id_2=rt2.get_string(),
929 dataset_list_id=xl.group.rdataset.dataset.id)
931 def _granularity(self, xl):
932 """Determine the granularity of a cross link"""
933 if _get_by_residue(xl.p1)
and _get_by_residue(xl.p2):
938 def dump_restraint(self, writer):
939 seen_cross_links = {}
941 with writer.loop(
"_ihm_cross_link_restraint",
942 [
"id",
"group_id",
"entity_id_1",
"asym_id_1",
943 "seq_id_1",
"comp_id_1",
944 "entity_id_2",
"asym_id_2",
"seq_id_2",
"comp_id_2",
945 "type",
"conditional_crosslink_flag",
946 "model_granularity",
"distance_threshold",
947 "psi",
"sigma_1",
"sigma_2"])
as l:
949 for xl
in self.cross_links:
950 asym = get_asym_mapper_for_state(self.simo, xl.state,
952 entity1 = self.simo.entities[xl.ex_xl.c1]
953 entity2 = self.simo.entities[xl.ex_xl.c2]
954 seq1 = entity1.sequence
955 seq2 = entity2.sequence
961 sig = (asym1, xl.ex_xl.r1, asym2, xl.ex_xl.r2,
962 xl.ex_xl.group.label)
963 if sig
in seen_cross_links:
964 xl.id = seen_cross_links[sig]
967 seen_cross_links[sig] = xl_id
970 group_id=xl.ex_xl.id,
971 entity_id_1=entity1.id,
973 seq_id_1=xl.ex_xl.r1,
974 comp_id_1=rt1.get_string(),
975 entity_id_2=entity2.id,
977 seq_id_2=xl.ex_xl.r2,
978 comp_id_2=rt2.get_string(),
979 type=xl.ex_xl.group.label,
981 conditional_crosslink_flag=
"ALL",
982 model_granularity=self._granularity(xl),
983 distance_threshold=xl.ex_xl.length,
984 psi=xl.psi.get_scale(),
985 sigma_1=xl.sigma1.get_scale(),
986 sigma_2=xl.sigma2.get_scale())
988 def _set_psi_sigma(self, model, g):
989 for resolution
in g.pmi_restraint.sigma_dictionary:
990 statname =
'ISDCrossLinkMS_Sigma_%s_%s' % (resolution, g.label)
991 if model.stats
and statname
in model.stats:
992 sigma = float(model.stats[statname])
993 p = g.pmi_restraint.sigma_dictionary[resolution][0]
995 for psiindex
in g.pmi_restraint.psi_dictionary:
996 statname =
'ISDCrossLinkMS_Psi_%s_%s' % (psiindex, g.label)
997 if model.stats
and statname
in model.stats:
998 psi = float(model.stats[statname])
999 p = g.pmi_restraint.psi_dictionary[psiindex][0]
1002 def dump_results(self, writer):
1004 for xl
in self.cross_links:
1005 all_groups[xl.ex_xl.group] =
None
1007 with writer.loop(
"_ihm_cross_link_result_parameters",
1008 [
"ordinal_id",
"restraint_id",
"model_id",
1009 "psi",
"sigma_1",
"sigma_2"])
as l:
1010 for model
in self.models:
1011 for g
in all_groups.keys():
1012 self._set_psi_sigma(model, g)
1013 for xl
in self.cross_links:
1015 l.write(ordinal_id=ordinal, restraint_id=xl.id,
1016 model_id=model.id, psi=xl.psi.get_scale(),
1017 sigma_1=xl.sigma1.get_scale(),
1018 sigma_2=xl.sigma2.get_scale())
class _EM2DRestraint(object):
    """Record an EM 2D class average restraint and provide access to the
       per-model fit statistics recorded in stat files."""
    def __init__(self, state, rdataset, pmi_restraint, image_number, resolution,
                 pixel_size, image_resolution, projection_number):
        self.state = state
        self.pmi_restraint, self.image_number = pmi_restraint, image_number
        self.rdataset, self.resolution = rdataset, resolution
        self.pixel_size, self.image_resolution = pixel_size, image_resolution
        self.projection_number = projection_number

    def get_transformation(self, model):
        """Get the transformation that places the model on the image"""
        prefix = 'ElectronMicroscopy2D_%s_Image%d' % (self.pmi_restraint.label,
                                                      self.image_number + 1)
        r = [float(model.stats[prefix + '_Rotation%d' % i]) for i in range(4)]
        t = [float(model.stats[prefix + '_Translation%d' % i])
             for i in range(3)]
        # If the model coordinates were transformed, need to apply the
        # inverse transformation to the image fit
        inv = model.transform.get_inverse()
        return IMP.algebra.Transformation3D(IMP.algebra.Rotation3D(*r),
                                            IMP.algebra.Vector3D(*t)) * inv

    def get_cross_correlation(self, model):
        """Get the cross correlation coefficient between the model projection
           and the image"""
        return float(model.stats['ElectronMicroscopy2D_%s_Image%d_CCC'
                                 % (self.pmi_restraint.label,
                                    self.image_number + 1)])

    def get_num_raw_micrographs(self):
        """Return the number of raw micrographs used, if known.
           This is extracted from the EMMicrographsDataset if any."""
        for d in self.rdataset.dataset._parents.keys():
            if isinstance(d, IMP.pmi.metadata.EMMicrographsDataset):
                return d.number
class _EM2DDumper(_Dumper):
    """Populate the mmCIF 2DEM restraint and fitting categories."""
    def __init__(self, simo):
        super(_EM2DDumper, self).__init__(simo)
        self.restraints = []

    def add(self, rsr):
        self.restraints.append(rsr)
        # ids are 1-based, in order of addition
        rsr.id = len(self.restraints)

    def dump(self, writer):
        self.dump_restraints(writer)
        self.dump_fitting(writer)

    def dump_restraints(self, writer):
        with writer.loop("_ihm_2dem_class_average_restraint",
                         ["id", "dataset_list_id", "number_raw_micrographs",
                          "pixel_size_width", "pixel_size_height",
                          "image_resolution", "image_segment_flag",
                          "number_of_projections", "struct_assembly_id",
                          "details"]) as l:
            for r in self.restraints:
                unk = _CifWriter.unknown
                num_raw = r.get_num_raw_micrographs()
                l.write(id=r.id, dataset_list_id=r.rdataset.dataset.id,
                        number_raw_micrographs=num_raw if num_raw else unk,
                        pixel_size_width=r.pixel_size,
                        pixel_size_height=r.pixel_size,
                        image_resolution=r.image_resolution,
                        number_of_projections=r.projection_number,
                        # For now, assume the restraint acts on the
                        # full modeled assembly of its state
                        struct_assembly_id=r.state.modeled_assembly.id,
                        image_segment_flag=False)

    def dump_fitting(self, writer):
        ordinal = 1
        with writer.loop("_ihm_2dem_class_average_fitting",
                ["ordinal_id", "restraint_id", "model_id",
                 "cross_correlation_coefficient", "rot_matrix[1][1]",
                 "rot_matrix[2][1]", "rot_matrix[3][1]", "rot_matrix[1][2]",
                 "rot_matrix[2][2]", "rot_matrix[3][2]", "rot_matrix[1][3]",
                 "rot_matrix[2][3]", "rot_matrix[3][3]", "tr_vector[1]",
                 "tr_vector[2]", "tr_vector[3]"]) as l:
            for m in self.models:
                for r in self.restraints:
                    trans = r.get_transformation(m)
                    rot = trans.get_rotation()
                    # Use more precision than the default float formatting
                    # for rotation matrix elements
                    rm = [["%.6f" % e for e in rot.get_rotation_matrix_row(i)]
                          for i in range(3)]
                    t = trans.get_translation()
                    ccc = r.get_cross_correlation(m)
                    l.write(ordinal_id=ordinal, restraint_id=r.id,
                            model_id=m.id, cross_correlation_coefficient=ccc,
                            rot_matrix11=rm[0][0], rot_matrix21=rm[1][0],
                            rot_matrix31=rm[2][0], rot_matrix12=rm[0][1],
                            rot_matrix22=rm[1][1], rot_matrix32=rm[2][1],
                            rot_matrix13=rm[0][2], rot_matrix23=rm[1][2],
                            rot_matrix33=rm[2][2], tr_vector1=t[0],
                            tr_vector2=t[1], tr_vector3=t[2])
                    ordinal += 1
class _EM3DRestraint(object):
    """Record an EM 3D (Gaussian mixture model) restraint."""
    fitting_method = 'Gaussian mixture models'

    def __init__(self, simo, state, rdataset, pmi_restraint, target_ps,
                 densities):
        self.pmi_restraint = pmi_restraint
        self.rdataset = rdataset
        self.number_of_gaussians = len(target_ps)
        self.assembly = self.get_assembly(densities, simo, state)

    def get_assembly(self, densities, simo, state):
        """Get the Assembly that this restraint acts on"""
        cm = _ComponentMapper(state.prot)
        components = {}
        for d in densities:
            components[cm[d]] = None
        return simo.assembly_dump.get_subassembly(components)

    def get_cross_correlation(self, model):
        """Get the cross correlation coefficient between the model
           and the map, from the stat file"""
        return float(model.stats['GaussianEMRestraint_%s_CCC'
                                 % self.pmi_restraint.label])
class _EM3DDumper(_Dumper):
    """Populate the mmCIF _ihm_3dem_restraint category."""
    def __init__(self, simo):
        super(_EM3DDumper, self).__init__(simo)
        self.restraints = []

    def add(self, rsr):
        self.restraints.append(rsr)

    def dump(self, writer):
        ordinal = 1
        with writer.loop("_ihm_3dem_restraint",
                         ["ordinal_id", "dataset_list_id", "fitting_method",
                          "struct_assembly_id",
                          "number_of_gaussians", "model_id",
                          "cross_correlation_coefficient"]) as l:
            # One row per (model, restraint) pair
            for model in self.models:
                for r in self.restraints:
                    ccc = r.get_cross_correlation(model)
                    l.write(ordinal_id=ordinal,
                            dataset_list_id=r.rdataset.dataset.id,
                            fitting_method=r.fitting_method,
                            struct_assembly_id=r.assembly.id,
                            number_of_gaussians=r.number_of_gaussians,
                            model_id=model.id,
                            cross_correlation_coefficient=ccc)
                    ordinal += 1
1172 class _Assembly(list):
1173 """A collection of components. Currently simply implemented as a list of
1174 the component names. These must be in creation order."""
1178 return hash(tuple(self))
class _AssemblyDumper(_Dumper):
    """Populate the mmCIF _ihm_struct_assembly category and manage
       assembly ids/deduplication."""
    def __init__(self, simo):
        super(_AssemblyDumper, self).__init__(simo)
        self.assemblies = []
        self.output = IMP.pmi.output.Output()

    def add(self, a):
        """Add a new assembly. The first such assembly is assumed to contain
           all components. Duplicate assemblies will be pruned at the end."""
        self.assemblies.append(a)
        return a

    def get_subassembly(self, compdict):
        """Get an _Assembly consisting of the given components."""
        # Put components in creation order (that of the full assembly)
        newa = _Assembly(c for c in self.assemblies[0] if c in compdict)
        return self.add(newa)

    def finalize(self):
        seen_assemblies = {}
        # Assign IDs to all assemblies, reusing ids for duplicates
        self._assembly_by_id = []
        for a in self.assemblies:
            _assign_id(a, seen_assemblies, self._assembly_by_id)

    def dump(self, writer):
        ordinal = 1
        with writer.loop("_ihm_struct_assembly",
                         ["ordinal_id", "assembly_id", "entity_description",
                          "entity_id", "asym_id", "seq_id_begin",
                          "seq_id_end"]) as l:
            for a in self._assembly_by_id:
                for comp in a:
                    entity = self.simo.entities[comp]
                    seq = self.simo.sequence_dict[comp]
                    chain_id = self.simo._get_chain_for_component(comp,
                                                                  self.output)
                    l.write(ordinal_id=ordinal, assembly_id=a.id,
                            entity_description=entity.description,
                            entity_id=entity.id,
                            asym_id=chain_id,
                            seq_id_begin=1,
                            seq_id_end=len(seq))
                    ordinal += 1
1226 class _Protocol(list):
1227 """A modeling protocol. This can consist of multiple _ProtocolSteps."""
1230 class _ProtocolStep(object):
1231 """A single step in a _Protocol."""
# Protocol step recorded from a PMI ReplicaExchange0 macro run.
# NOTE(review): mangled extraction; original lines 1236 and 1241 (the 'else:'
# between the two self.method assignments) are missing.
1234 class _ReplicaExchangeProtocolStep(_ProtocolStep):
1235 def __init__(self, state, rex):
1237 self.modeled_assembly = state.modeled_assembly
1238 self.name =
'Sampling'
# Monte Carlo movers configured -> MC sampling; otherwise molecular dynamics.
1239 if rex.monte_carlo_sample_objects
is not None:
1240 self.method =
'Replica exchange monte carlo'
1242 self.method =
'Replica exchange molecular dynamics'
# Frames requested from the macro = models produced by this step.
1243 self.num_models_end = rex.vars[
"number_of_frames"]
1245 class _ModelGroup(object):
1246 """Group sets of models"""
1247 def __init__(self, state, name):
# NOTE(review): the constructor body (original lines 1248-1250, presumably
# storing state and name) is missing from this extraction.
# Snapshot of one output model: per-chain entity/component maps plus the
# particle coordinates split into atomic records and coarse-grained spheres.
# NOTE(review): mangled extraction; original lines 1253-1259, 1262-1264, 1269,
# 1276-1278, 1283, 1285, 1289, 1292, 1295 and 1301 are missing.
1251 class _Model(object):
1252 def __init__(self, prot, simo, protocol, assembly, group):
1260 self.protocol = protocol
1261 self.assembly = assembly
# 'o' is presumably an IMP.pmi Output helper created on a missing line —
# TODO confirm against the original source.
1265 o.dictionary_pdbs[name] = prot
1266 o._init_dictchain(name, prot)
1267 (particle_infos_for_pdb,
1268 self.geometric_center) = o.get_particle_infos_for_pdb_writing(name)
# Build per-chain lookup tables used when writing atom/sphere records.
1270 self.entity_for_chain = {}
1271 self.comp_for_chain = {}
1272 self.correct_chain_id = {}
1273 for protname, chain_id
in o.dictchain[name].items():
1274 self.entity_for_chain[chain_id] = simo.entities[protname]
1275 self.comp_for_chain[chain_id] = protname
1279 self.correct_chain_id[chain_id] = \
1280 simo._get_chain_for_component(protname, o)
# Particle tuples with a None atom type are coarse-grained spheres;
# the rest are atomic records.
1281 self.spheres = [t
for t
in particle_infos_for_pdb
if t[1]
is None]
1282 self.atoms = [t
for t
in particle_infos_for_pdb
if t[1]
is not None]
1284 self.name = _CifWriter.omitted
1286 def parse_rmsf_file(self, fname, component):
# Maps residue number -> (block number, RMSF value) parsed from the file.
1287 self.rmsf[component] = rmsf = {}
1288 with open(fname)
as fh:
1290 resnum, blocknum, val = line.split()
1291 rmsf[int(resnum)] = (int(blocknum), float(val))
1293 def get_rmsf(self, component, indexes):
1294 """Get the RMSF value for the given residue indexes."""
1296 return _CifWriter.omitted
1297 rmsf = self.rmsf[component]
# All requested residues must fall in the same averaging block.
1298 blocknums = dict.fromkeys(rmsf[ind][0]
for ind
in indexes)
1299 if len(blocknums) != 1:
1300 raise ValueError(
"Residue indexes %s aren't all in the same block"
1302 return rmsf[indexes[0]][1]
# Dumper for output models: writes _ihm_model_list, _atom_site and
# _ihm_sphere_obj_site categories.
# NOTE(review): mangled extraction; original lines 1307-1308, 1313-1314, 1321,
# 1323, 1337-1338, 1340, 1343, 1346, 1358-1360, 1362 and 1367 are missing.
1304 class _ModelDumper(_Dumper):
1305 def __init__(self, simo):
1306 super(_ModelDumper, self).__init__(simo)
1309 def add(self, prot, protocol, assembly, group):
1310 m = _Model(prot, self.simo, protocol, assembly, group)
1311 self.models.append(m)
# IDs are 1-based, assigned in insertion order.
1312 m.id = len(self.models)
1315 def dump(self, writer):
1316 self.dump_model_list(writer)
1317 num_atoms = sum(len(m.atoms)
for m
in self.models)
1318 num_spheres = sum(len(m.spheres)
for m
in self.models)
1319 self.dump_atoms(writer)
1320 self.dump_spheres(writer)
1322 def dump_model_list(self, writer):
1324 with writer.loop(
"_ihm_model_list",
1325 [
"ordinal_id",
"model_id",
"model_group_id",
1326 "model_name",
"model_group_name",
"assembly_id",
1327 "protocol_id"])
as l:
1328 for model
in self.models:
1329 state = model.group.state
# Group names are prefixed with the state's short name for disambiguation.
1330 group_name = state.get_prefixed_name(model.group.name)
1331 l.write(ordinal_id=ordinal, model_id=model.id,
1332 model_group_id=model.group.id,
1333 model_name=model.name,
1334 model_group_name=group_name,
1335 assembly_id=model.assembly.id,
1336 protocol_id=model.protocol.id)
1339 def dump_atoms(self, writer):
1341 with writer.loop(
"_atom_site",
1342 [
"id",
"label_atom_id",
"label_comp_id",
1344 "label_asym_id",
"Cartn_x",
1345 "Cartn_y",
"Cartn_z",
"label_entity_id",
1347 for model
in self.models:
1348 for atom
in model.atoms:
1349 (xyz, atom_type, residue_type, chain_id, residue_index,
1350 all_indexes, radius) = atom
# Apply the model's rigid transform before writing coordinates.
1351 pt = model.transform * xyz
1352 l.write(id=ordinal, label_atom_id=atom_type.get_string(),
1353 label_comp_id=residue_type.get_string(),
1354 label_asym_id=model.correct_chain_id[chain_id],
1355 label_entity_id=model.entity_for_chain[chain_id].id,
1356 label_seq_id=residue_index,
1357 Cartn_x=pt[0], Cartn_y=pt[1], Cartn_z=pt[2],
1361 def dump_spheres(self, writer):
1363 with writer.loop(
"_ihm_sphere_obj_site",
1364 [
"ordinal_id",
"entity_id",
"seq_id_begin",
1365 "seq_id_end",
"asym_id",
"Cartn_x",
1366 "Cartn_y",
"Cartn_z",
"object_radius",
"rmsf",
1368 for model
in self.models:
1369 for sphere
in model.spheres:
1370 (xyz, atom_type, residue_type, chain_id, residue_index,
1371 all_indexes, radius) = sphere
# A single-residue bead has no index list; synthesize one.
1372 if all_indexes
is None:
1373 all_indexes = (residue_index,)
1374 pt = model.transform * xyz
1375 l.write(ordinal_id=ordinal,
1376 entity_id=model.entity_for_chain[chain_id].id,
1377 seq_id_begin = all_indexes[0],
1378 seq_id_end = all_indexes[-1],
1379 asym_id=model.correct_chain_id[chain_id],
1380 Cartn_x=pt[0], Cartn_y=pt[1], Cartn_z=pt[2],
1381 object_radius=radius,
1382 rmsf=model.get_rmsf(model.comp_for_chain[chain_id],
# Dumper for the _ihm_modeling_protocol category; tracks one _Protocol
# (ordered list of steps) per state.
# NOTE(review): mangled extraction; original lines 1391, 1393, 1395, 1397,
# 1401 ('else:'), 1404, 1406, 1408, 1411, 1413, 1415, 1424, 1432 and 1434
# are missing.
1388 class _ModelProtocolDumper(_Dumper):
1389 def __init__(self, simo):
1390 super(_ModelProtocolDumper, self).__init__(simo)
# Keyed by state, in insertion order.
1392 self.protocols = OrderedDict()
1394 def add(self, step):
1396 if state
not in self.protocols:
1398 self.protocols[state] = _Protocol()
1399 self.protocols[state].id = len(self.protocols)
# First step of a protocol starts from zero models; later steps continue
# from the previous step's model count.
1400 step.num_models_begin = 0
1402 step.num_models_begin = self.protocols[state][-1].num_models_end
1403 self.protocols[state].append(step)
# Step IDs are 1-based within each protocol.
1405 step.id = len(self.protocols[state])
# By default each step uses the union of all datasets for the state.
1407 step.dataset_group = self.simo.dataset_dump.get_all_group(state)
1409 def get_last_protocol(self, state):
1410 """Return the most recently-added _Protocol"""
1412 return self.protocols[state]
1414 def dump(self, writer):
1416 with writer.loop(
"_ihm_modeling_protocol",
1417 [
"ordinal_id",
"protocol_id",
"step_id",
1418 "struct_assembly_id",
"dataset_group_id",
1419 "struct_assembly_description",
"protocol_name",
1420 "step_name",
"step_method",
"num_models_begin",
1421 "num_models_end",
"multi_scale_flag",
1422 "multi_state_flag",
"time_ordered_flag"])
as l:
1423 for p
in self.protocols.values():
1425 l.write(ordinal_id=ordinal, protocol_id=p.id,
1426 step_id=step.id, step_method=step.method,
1427 step_name=step.name,
1428 struct_assembly_id=step.modeled_assembly.id,
1429 dataset_group_id=step.dataset_group.id,
1430 num_models_begin=step.num_models_begin,
1431 num_models_end=step.num_models_end,
# Flags are hard-coded: multi-scale modeling, single state, not time-ordered.
1433 multi_state_flag=
False, time_ordered_flag=
False,
1435 multi_scale_flag=
True)
1439 class _PDBHelix(object):
1440 """Represent a HELIX record from a PDB file."""
1441 def __init__(self, line):
1442 self.helix_id = line[11:14].strip()
1443 self.start_resnam = line[14:18].strip()
1444 self.start_asym = line[19]
1445 self.start_resnum = int(line[21:25])
1446 self.end_resnam = line[27:30].strip()
1447 self.end_asym = line[31]
1448 self.end_resnum = int(line[33:37])
1449 self.helix_class = int(line[38:40])
1450 self.length = int(line[71:76])
# Records an MSE (selenomethionine) -> MET substitution applied when the
# starting-model sequence is written out; one _ihm_starting_model_seq_dif row.
# NOTE(review): mangled extraction; original lines 1455-1456 and 1460
# (presumably storing 'model') are missing.
1453 class _MSESeqDif(object):
1454 """Track an MSE -> MET mutation in the starting model sequence"""
# Fixed explanation string written to the seq_dif 'details' field.
1457 details =
'Conversion of modified residue MSE to MET'
1458 def __init__(self, res, component, source, model, offset):
1459 self.res, self.component, self.source = res, component, source
1461 self.offset = offset
# Dumper for starting (input) models: details, comparative-model templates,
# coordinates and sequence differences.
# NOTE(review): this block is a mangled extraction with many original lines
# missing throughout (e.g. 1467-1468, 1470, 1472-1473, 1488-1489, 1491,
# 1494-1495, 1501, 1503, 1505, 1507, 1509, 1511-1513, 1515, 1517-1525,
# 1527-1536, 1539-1540, 1543-1545, 1552, 1555-1556, 1560-1562, 1564-1565,
# 1567, 1573-1575, 1577-1580, 1583-1585, 1590-1592, 1594-1599, 1609,
# 1613-1616, 1619, 1621-1622, 1625-1626, 1631-1634, 1636, 1639, 1642,
# 1649-1650, 1653-1654, 1657-1659, 1661, 1667, 1669, 1674-1675, 1681,
# 1686-1688, 1702, 1724-1725, 1742, 1744-1746, 1754, 1762, 1764-1765,
# 1774, 1778-1779, 1781, 1783, 1787, 1789-1791, 1793-1794, 1798-1801,
# 1804-1805, 1814, 1818-1821); statements below are fragments.
1464 class _StartingModelDumper(_Dumper):
1465 def __init__(self, simo):
1466 super(_StartingModelDumper, self).__init__(simo)
# Nested mapping: component -> state -> list of _StartingModel.
1469 self.models = OrderedDict()
# Lookup of (state, component, pdbname) -> _StartingModel.
1471 self.starting_model = {}
1474 def add_pdb_fragment(self, fragment):
1475 """Add a starting model PDB fragment."""
1476 comp = fragment.component
1477 state = fragment.state
1478 if comp
not in self.models:
1479 self.models[comp] = OrderedDict()
1480 if state
not in self.models[comp]:
1481 self.models[comp][state] = []
1482 models = self.models[comp][state]
# Consecutive fragments from the same PDB file share one starting model.
1483 if len(models) == 0 \
1484 or models[-1].fragments[0].pdbname != fragment.pdbname:
1485 model = _StartingModel(fragment)
1486 models.append(model)
1487 model.sources = self.get_sources(model, fragment.pdbname,
1490 models[-1].fragments.append(fragment)
1492 def get_templates(self, pdbname, model):
# Parse MODELLER 'REMARK 6' headers to recover template info.
1493 template_path_map = {}
1496 alnfilere = re.compile(
'REMARK 6 ALIGNMENT: (\S+)')
1497 tmppathre = re.compile(
'REMARK 6 TEMPLATE PATH (\S+) (\S+)')
1498 tmpre = re.compile(
'REMARK 6 TEMPLATE: '
1499 '(\S+) (\S+):\S+ \- (\S+):\S+ '
1500 'MODELS (\S+):(\S+) \- (\S+):\S+ AT (\S+)%')
1502 with open(pdbname)
as fh:
# Headers precede coordinates; stop scanning at the first ATOM record.
1504 if line.startswith(
'ATOM'):
1506 m = tmppathre.match(line)
1508 template_path_map[m.group(1)] = \
1510 m = alnfilere.match(line)
1514 m = tmpre.match(line)
1516 templates.append(_TemplateSource(m.group(1),
1526 fname = template_path_map[t._orig_tm_code]
1528 details=
"Template for comparative modeling")
# Register template datasets as parents of the model's dataset.
1532 d = self.simo._add_dataset(d)
1534 model.dataset.add_parent(d)
# Sort templates by sequence range for stable output.
1537 return(sorted(templates,
1538 key=
lambda x: (x._seq_id_begin, x._seq_id_end)),
1541 def _parse_pdb(self, fh, first_line):
1542 """Extract information from an official PDB"""
1546 if line.startswith(
'TITLE'):
1547 details += line[10:].rstrip()
1548 elif line.startswith(
'HELIX'):
1549 metadata.append(_PDBHelix(line))
# Returns (version from HEADER columns 50-59, title or unknown, metadata).
1550 return (first_line[50:59].strip(),
1551 details
if details
else _CifWriter.unknown, metadata)
1553 def _parse_details(self, fh):
1554 """Extract TITLE records from a PDB file"""
1557 if line.startswith(
'TITLE'):
1558 details += line[10:].rstrip()
1559 elif line.startswith(
'ATOM'):
1563 def get_sources(self, model, pdbname, chain):
# Classify the input PDB by its first line and build matching dataset/source.
1566 first_line = fh.readline()
1568 details=
"Starting model structure")
1569 file_dataset = self.simo.get_file_dataset(pdbname)
# Official PDB entry: code is in HEADER columns 62-66.
1570 if first_line.startswith(
'HEADER'):
1571 version, details, metadata = self._parse_pdb(fh, first_line)
1572 source = _PDBSource(model, first_line[62:66].strip(), chain,
1576 model.dataset = self.simo._add_dataset(file_dataset
or d)
# File derived from an official PDB entry.
1578 elif first_line.startswith(
'EXPDTA DERIVED FROM PDB:'):
1581 local_file.details = self._parse_details(fh)
1582 db_code = first_line[27:].strip()
1586 d.add_parent(parent)
1587 model.dataset = self.simo._add_dataset(file_dataset
or d)
1588 return [_UnknownSource(model, chain)]
# File derived from another comparative model.
1589 elif first_line.startswith(
'EXPDTA DERIVED FROM COMPARATIVE '
1593 local_file.details = self._parse_details(fh)
1598 details=
"Starting comparative model structure")
1600 d.add_parent(parent)
1601 model.dataset = self.simo._add_dataset(file_dataset
or d)
1602 return [_UnknownSource(model, chain)]
# MODELLER output: record the software version used.
1603 elif first_line.startswith(
'EXPDTA THEORETICAL MODEL, MODELLER'):
1604 self.simo.software_dump.set_modeller_used(
1605 *first_line[38:].split(
' ', 1))
1606 return self.handle_comparative_model(local_file, file_dataset,
1607 pdbname, model, chain)
# Phyre2 output marker.
1608 elif first_line.startswith(
'REMARK 99 Chain ID :'):
1610 self.simo.software_dump.set_phyre2_used()
1611 return self.handle_comparative_model(local_file, file_dataset,
1612 pdbname, model, chain)
# Fallback: treat as a comparative model of unknown provenance.
1617 return self.handle_comparative_model(local_file, file_dataset,
1618 pdbname, model, chain)
1620 def handle_comparative_model(self, local_file, file_dataset, pdbname,
1623 model.dataset = self.simo._add_dataset(file_dataset
or d)
1624 templates, alnfile = self.get_templates(pdbname, model)
1627 details=
"Alignment for starting "
1628 "comparative model")
1629 self.simo.extref_dump.add(model.alignment_file,
1630 _ExternalReferenceDumper.INPUT_DATA)
1635 return [_UnknownSource(model, chain)]
1637 def assign_model_details(self):
# Name each model "<component>-m<N>" and compute its covered seq_id range.
1638 for comp, states
in self.models.items():
1640 for state
in states:
1641 for model
in states[state]:
1643 model.name =
"%s-m%d" % (comp, model_id)
1644 self.starting_model[state, comp,
1645 model.fragments[0].pdbname] = model
1646 model.seq_id_begin = min(x.start + x.offset
for x
in model.fragments)
1648 model.seq_id_end = max(x.end + x.offset
for x
in model.fragments)
1651 def all_models(self):
# Models are per-component; only the first state's copies are yielded.
1652 for comp, states
in self.models.items():
1655 first_state = list(states.keys())[0]
1656 for model
in states[first_state]:
1660 self.assign_model_details()
1662 def dump(self, writer):
1663 self.dump_details(writer)
1664 self.dump_comparative(writer)
1665 seq_dif = self.dump_coords(writer)
1666 self.dump_seq_dif(writer, seq_dif)
1668 def dump_seq_dif(self, writer, seq_dif):
1670 with writer.loop(
"_ihm_starting_model_seq_dif",
1671 [
"ordinal_id",
"entity_id",
"asym_id",
1672 "seq_id",
"comp_id",
"starting_model_id",
1673 "db_asym_id",
"db_seq_id",
"db_comp_id",
1676 chain_id = self.simo._get_chain_for_component(
1677 sd.component, self.output)
1678 entity = self.simo.entities[sd.component]
1679 l.write(ordinal_id=ordinal, entity_id=entity.id,
1680 asym_id=chain_id, seq_id=sd.res.get_index(),
1682 db_asym_id=sd.source.chain_id,
# Database numbering undoes the fragment offset.
1683 db_seq_id=sd.res.get_index() - sd.offset,
1684 db_comp_id=sd.db_comp_id,
1685 starting_model_id=sd.model.name,
1689 def dump_comparative(self, writer):
1690 """Dump details on comparative models. Must be called after
1691 dump_details() since it uses IDs assigned there."""
1692 with writer.loop(
"_ihm_starting_comparative_models",
1693 [
"ordinal_id",
"starting_model_id",
1694 "starting_model_auth_asym_id",
1695 "starting_model_seq_id_begin",
1696 "starting_model_seq_id_end",
1697 "template_auth_asym_id",
"template_seq_id_begin",
1698 "template_seq_id_end",
"template_sequence_identity",
1699 "template_sequence_identity_denominator",
1700 "template_dataset_list_id",
1701 "alignment_file_id"])
as l:
1703 for model
in self.all_models():
# Only template-derived sources produce rows here.
1704 for template
in [s
for s
in model.sources
1705 if isinstance(s, _TemplateSource)]:
1706 seq_id_begin, seq_id_end = template.get_seq_id_range(model)
1707 denom = template.sequence_identity_denominator
1708 l.write(ordinal_id=ordinal,
1709 starting_model_id=model.name,
1710 starting_model_auth_asym_id=template.chain_id,
1711 starting_model_seq_id_begin=seq_id_begin,
1712 starting_model_seq_id_end=seq_id_end,
1713 template_auth_asym_id=template.tm_chain_id,
1714 template_seq_id_begin=template.tm_seq_id_begin,
1715 template_seq_id_end=template.tm_seq_id_end,
1716 template_sequence_identity=template.sequence_identity,
1717 template_sequence_identity_denominator=denom,
1718 template_dataset_list_id=template.tm_dataset.id
1719 if template.tm_dataset
1720 else _CifWriter.unknown,
1721 alignment_file_id=model.alignment_file.id
1722 if hasattr(model,
'alignment_file')
1723 else _CifWriter.unknown)
1726 def dump_details(self, writer):
1727 writer.write_comment(
"""IMP will attempt to identify which input models
1728 are crystal structures and which are comparative models, but does not always
1729 have sufficient information to deduce all of the templates used for comparative
1730 modeling. These may need to be added manually below.""")
1731 with writer.loop(
"_ihm_starting_model_details",
1732 [
"starting_model_id",
"entity_id",
"entity_description",
1733 "asym_id",
"seq_id_begin",
1734 "seq_id_end",
"starting_model_source",
1735 "starting_model_auth_asym_id",
1736 "starting_model_sequence_offset",
1737 "dataset_list_id"])
as l:
1738 for model
in self.all_models():
1739 f = model.fragments[0]
1740 entity = self.simo.entities[f.component]
1741 chain_id = self.simo._get_chain_for_component(f.component,
1743 source0 = model.sources[0]
# Merge the seq_id ranges of all sources into one covering range.
1747 seq_id_begin, seq_id_end = source0.get_seq_id_range(model)
1748 for source
in model.sources[1:]:
1749 this_begin, this_end = source.get_seq_id_range(model)
1750 seq_id_begin = min(seq_id_begin, this_begin)
1751 seq_id_end = max(seq_id_end, this_end)
1752 l.write(entity_id=entity.id,
1753 entity_description=entity.description,
1755 seq_id_begin=seq_id_begin,
1756 seq_id_end=seq_id_end,
1757 starting_model_auth_asym_id=source0.chain_id,
1758 starting_model_id=model.name,
1759 starting_model_source=source0.source,
1760 starting_model_sequence_offset=f.offset,
1761 dataset_list_id=model.dataset.id)
1763 def dump_coords(self, writer):
1766 with writer.loop(
"_ihm_starting_model_coord",
1767 [
"starting_model_id",
"group_PDB",
"id",
"type_symbol",
1768 "atom_id",
"comp_id",
"entity_id",
"asym_id",
1769 "seq_id",
"Cartn_x",
1770 "Cartn_y",
"Cartn_z",
"B_iso_or_equiv",
1771 "ordinal_id"])
as l:
1772 for model
in self.all_models():
1773 for f
in model.fragments:
1775 residue_indexes=list(range(f.start, f.end + 1)))
1776 last_res_index =
None
1777 for a
in sel.get_selected_particles():
1780 element = atom.get_element()
1782 atom_name = atom.get_atom_type().get_string()
# Heteroatoms are prefixed 'HET:'; strip the prefix and mark as HETATM.
1784 if atom_name.startswith(
'HET:'):
1785 group_pdb =
'HETATM'
1786 atom_name = atom_name[4:]
1788 res_name = res.get_residue_type().get_string()
# MSE residues are recorded as sequence differences (MSE -> MET).
1792 if res_name ==
'MSE':
1795 ind = res.get_index()
1796 if ind != last_res_index:
1797 last_res_index = ind
1802 assert(len(model.sources) == 1)
1803 seq_dif.append(_MSESeqDif(res, f.component,
1806 chain_id = self.simo._get_chain_for_component(
1807 f.component, self.output)
1808 entity = self.simo.entities[f.component]
1809 l.write(starting_model_id=model.name,
1810 group_PDB=group_pdb,
1811 id=atom.get_input_index(), type_symbol=element,
1812 atom_id=atom_name, comp_id=res_name,
1813 entity_id=entity.id,
1815 seq_id=res.get_index(), Cartn_x=coord[0],
1816 Cartn_y=coord[1], Cartn_z=coord[2],
1817 B_iso_or_equiv=atom.get_temperature_factor(),
# Dumper for _struct_conf / _struct_conf_type: helices from rigid starting
# models that survive into the output representation.
# NOTE(review): mangled extraction; original lines 1825, 1828-1829, 1835,
# 1837-1838, 1852, 1857-1863 and 1870 are missing.
1822 class _StructConfDumper(_Dumper):
1823 def all_rigid_fragments(self):
1824 """Yield all rigid model representation fragments"""
1826 model_repr = self.simo.model_repr_dump
1827 for comp, statefrag
in model_repr.fragments.items():
# Only the first state's fragments are considered.
1830 state = list(statefrag.keys())[0]
1831 for f
in statefrag[state]:
1832 if hasattr(f,
'pdbname') \
1833 and model_repr.get_model_mode(f) ==
'rigid':
1834 asym = get_asym_mapper_for_state(self.simo, f.state,
1836 yield (f, model_repr.starting_model[state, comp, f.pdbname],
1839 def all_helices(self):
1840 """Yield all helices that overlap with rigid model fragments"""
# Only single-source official-PDB models carry trusted HELIX metadata.
1841 for f, starting_model, asym_id
in self.all_rigid_fragments():
1842 if len(starting_model.sources) == 1 \
1843 and isinstance(starting_model.sources[0], _PDBSource):
1844 pdb = starting_model.sources[0]
1845 for m
in pdb.metadata:
1846 if isinstance(m, _PDBHelix) \
1847 and m.start_asym == pdb.chain_id \
1848 and m.end_asym == pdb.chain_id \
1849 and m.start_resnum >= f.start
and m.end_resnum <= f.end:
# Clamp the helix range to the fragment's residue range.
1850 yield (m, max(f.start, m.start_resnum),
1851 min(f.end, m.end_resnum), asym_id)
1853 def dump(self, writer):
1854 with writer.category(
"_struct_conf_type")
as l:
1855 l.write(id=
'HELX_P', criteria=_CifWriter.unknown,
1856 reference=_CifWriter.unknown)
1864 with writer.loop(
"_struct_conf",
1865 [
"id",
"conf_type_id",
"beg_label_comp_id",
1866 "beg_label_asym_id",
"beg_label_seq_id",
1867 "end_label_comp_id",
"end_label_asym_id",
1868 "end_label_seq_id",])
as l:
1869 for h, begin, end, asym_id
in self.all_helices():
1871 l.write(id=
'HELX_P%d' % ordinal, conf_type_id=
'HELX_P',
1872 beg_label_comp_id=h.start_resnam,
1873 beg_label_asym_id=asym_id,
1874 beg_label_seq_id=begin,
1875 end_label_comp_id=h.end_resnam,
1876 end_label_asym_id=asym_id,
1877 end_label_seq_id=end)
1880 class _PostProcess(object):
1881 """Base class for any post processing"""
# Post-processing step reconstructed from AnalysisReplicaExchange0 output:
# the model count after clustering is the total of all cluster stat files.
# NOTE(review): mangled extraction; original lines 1886-1888, 1891, 1897
# and 1900-1901 are missing.
1884 class _ReplicaExchangeAnalysisPostProcess(_PostProcess):
1885 """Post processing using AnalysisReplicaExchange0 macro"""
1889 def __init__(self, protocol, rex, num_models_begin):
1890 self.protocol = protocol
1892 self.num_models_begin = num_models_begin
# Count one model per line across all cluster stat files.
1893 self.num_models_end = 0
1894 for fname
in self.get_all_stat_files():
1895 with open(fname)
as fh:
1896 self.num_models_end += len(fh.readlines())
1898 def get_stat_file(self, cluster_num):
1899 return os.path.join(self.rex._outputdir,
"cluster.%d" % cluster_num,
1902 def get_all_stat_files(self):
1903 for i
in range(self.rex._number_of_clusters):
1904 yield self.get_stat_file(i)
# Minimal post-processing record for manually-specified clustering.
# NOTE(review): original lines 1908-1910 (presumably class-level type/feature
# attributes) are missing from this extraction.
1906 class _SimplePostProcess(_PostProcess):
1907 """Simple ad hoc clustering"""
1911 def __init__(self, protocol, num_models_begin, num_models_end):
1912 self.protocol = protocol
1913 self.num_models_begin = num_models_begin
1914 self.num_models_end = num_models_end
# Dumper for the _ihm_modeling_post_process category.
# NOTE(review): mangled extraction; original lines 1919-1920 (the postprocs
# list initialization), 1925-1928 (finalize header), 1934, 1936 and 1942
# are missing.
1916 class _PostProcessDumper(_Dumper):
1917 def __init__(self, simo):
1918 super(_PostProcessDumper, self).__init__(simo)
1921 def add(self, postproc):
1922 protocol = postproc.protocol
1923 self.postprocs.append(postproc)
# IDs are 1-based, global across all protocols.
1924 postproc.id = len(self.postprocs)
# Step IDs restart from 1 within each protocol; group by protocol identity.
1929 for p
in self.postprocs:
1930 protocol = p.protocol
1931 if id(protocol)
not in pp_by_protocol:
1932 pp_by_protocol[id(protocol)] = []
1933 by_prot = pp_by_protocol[id(protocol)]
1935 p.step_id = len(by_prot)
1937 def dump(self, writer):
1938 with writer.loop(
"_ihm_modeling_post_process",
1939 [
"id",
"protocol_id",
"analysis_id",
"step_id",
1940 "type",
"feature",
"num_models_begin",
1941 "num_models_end"])
as l:
1943 for p
in self.postprocs:
# Only a single analysis per protocol is modeled (analysis_id=1).
1944 l.write(id=p.id, protocol_id=p.protocol.id, analysis_id=1,
1945 step_id=p.step_id, type=p.type, feature=p.feature,
1946 num_models_begin=p.num_models_begin,
1947 num_models_end=p.num_models_end)
1949 class _Ensemble(object):
1950 """Base class for any ensemble"""
# Ensemble reconstructed from AnalysisReplicaExchange0 cluster output:
# per-cluster stat file, RMSF files, localization densities, RMF models
# and a precision file.
# NOTE(review): mangled extraction; original lines 1956, 1958, 1961, 1966,
# 1971, 1976, 1978-1980, 1984, 1990, 1995, 1998, 2001, 2005, 2008-2010,
# 2012-2014, 2018, 2021, 2023, 2025-2027 and 2029 are missing.
1954 class _ReplicaExchangeAnalysisEnsemble(_Ensemble):
1955 """Ensemble generated using AnalysisReplicaExchange0 macro"""
1957 def __init__(self, pp, cluster_num, model_group, num_deposit):
1959 self.model_group = model_group
1960 self.cluster_num = cluster_num
1962 self.num_deposit = num_deposit
1963 self.localization_density = {}
# Ensemble size = number of lines in the cluster's stat file.
1964 with open(pp.get_stat_file(cluster_num))
as fh:
1965 self.num_models = len(fh.readlines())
1967 def get_rmsf_file(self, component):
1968 return os.path.join(self.postproc.rex._outputdir,
1969 'cluster.%d' % self.cluster_num,
1970 'rmsf.%s.dat' % component)
1972 def load_rmsf(self, model, component):
# RMSF files are optional; silently skip when absent.
1973 fname = self.get_rmsf_file(component)
1974 if os.path.exists(fname):
1975 model.parse_rmsf_file(fname, component)
1977 def get_localization_density_file(self, component):
1981 return os.path.join(self.postproc.rex._outputdir,
1982 'cluster.%d' % self.cluster_num,
1983 '%s.mrc' % component)
1985 def load_localization_density(self, state, component, extref_dump):
# Density (.mrc) files are optional; register as external output files.
1986 fname = self.get_localization_density_file(component)
1987 if os.path.exists(fname):
1988 details =
"Localization density for %s %s" \
1989 % (component, self.model_group.name)
1991 details=state.get_postfixed_name(details))
1992 self.localization_density[component] = local_file
1993 extref_dump.add(local_file,
1994 _ExternalReferenceDumper.MODELING_OUTPUT)
1996 def load_all_models(self, simo, state):
1997 stat_fname = self.postproc.get_stat_file(self.cluster_num)
1999 with open(stat_fname)
as fh:
# Each stat line is a Python-literal dict of per-model statistics.
2000 stats = ast.literal_eval(fh.readline())
2002 rmf_file = os.path.join(os.path.dirname(stat_fname),
2003 "%d.rmf3" % model_num)
2004 for c
in state.all_modeled_components:
2006 state._pmi_object.set_coordinates_from_rmf(c, rmf_file, 0,
2007 force_rigid_update=
True)
# Only the first num_deposit models are deposited.
2011 if model_num >= self.num_deposit:
2015 def _get_precision(self):
2016 precfile = os.path.join(self.postproc.rex._outputdir,
2017 "precision.%d.%d.out" % (self.cluster_num,
2019 if not os.path.exists(precfile):
2020 return _CifWriter.unknown
# Precision = average centroid distance parsed out of the text report.
2022 r = re.compile(
'All .*/cluster.%d/ average centroid distance ([\d\.]+)'
2024 with open(precfile)
as fh:
2028 return float(m.group(1))
2030 feature = property(
lambda self: self.postproc.feature)
# Clusters are numbered from 1 in human-readable names.
2031 name = property(
lambda self:
"cluster %d" % (self.cluster_num + 1))
2032 precision = property(
lambda self: self._get_precision())
# Manually-specified ensemble; precision is the caller-provided dRMSD.
# NOTE(review): mangled extraction; original lines 2036-2038, 2042, 2048,
# 2050 and 2054 are missing.
2034 class _SimpleEnsemble(_Ensemble):
2035 """Simple manually-created ensemble"""
2039 def __init__(self, pp, model_group, num_models, drmsd,
2040 num_models_deposited, ensemble_file):
2041 self.file = ensemble_file
2043 self.model_group = model_group
2044 self.num_deposit = num_models_deposited
2045 self.localization_density = {}
2046 self.num_models = num_models
2047 self.precision = drmsd
2049 def load_localization_density(self, state, component, local_file,
# Register the caller-supplied density file as a modeling output.
2051 self.localization_density[component] = local_file
2052 extref_dump.add(local_file,
2053 _ExternalReferenceDumper.MODELING_OUTPUT)
2055 name = property(
lambda self: self.model_group.name)
# Dumper for the _ihm_ensemble_info category.
# NOTE(review): mangled extraction; original lines 2061-2062 (presumably
# initializing self.ensembles) and 2066 are missing.
2058 class _EnsembleDumper(_Dumper):
2059 def __init__(self, simo):
2060 super(_EnsembleDumper, self).__init__(simo)
2063 def add(self, ensemble):
2064 self.ensembles.append(ensemble)
# IDs are 1-based, in insertion order.
2065 ensemble.id = len(self.ensembles)
2067 def dump(self, writer):
2068 with writer.loop(
"_ihm_ensemble_info",
2069 [
"ensemble_id",
"ensemble_name",
"post_process_id",
2070 "model_group_id",
"ensemble_clustering_method",
2071 "ensemble_clustering_feature",
2072 "num_ensemble_models",
2073 "num_ensemble_models_deposited",
2074 "ensemble_precision_value",
2075 "ensemble_file_id"])
as l:
2076 for e
in self.ensembles:
2077 state = e.model_group.state
2078 l.write(ensemble_id=e.id,
2079 ensemble_name=state.get_prefixed_name(e.name),
2080 post_process_id=e.postproc.id,
2081 model_group_id=e.model_group.id,
2082 ensemble_clustering_feature=e.feature,
2083 num_ensemble_models=e.num_models,
2084 num_ensemble_models_deposited=e.num_deposit,
2085 ensemble_precision_value=e.precision,
# No ensemble file -> record the field as omitted.
2086 ensemble_file_id=e.file.id
if e.file
2087 else _CifWriter.omitted)
# Dumper for the _ihm_localization_density_files category.
# NOTE(review): mangled extraction; original lines 2091, 2094-2096, 2099,
# 2102, 2107, 2111-2112, 2116, 2119 and 2121-2122 are missing.
2089 class _DensityDumper(_Dumper):
2090 """Output localization densities for ensembles"""
2092 def __init__(self, simo):
2093 super(_DensityDumper, self).__init__(simo)
2097 def add(self, ensemble):
2098 self.ensembles.append(ensemble)
2100 def get_density(self, ensemble, component):
# Returns None when no density was computed for this component.
2101 return ensemble.localization_density.get(component,
None)
2103 def dump(self, writer):
2104 with writer.loop(
"_ihm_localization_density_files",
2105 [
"id",
"file_id",
"ensemble_id",
"entity_id",
2106 "asym_id",
"seq_id_begin",
"seq_id_end"])
as l:
2108 for ensemble
in self.ensembles:
2109 for comp
in self.simo.all_modeled_components:
2110 density = self.get_density(ensemble, comp)
2113 entity = self.simo.entities[comp]
2114 lenseq = len(entity.sequence)
2115 chain_id = self.simo._get_chain_for_component(comp,
# Densities always cover the entire component sequence.
2117 l.write(id=ordinal, ensemble_id=ensemble.id,
2118 entity_id=entity.id,
2120 seq_id_begin=1, seq_id_end=lenseq,
# Dumper for the _ihm_multi_state_modeling category; skipped entirely for
# single-state systems.
# NOTE(review): mangled extraction; original lines 2127, 2131, 2133-2134,
# 2142 and 2148 are missing.
2125 class _MultiStateDumper(_Dumper):
2126 """Output information on multiple states"""
2128 def dump(self, writer):
2129 states = sorted(self.simo._states.keys(),
2130 key=operator.attrgetter(
'id'))
# Nothing to report unless there is more than one state.
2132 if len(states) <= 1:
# Stable ordering: by state ID, then group ID.
2135 groups = sorted(self.simo.model_groups,
2136 key=
lambda g: (g.state.id, g.id))
2137 with writer.loop(
"_ihm_multi_state_modeling",
2138 [
"ordinal_id",
"state_id",
"state_group_id",
2139 "population_fraction",
"state_type",
"state_name",
2140 "model_group_id",
"experiment_type",
"details"])
as l:
2141 for n, group
in enumerate(groups):
2143 l.write(ordinal_id=n+1, state_id=state.id,
2144 state_group_id=state.id,
2145 model_group_id=group.id,
2146 state_name=state.long_name
if state.long_name
2147 else _CifWriter.omitted,
2149 experiment_type=
'Fraction of bulk',
2150 details=state.get_prefixed_name(group.name))
# NOTE(review): mangled extraction; original lines 2156 (presumably storing
# the sequence) and 2158 are missing.
2153 class _Entity(object):
2154 """Represent a CIF entity (a chain with a unique sequence)"""
2155 def __init__(self, seq, first_component):
2157 self.first_component = first_component
# The first component's name doubles as the entity description.
2159 self.description = first_component
# NOTE(review): mangled extraction; original lines 2164 (__init__ def line),
# 2167-2168 (presumably initializing self._entities) and 2176-2177
# (the def line for the method returning self._entities) are missing.
2161 class _EntityMapper(dict):
2162 """Handle mapping from IMP components to CIF entities.
2163 Multiple components may map to the same entity if they share sequence."""
2165 super(_EntityMapper, self).__init__()
# Maps sequence string -> existing _Entity, for dedup by sequence.
2166 self._sequence_dict = {}
2169 def add(self, component_name, sequence):
2170 if sequence
not in self._sequence_dict:
# First time this sequence is seen: create a new entity with a 1-based ID.
2171 entity = _Entity(sequence, component_name)
2172 self._entities.append(entity)
2173 entity.id = len(self._entities)
2174 self._sequence_dict[sequence] = entity
2175 self[component_name] = self._sequence_dict[sequence]
2178 """Yield all entities"""
2179 return self._entities
# NOTE(review): mangled extraction; original lines 2192 (cached-value guard),
# 2196 ('else:') and 2200-2202 (caching and returning the copy) are missing.
2182 class _RestraintDataset(object):
2183 """Wrapper around a dataset associated with a restraint.
2184 This is needed because we need to delay access to the dataset
2185 in case the writer of the PMI script overrides or changes it
2186 after creating the restraint."""
2187 def __init__(self, restraint, num, allow_duplicates):
2188 self.restraint = restraint
2189 self.num, self.allow_duplicates = num, allow_duplicates
# Lazily-populated cache of the deep-copied dataset.
2190 self.__dataset =
None
2191 def __get_dataset(self):
2193 return self.__dataset
# num selects one of several datasets; None means the single dataset attr.
2194 if self.num
is not None:
2195 d = copy.deepcopy(self.restraint.datasets[self.num])
2197 d = copy.deepcopy(self.restraint.dataset)
2198 if self.allow_duplicates:
2199 d.location._allow_duplicates =
True
2203 dataset = property(__get_dataset)
# NOTE(review): mangled extraction; original lines 2208, 2210-2213,
# 2216-2218, 2221, 2223, 2226 (the conditional guarding the prefixed name),
# 2228, 2230, 2233, 2235-2237 are missing.
2206 class _State(object):
2207 """Representation of a single state in the system."""
2209 def __init__(self, pmi_object, po):
# Weak proxy avoids a reference cycle with the owning PMI object.
2214 self._pmi_object = weakref.proxy(pmi_object)
2215 self._pmi_state = pmi_object.state
# Each state gets its own modeled assembly, registered with the dumper.
2219 self.modeled_assembly = _Assembly()
2220 po.assembly_dump.add(self.modeled_assembly)
2222 self.all_modeled_components = []
2224 def get_prefixed_name(self, name):
2225 """Prefix the given name with the state name, if available."""
2227 return self.short_name +
' ' + name
# No state name available: fall back to capitalizing the given name.
2229 return name.capitalize()
2231 def get_postfixed_name(self, name):
2232 """Postfix the given name with the state name, if available."""
2234 return "%s in state %s" % (name, self.short_name)
# Names are delegated to the underlying PMI state object.
2238 short_name = property(
lambda self: self._pmi_state.short_name)
2239 long_name = property(
lambda self: self._pmi_state.long_name)
2243 """Class to encode a modeling protocol as mmCIF.
2245 IMP has basic support for writing out files in mmCIF format, for
2246 deposition in [PDB-dev](https://pdb-dev.rcsb.rutgers.edu/).
2247 After creating an instance of this class, attach it to an
2248 IMP.pmi.representation.Representation object. After this, any
2249 generated models and metadata are automatically collected and
def __init__(self, fh):
    """Set up the protocol output, writing mmCIF to the file handle `fh`."""
    # Offset into the ensemble list where the current state's ensembles
    # begin; used to map per-state ensemble indices to the global list.
    self._state_ensemble_offset = 0
    self._each_metadata = []    # one metadata list per attached representation
    self._file_datasets = []    # dicts mapping absolute path -> dataset
    self._main_script = os.path.abspath(sys.argv[0])
    # NOTE(review): reconstructed line — _add_state() reads and updates
    # self._states, so it must be initialized here; confirm against upstream.
    self._states = {}
    self._working_directory = os.getcwd()
    self._cif_writer = _CifWriter(fh)
    self.entities = _EntityMapper()
    # NOTE(review): reconstructed line — create_component() assigns chain
    # indices into self.chains; confirm against upstream.
    self.chains = {}
    self._all_components = {}
    self.all_modeled_components = []
    self.model_groups = []
    self.default_model_group = None
    self.sequence_dict = {}

    # Dumpers, one per mmCIF category (or group of related categories)
    self.model_repr_dump = _ModelRepresentationDumper(self)
    self.cross_link_dump = _CrossLinkDumper(self)
    self.em2d_dump = _EM2DDumper(self)
    self.em3d_dump = _EM3DDumper(self)
    self.model_prot_dump = _ModelProtocolDumper(self)
    self.extref_dump = _ExternalReferenceDumper(self)
    self.dataset_dump = _DatasetDumper(self)
    self.starting_model_dump = _StartingModelDumper(self)
    self.assembly_dump = _AssemblyDumper(self)

    # The assembly of all known components
    self.complete_assembly = _Assembly()
    self.assembly_dump.add(self.complete_assembly)

    self.model_dump = _ModelDumper(self)
    # Share the starting-model mapping between the representation dumper
    # and the starting-model dumper
    self.model_repr_dump.starting_model \
        = self.starting_model_dump.starting_model
    self.software_dump = _SoftwareDumper(self)
    self.post_process_dump = _PostProcessDumper(self)
    self.ensemble_dump = _EnsembleDumper(self)
    self.density_dump = _DensityDumper(self)

    # These dumpers need to see models as they are deposited
    self.cross_link_dump.models = self.model_dump.models
    self.em3d_dump.models = self.model_dump.models
    self.em2d_dump.models = self.model_dump.models

    # Order matters here: mmCIF categories are written in list order.
    # NOTE(review): self.assembly_dump and self.dataset_dump entries were
    # reconstructed from gaps in the extracted text — confirm ordering.
    self._dumpers = [_EntryDumper(self),  # must be first
                     _AuditAuthorDumper(self),
                     self.software_dump, _CitationDumper(self),
                     _ChemCompDumper(self),
                     _EntityDumper(self),
                     _EntityPolyDumper(self), _EntityPolySeqDumper(self),
                     _StructAsymDumper(self),
                     self.assembly_dump,
                     self.model_repr_dump, self.extref_dump,
                     self.dataset_dump,
                     self.cross_link_dump,
                     self.em2d_dump, self.em3d_dump,
                     self.starting_model_dump,
                     self.model_prot_dump, self.post_process_dump,
                     self.ensemble_dump, self.density_dump, self.model_dump,
                     _MultiStateDumper(self)]
def _add_state(self, state):
    """Create a new state and return a pointer to it."""
    # Ensembles added from here on belong to this state
    self._state_ensemble_offset = len(self.ensemble_dump.ensembles)
    s = _State(state, self)
    if not self._states:
        self._first_state = s
    self._states[s] = None
    s.id = len(self._states)
    self._last_state = s
    # NOTE(review): reconstructed — callers need the new state object back
    return s
def get_file_dataset(self, fname):
    """Return the dataset previously registered for file `fname`, if any.

       Looks up the absolute path of `fname` in each per-representation
       dataset dict; returns None if no match is found."""
    abspath = os.path.abspath(fname)
    for d in self._file_datasets:
        fd = d.get(abspath, None)
        # NOTE(review): reconstructed — the original's return of the found
        # dataset was lost in extraction
        if fd:
            return fd
def _get_chain_for_component(self, name, output):
    """Get the chain ID for a component, if any.

       Returns the mmCIF 'omitted' placeholder for components that were
       never assigned a chain (i.e. not modeled)."""
    if name in self.chains:
        chain = self.chains[name]
        return output.chainids[chain]
    else:
        # A non-modeled component doesn't have a chain
        return _CifWriter.omitted
def create_component(self, state, name, modeled):
    """Register a component `name` for `state`.

       `modeled` is True if the component is part of the modeled system
       (gets a chain ID and joins the modeled assemblies)."""
    new_comp = name not in self._all_components
    self._all_components[name] = None
    # NOTE(review): the `if modeled:` / `if new_comp:` guards below were
    # reconstructed from indentation gaps in the extracted text — confirm
    # against upstream IMP.pmi.mmcif.
    if modeled:
        state.all_modeled_components.append(name)
        if new_comp:
            # Assign the next free chain index globally
            self.chains[name] = len(self.chains)
            self.all_modeled_components.append(name)
        state.modeled_assembly.append(name)
    if new_comp:
        self.complete_assembly.append(name)
def add_component_sequence(self, name, seq):
    """Record the primary sequence `seq` for component `name`.

       Raises ValueError if the component was already registered with a
       different sequence."""
    if name in self.sequence_dict:
        if self.sequence_dict[name] != seq:
            raise ValueError(
                "Sequence mismatch for component %s" % name)
    else:
        self.sequence_dict[name] = seq
        self.entities.add(name, seq)
def flush(self):
    """Write out the complete mmCIF file.

       Runs three passes over all dumpers: metadata finalization,
       finalization, then category output. The def line and the
       dumper.finalize() call were reconstructed from gaps in the
       extracted text."""
    for dumper in self._dumpers:
        dumper.finalize_metadata()
    for dumper in self._dumpers:
        dumper.finalize()
    for dumper in self._dumpers:
        dumper.dump(self._cif_writer)
def add_pdb_element(self, state, name, start, end, offset, pdbname,
                    chain, hier):
    """Add a PDB-derived fragment of component `name` covering residues
       `start`-`end` (with numbering `offset`) from file `pdbname`.

       NOTE(review): the trailing `chain, hier` parameters were
       reconstructed from the truncated signature and the _PDBFragment
       call — confirm against upstream."""
    p = _PDBFragment(state, name, start, end, offset, pdbname, chain,
                     hier)
    self.model_repr_dump.add_fragment(state, p)
    self.starting_model_dump.add_pdb_fragment(p)
def add_bead_element(self, state, name, start, end, num, hier):
    """Add a coarse-grained bead fragment of component `name` covering
       residues `start`-`end`, represented by `num` beads."""
    b = _BeadsFragment(state, name, start, end, num, hier)
    self.model_repr_dump.add_fragment(state, b)
def _get_restraint_dataset(self, r, num=None, allow_duplicates=False):
    """Get a wrapper object for the dataset used by this restraint.
       This is needed because the restraint's dataset may be changed
       in the PMI script before we get to use it."""
    rs = _RestraintDataset(r, num, allow_duplicates)
    self._add_dataset(rs)
    # NOTE(review): reconstructed — callers (e.g. get_cross_link_group)
    # use the returned wrapper
    return rs
def get_cross_link_group(self, r):
    """Return a cross-link group wrapping restraint `r` and its dataset."""
    return _CrossLinkGroup(r, self._get_restraint_dataset(r))
def add_experimental_cross_link(self, r1, c1, r2, c2, length, group):
    """Record an experimentally-determined cross-link between residue
       `r1` of component `c1` and residue `r2` of component `c2`."""
    if c1 not in self._all_components or c2 not in self._all_components:
        # NOTE(review): reconstructed branch — the extracted text shows
        # the membership test but its body was lost; presumably
        # cross-links to unmodeled components are skipped. Confirm
        # against upstream.
        return None
    xl = _ExperimentalCrossLink(r1, c1, r2, c2, length, group)
    self.cross_link_dump.add_experimental(xl)
    return xl
def add_cross_link(self, state, ex_xl, p1, p2, sigma1, sigma2, psi):
    """Record a modeled cross-link between particles p1 and p2,
       corresponding to experimental cross-link `ex_xl`.

       NOTE(review): the call's truncated tail was reconstructed as
       `sigma2, psi))` from the parameter list — confirm upstream."""
    self.cross_link_dump.add(_CrossLink(state, ex_xl, p1, p2, sigma1,
                                        sigma2, psi))
def add_replica_exchange(self, state, rex):
    """Record a replica-exchange sampling step in the modeling protocol."""
    self.model_prot_dump.add(_ReplicaExchangeProtocolStep(state, rex))
def _add_dataset(self, dataset):
    """Register `dataset` with the dataset dumper for the current state."""
    return self.dataset_dump.add(self._last_state, dataset)
def add_model_group(self, group):
    """Register a model group, assign it the next 1-based ID, and
       return it."""
    self.model_groups.append(group)
    group.id = len(self.model_groups)
    # NOTE(review): reconstructed — callers do
    # `group = self.add_model_group(...)`
    return group
def _add_simple_postprocessing(self, num_models_begin, num_models_end):
    """Add a simple filtering step to the protocol for the last state,
       going from `num_models_begin` input models to `num_models_end`
       retained models. Returns the new postprocess object."""
    state = self._last_state
    protocol = self.model_prot_dump.get_last_protocol(state)
    pp = _SimplePostProcess(protocol, num_models_begin, num_models_end)
    self.post_process_dump.add(pp)
    # NOTE(review): reconstructed — callers pass the result to
    # _add_simple_ensemble
    return pp
def _add_simple_ensemble(self, pp, name, num_models, drmsd,
                         num_models_deposited, localization_densities,
                         ensemble_file):
    """Add an ensemble generated by ad hoc methods (not using PMI).
       This is currently only used by the Nup84 system.

       NOTE(review): the final `ensemble_file` parameter, the `if den:`
       guard, and the return were reconstructed from truncated text —
       confirm against upstream."""
    state = self._last_state
    group = self.add_model_group(_ModelGroup(state, name))
    self.extref_dump.add(ensemble_file,
                         _ExternalReferenceDumper.MODELING_OUTPUT)
    e = _SimpleEnsemble(pp, group, num_models, drmsd, num_models_deposited,
                        ensemble_file)
    self.ensemble_dump.add(e)
    self.density_dump.add(e)
    # Attach any provided localization density to each modeled component
    for c in state.all_modeled_components:
        den = localization_densities.get(c, None)
        if den:
            e.load_localization_density(state, c, den, self.extref_dump)
    return e
def set_ensemble_file(self, i, location):
    """Point a previously-created ensemble to an 'all-models' file.
       This could be a trajectory such as DCD, an RMF, or a multimodel
       file.

       NOTE(review): the def line was lost in extraction; the name
       `set_ensemble_file(i, location)` is reconstructed from the body
       (uses `i` and `location`) — confirm against upstream."""
    self.extref_dump.add(location,
                         _ExternalReferenceDumper.MODELING_OUTPUT)
    # Index `i` is relative to the current state's ensembles
    ind = i + self._state_ensemble_offset
    self.ensemble_dump.ensembles[ind].file = location
def add_replica_exchange_analysis(self, state, rex):
    """Record a replica-exchange analysis (clustering) step.

       Adds a postprocess step for the clustering, then for each cluster
       creates a model group, an ensemble, its localization densities,
       and deposits the models.

       NOTE(review): several body lines (model stats assignment, per-
       component per-model loop body) were lost in extraction and are
       reconstructed here — confirm against upstream IMP.pmi.mmcif."""
    protocol = self.model_prot_dump.get_last_protocol(state)
    num_models = protocol[-1].num_models_end
    pp = _ReplicaExchangeAnalysisPostProcess(protocol, rex, num_models)
    self.post_process_dump.add(pp)
    for i in range(rex._number_of_clusters):
        group = self.add_model_group(_ModelGroup(state,
                                     'cluster %d' % (i + 1)))
        e = _ReplicaExchangeAnalysisEnsemble(pp, i, group, 1)
        self.ensemble_dump.add(e)
        self.density_dump.add(e)
        # Add localization density info if available
        for c in state.all_modeled_components:
            e.load_localization_density(state, c, self.extref_dump)
        for stats in e.load_all_models(self, state):
            m = self.add_model(group)
            # Since only one model is deposited per cluster, it is
            # the best scoring one
            m.name = 'Best scoring model'
            m.stats = stats
            # Add RMSF info if available
            for c in state.all_modeled_components:
                e.load_rmsf(m, c)
def add_em2d_restraint(self, state, r, i, resolution, pixel_size,
                       image_resolution, projection_number):
    """Record a 2D electron microscopy class-average restraint for
       image index `i` of restraint `r`."""
    d = self._get_restraint_dataset(r, i)
    self.em2d_dump.add(_EM2DRestraint(state, d, r, i, resolution,
                                      pixel_size, image_resolution,
                                      projection_number))
def add_em3d_restraint(self, state, target_ps, densities, r):
    """Record a 3D electron microscopy density restraint.

       Duplicate datasets are allowed since the same map may restrain
       several states (allow_duplicates=True below)."""
    d = self._get_restraint_dataset(r, allow_duplicates=True)
    self.em3d_dump.add(_EM3DRestraint(self, state, d, r, target_ps,
                                      densities))
def add_model(self, group):
    """Deposit a model of the last state into `group` and return it.

       NOTE(review): the `state = self._last_state` line was lost in
       extraction and is reconstructed (the body uses `state`)."""
    state = self._last_state
    return self.model_dump.add(state.prot,
                               self.model_prot_dump.get_last_protocol(state),
                               state.modeled_assembly, group)
def _update_location(self, fileloc):
    """Update FileLocation to point to a parent repository, if any

       NOTE(review): the filter predicate and the final update call were
       lost in extraction; reconstructed as filtering self._metadata for
       Repository objects and delegating to the metadata module — confirm
       against upstream IMP.pmi.metadata."""
    all_repos = [m for m in self._metadata
                 if isinstance(m, IMP.pmi.metadata.Repository)]
    IMP.pmi.metadata.Repository.update_in_repos(fileloc, all_repos)
# Flattened view over the per-representation metadata lists
_metadata = property(lambda self:
                     itertools.chain.from_iterable(self._each_metadata))
Select non water and non hydrogen atoms.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
Class to encode a modeling protocol as mmCIF.
static bool get_is_setup(const IMP::ParticleAdaptor &p)
ElementTable & get_element_table()
static bool get_is_setup(const IMP::ParticleAdaptor &p)
void read_pdb(TextInput input, int model, Hierarchy h)
Representation of the system.
def set_ensemble_file
Point a previously-created ensemble to an 'all-models' file.
A decorator for a particle representing an atom.
Base class for capturing a modeling protocol.
std::string get_relative_path(std::string base, std::string relative)
Return a path to a file relative to another file.
Basic utilities for handling cryo-electron microscopy 3D density maps.
A decorator for a particle with x,y,z coordinates.
Class for easy writing of PDBs, RMFs, and stat files.
Transformation3D get_identity_transformation_3d()
Return a transformation that does not do anything.
Classes for writing output files and processing them.
Basic functionality that is expected to be used by a wide variety of IMP users.
General purpose algebraic and geometric methods that are expected to be used by a wide variety of IMP...
Residue get_residue(Atom d, bool nothrow=false)
Return the Residue containing this atom.
Functionality for loading, creating, manipulating and scoring atomic structures.
Hierarchies get_leaves(const Selection &h)
Select hierarchy particles identified by the biological name.
Select all ATOM and HETATM records with the given chain ids.
ResidueType get_residue_type(char c)
Get the residue type from the 1-letter amino acid code.
Inferential scoring building on methods developed as part of the Inferential Structure Determination ...