IMP logo
IMP Reference Guide  2.8.0
The Integrative Modeling Platform
metadata.py
1 """@namespace IMP.pmi.metadata
2 Classes for attaching metadata to PMI objects.
3 """
4 
5 from __future__ import print_function, division
6 from IMP.pmi.tools import OrderedDict
7 import os
8 
class Metadata(object):
    """Common base class from which the metadata classes in this
       module derive. It adds no attributes or behavior of its own."""
12 
13 
15  """Metadata that only makes sense for the top-level PMI object."""
16  pass
17 
18 
20  """Software (other than IMP) used as part of the modeling protocol."""
21  def __init__(self, name, classification, description, url, type='program',
22  version=None):
23  self.name = name
24  self.classification = classification
25  self.description = description
26  self.url = url
27  self.type = type
28  self.version = version
29 
30 
32  """A publication that describes the modeling."""
33  def __init__(self, pmid, title, journal, volume, page_range, year, authors,
34  doi):
35  self.title, self.journal, self.volume = title, journal, volume
36  self.page_range, self.year = page_range, year
37  self.pmid, self.authors, self.doi = pmid, authors, doi
38 
39 
41  """A Python script used as part of the modeling."""
42  def __init__(self, location):
43  self.location = location
44 
45 
47  """A set of input data, for example, a crystal structure or EM map."""
48 
49  _eq_keys = ['location']
50 
51  # Datasets compare equal iff they are the same class and have the
52  # same attributes
53  def _eq_vals(self):
54  return tuple([self.__class__]
55  + [getattr(self, x) for x in self._eq_keys])
56  def __eq__(self, other):
57  return self._eq_vals() == other._eq_vals()
58  def __hash__(self):
59  return hash(self._eq_vals())
60 
61  _data_type = 'unspecified'
def __init__(self, location):
    """Constructor.
       @param location stored unchanged as `self.location`.
    """
    # Parents are kept as the keys of an ordered mapping (values unused)
    self._parents = OrderedDict()
    self.location = location
65 
def add_parent(self, dataset):
    """Record `dataset` as a Dataset that this one was derived from.
       For example, a 3D EM map may be derived from a set of 2D images.
       @param dataset the parent; it is used as a key of the internal
              parent mapping, so adding the same parent twice has no
              further effect.
    """
    self._parents[dataset] = None
70 
def add_primary(self, dataset):
    """Add another Dataset from which the ultimate parent of this one
       was derived.
       With no parents, `dataset` becomes a parent of this object; with
       exactly one parent, `dataset` is added as a parent of that parent.
       @param dataset the Dataset to add.
       @exception ValueError if this dataset has more than one parent.
    """
    parent_count = len(self._parents)
    if parent_count > 1:
        raise ValueError("This dataset has multiple parents - don't "
                         "know which one to add to")
    if parent_count == 0:
        self.add_parent(dataset)
        return
    only_parent = next(iter(self._parents))
    only_parent.add_parent(dataset)
81 
83  """Processed crosslinks from a CX-MS experiment"""
84  _data_type = 'CX-MS data'
85 
87  """Raw mass spectrometry files such as peaklists"""
88  _data_type = 'Mass Spectrometry data'
89 
91  """A 3D electron microscopy dataset"""
92  _data_type = '3DEM volume'
93 
class PDBDataset(Dataset):
    """An experimentally-determined 3D structure as a set of coordinates,
       usually in a PDB file."""

    _data_type = 'Experimental model'
98 
100  """A 3D structure determined by comparative modeling"""
101  _data_type = 'Comparative model'
102 
104  """Raw 2D electron micrographs"""
105  _eq_keys = Dataset._eq_keys + ['number']
106 
107  _data_type = 'EM raw micrographs'
def __init__(self, location, number):
    """Constructor.
       @param location passed on to the parent class constructor.
       @param number stored unchanged as `self.number`; it also takes
              part in equality tests via `_eq_keys`.
    """
    super(EMMicrographsDataset, self).__init__(location)
    self.number = number
111 
113  """2DEM class average"""
114  _data_type = '2DEM class average'
115 
class Location(Metadata):
    """Identifies the location where a resource can be found.

       Two locations compare equal iff they are the same class, the
       attributes named in `_eq_keys` match, and `_allow_duplicates`
       is False. Comparison with None or any object that does not
       provide `_eq_vals` is simply unequal (no exception is raised).
    """

    # 'details' can differ without affecting dataset equality
    _eq_keys = []
    _allow_duplicates = False

    def __init__(self, details=None):
        """Constructor.
           @param details stored unchanged as `self.details`; it does
                  not take part in equality tests.
        """
        self.details = details

    def _eq_vals(self):
        """Return the value used for equality tests and hashing.
           If `_allow_duplicates` is set this is `id(self)`, so the object
           only equals itself; otherwise it is a tuple of the class
           followed by each attribute named in `_eq_keys`."""
        if self._allow_duplicates:
            return id(self)
        else:
            return tuple([self.__class__]
                         + [getattr(self, x) for x in self._eq_keys])

    def __eq__(self, other):
        """Compare using `_eq_vals`; return NotImplemented for objects
           that do not provide it (e.g. None) instead of raising
           AttributeError."""
        try:
            other_vals = other._eq_vals
        except AttributeError:
            return NotImplemented
        return self._eq_vals() == other_vals()

    def __ne__(self, other):
        """Inverse of `__eq__`. Python 2 does not derive `!=` from
           `__eq__`, so it must be defined explicitly."""
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash on the same value that `__eq__` compares."""
        return hash(self._eq_vals())
138 
140  """A dataset stored in an official database (PDB, EMDB, PRIDE, etc.)"""
141 
142  _eq_keys = Location._eq_keys + ['db_name', 'access_code', 'version']
143 
def __init__(self, db_name, db_code, version=None, details=None):
    """Constructor.
       @param db_name stored unchanged as `self.db_name`.
       @param db_code stored unchanged as `self.access_code`.
       @param version stored unchanged as `self.version`.
       @param details passed on to the parent class constructor.
    """
    super(DatabaseLocation, self).__init__(details)
    self.db_name, self.access_code = db_name, db_code
    self.version = version
149 
151  """Something stored in the EMDB database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.
       Delegates to DatabaseLocation with the database name fixed
       to 'EMDB'; the remaining arguments are passed through."""
    DatabaseLocation.__init__(self, 'EMDB', db_code,
                              version=version, details=details)
154 
156  """Something stored in the PDB database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.
       Delegates to DatabaseLocation with the database name fixed
       to 'PDB'; the remaining arguments are passed through."""
    DatabaseLocation.__init__(self, 'PDB', db_code,
                              version=version, details=details)
159 
161  """Something stored in the MassIVE database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.
       Delegates to DatabaseLocation with the database name fixed
       to 'MASSIVE'; the remaining arguments are passed through."""
    DatabaseLocation.__init__(self, 'MASSIVE', db_code,
                              version=version, details=details)
164 
class FileLocation(Location):
    """An individual file or directory.
       This may be in a repository (if `repo` is not None) or only on the
       local disk (if `repo` is None)."""

    _eq_keys = Location._eq_keys + ['repo', 'path']

    def __init__(self, path, repo=None, details=None):
        """Constructor.
           @param path the location of the file or directory.
           @param repo a Repository object that describes the repository
                  containing the file (if any).
           @param details passed on to the parent class constructor.
           @exception ValueError if no `repo` is given and `path` does
                      not exist on the local disk.
        """
        super(FileLocation, self).__init__(details)
        self.repo = repo
        if not repo:
            # Local file: it must exist so that its size can be recorded
            if not os.path.exists(path):
                raise ValueError("%s does not exist" % path)
            self.file_size = os.stat(path).st_size
            # Store absolute path in case the working directory
            # changes later
            self.path = os.path.abspath(path)
            return
        # Cannot determine file size if non-local
        self.file_size = None
        self.path = path
190 
192  """A repository containing modeling files.
193  This can be used if the PMI script plus input files are part of a
194  repository, which has been archived somewhere with a DOI.
195  This will be used to construct permanent references to files
196  used in this modeling, even if they haven't been uploaded to
197  a database such as PDB or EMDB.
198 
199  @see FileLocation."""
200 
201  # Two repositories compare equal if their DOIs and URLs are the same
202  def __eq__(self, other):
203  return self.doi == other.doi and self.url == other.url
204  def __hash__(self):
205  return hash((self.doi, self.url))
206 
207  def __init__(self, doi, root=None, url=None,
208  top_directory=None):
209  """Constructor.
210  @param doi the Digital Object Identifier for the repository.
211  @param root the relative path to the top-level directory
212  of the repository from the working directory of the script,
213  or None if files in this repository aren't checked out.
214  @param url If given, a location that this repository can be
215  downloaded from.
216  @param top_directory If given, prefix all paths for files in this
217  repository with this value. This is useful when the archived
218  version of the repository is found in a subdirectory at the
219  URL or DOI (for example, GitHub repositories archived at
220  Zenodo get placed in a subdirectory named for the repository
221  and git hash).
222  """
223  # todo: DOI should be optional (could also use URL, local path)
224  self.doi = doi
225  self.url, self.top_directory = url, top_directory
226  if root:
227  # Store absolute path in case the working directory changes later
228  self._root = os.path.abspath(root)
229 
@staticmethod
def update_in_repos(fileloc, repos):
    """If the given FileLocation maps to somewhere within one of the
       passed repositories, update it to reflect that.
       @param fileloc the FileLocation to update in place; it is left
              untouched if it already has a repository or if no
              repository contains it.
       @param repos the Repository objects to try. Repositories that
              have no local checkout (constructed without `root`)
              are skipped.
    """
    if fileloc.repo:
        return
    orig_path = fileloc.path
    parent_prefix = os.pardir + os.sep
    for repo in repos:
        # `_root` is only set when the repository was given a local root
        root = getattr(repo, '_root', None)
        if root is None:
            continue
        try:
            relpath = os.path.relpath(orig_path, root)
        except ValueError:
            # e.g. path and root are on different drives on Windows
            continue
        # Outside the repository only if the relative path climbs out of
        # the root; a name that merely begins with '..' is still inside
        if relpath == os.pardir or relpath.startswith(parent_prefix):
            continue
        # Prefer the shortest paths if multiple repositories can match
        if fileloc.repo is None or len(fileloc.path) > len(relpath):
            fileloc.repo = repo
            fileloc.path = relpath
244 
245  def _get_full_path(self, path):
246  """Prefix the given path with our top-level directory"""
247  return os.path.join(self.top_directory or "", path)
Something stored in the EMDB database.
Definition: metadata.py:150
Something stored in the MassIVE database.
Definition: metadata.py:160
Processed crosslinks from a CX-MS experiment.
Definition: metadata.py:82
Miscellaneous utilities.
Definition: tools.py:1
Metadata that only makes sense for the top-level PMI object.
Definition: metadata.py:14
A set of input data, for example, a crystal structure or EM map.
Definition: metadata.py:46
A Python script used as part of the modeling.
Definition: metadata.py:40
A 3D structure determined by comparative modeling.
Definition: metadata.py:103
A repository containing modeling files.
Definition: metadata.py:191
An experimentally-determined 3D structure as a set of coordinates, usually in a PDB file...
Definition: metadata.py:98
Raw 2D electron micrographs.
Definition: metadata.py:108
Raw mass spectrometry files such as peaklists.
Definition: metadata.py:87
Software (other than IMP) used as part of the modeling protocol.
Definition: metadata.py:19
def __init__
Constructor.
Definition: metadata.py:207
A publication that describes the modeling.
Definition: metadata.py:31
A dataset stored in an official database (PDB, EMDB, PRIDE, etc.)
Definition: metadata.py:139
A 3D electron microscopy dataset.
Definition: metadata.py:92
Base class for all metadata.
Definition: metadata.py:9
def add_primary
Add another Dataset from which the ultimate parent of this one was derived.
Definition: metadata.py:71
def __init__
Constructor.
Definition: metadata.py:172
Identifies the location where a resource can be found.
Definition: metadata.py:118
An individual file or directory.
Definition: metadata.py:167
Something stored in the PDB database.
Definition: metadata.py:155
def add_parent
Add another Dataset from which this one was derived.
Definition: metadata.py:66
def update_in_repos
If the given FileLocation maps to somewhere within one of the passed repositories, update it to reflect that.
Definition: metadata.py:230