IMP logo
IMP Reference Guide  2.7.0
The Integrative Modeling Platform
metadata.py
1 """@namespace IMP.pmi.metadata
2 Classes for attaching metadata to PMI objects.
3 """
4 
5 from __future__ import print_function, division
6 import os
7 
class Metadata(object):
    """Base class for all metadata.

    It defines no attributes or behavior of its own.
    """
11 
12 
14  """Metadata that only makes sense for the top-level PMI object."""
15  pass
16 
17 
19  """Software (other than IMP) used as part of the modeling protocol."""
20  def __init__(self, name, classification, description, url, type='program',
21  version=None):
22  self.name = name
23  self.classification = classification
24  self.description = description
25  self.url = url
26  self.type = type
27  self.version = version
28 
29 
31  """A publication that describes the modeling."""
32  def __init__(self, pmid, title, journal, volume, page_range, year, authors,
33  doi):
34  self.title, self.journal, self.volume = title, journal, volume
35  self.page_range, self.year = page_range, year
36  self.pmid, self.authors, self.doi = pmid, authors, doi
37 
38 
40  """A Python script used as part of the modeling."""
41  def __init__(self, location):
42  self.location = location
43 
44 
46  """A set of input data, for example, a crystal structure or EM map."""
47 
48  _eq_keys = ['location']
49 
50  # Datasets compare equal iff they are the same class and have the
51  # same attributes
52  def _eq_vals(self):
53  return tuple([self.__class__]
54  + [getattr(self, x) for x in self._eq_keys])
55  def __eq__(self, other):
56  return self._eq_vals() == other._eq_vals()
57  def __hash__(self):
58  return hash(self._eq_vals())
59 
60  _data_type = 'unspecified'
61  def __init__(self, location):
62  self.location = location
63  self._parents = {}
64 
def add_parent(self, dataset):
    """Add another Dataset from which this one was derived.
    For example, a 3D EM map may be derived from a set of 2D images.

    @param dataset the parent; it is used as a dictionary key, so adding
           an equal dataset again has no further effect.
    """
    self._parents[dataset] = None
69 
def add_primary(self, dataset):
    """Add another Dataset from which the ultimate parent of this one
    was derived.

    With no parents yet, `dataset` becomes a direct parent of this
    dataset. With exactly one parent, `dataset` is added as a parent of
    that parent instead.

    @param dataset the dataset to add.
    @exception ValueError if this dataset already has more than one
               parent, since the target is then ambiguous.
    """
    if not self._parents:
        self.add_parent(dataset)
    elif len(self._parents) == 1:
        only_parent = next(iter(self._parents))
        only_parent.add_parent(dataset)
    else:
        raise ValueError("This dataset has multiple parents - don't "
                         "know which one to add to")
80 
82  """Processed crosslinks from a CX-MS experiment"""
83  _data_type = 'CX-MS data'
84 
86  """Raw mass spectrometry files such as peaklists"""
87  _data_type = 'Mass Spectrometry data'
88 
90  """A 3D electron microscopy dataset"""
91  _data_type = '3DEM volume'
92 
93 class PDBDataset(Dataset):
94  """An experimentally-determined 3D structure as a set of a coordinates,
95  usually in a PDB file"""
96  _data_type = 'Experimental model'
97 
99  """A 3D structure determined by comparative modeling"""
100  _data_type = 'Comparative model'
101 
103  """Raw 2D electron micrographs"""
104  _eq_keys = Dataset._eq_keys + ['number']
105 
106  _data_type = 'EM raw micrographs'
def __init__(self, location, number):
    """Constructor.

    @param location passed on to the parent class constructor.
    @param number stored as `self.number`; it also takes part in
           equality tests, because `_eq_keys` of this class includes
           'number'.
    """
    super(EMMicrographsDataset, self).__init__(location)
    self.number = number
110 
112  """2DEM class average"""
113  _data_type = '2DEM class average'
114 
class Location(Metadata):
    """Identifies the location where a resource can be found."""

    # Names of the attributes that take part in equality tests and hashing.
    # 'details' can differ without affecting dataset equality
    _eq_keys = []
    # When True, `_eq_vals` returns the identity of the instance, so no
    # two distinct instances ever compare equal.
    _allow_duplicates = False

    def __init__(self, details=None):
        """Constructor.

        @param details stored unchanged as `self.details`.
        """
        self.details = details

    # Locations compare equal iff they are the same class, have the
    # same attributes, and allow_duplicates=False
    def _eq_vals(self):
        """Return the value used for equality tests and hashing.

        This is `id(self)` when `_allow_duplicates` is set; otherwise it
        is a tuple of the class followed by the value of each attribute
        named in `_eq_keys`.
        """
        if self._allow_duplicates:
            return id(self)
        return (self.__class__,) + tuple(getattr(self, key)
                                         for key in self._eq_keys)

    def __eq__(self, other):
        """Compare with `other` using the values returned by `_eq_vals`.

        Returns NotImplemented when `other` does not provide `_eq_vals`,
        so that comparing against an unrelated object (for example None)
        is simply unequal rather than raising AttributeError.
        """
        other_vals = getattr(other, '_eq_vals', None)
        if other_vals is None:
            return NotImplemented
        return self._eq_vals() == other_vals()

    def __ne__(self, other):
        """Inverse of `__eq__`.

        Python 2 does not derive `!=` from `__eq__`, so it is defined
        explicitly to keep the two operators consistent.
        """
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __hash__(self):
        """Hash the same value that `__eq__` compares."""
        return hash(self._eq_vals())
137 
139  """A dataset stored in an official database (PDB, EMDB, PRIDE, etc.)"""
140 
141  _eq_keys = Location._eq_keys + ['db_name', 'access_code', 'version']
142 
def __init__(self, db_name, db_code, version=None, details=None):
    """Constructor.

    @param db_name stored as `self.db_name`.
    @param db_code stored as `self.access_code` (note the different
           attribute name).
    @param version stored as `self.version`.
    @param details passed on to the parent class constructor.
    """
    super(DatabaseLocation, self).__init__(details)
    self.db_name = db_name
    self.access_code = db_code
    self.version = version
148 
150  """Something stored in the EMDB database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.

    Calls the DatabaseLocation constructor with the database name fixed
    to 'EMDB'.

    @param db_code passed on as the database code.
    @param version passed on unchanged.
    @param details passed on unchanged.
    """
    DatabaseLocation.__init__(self, 'EMDB', db_code, version, details)
153 
155  """Something stored in the PDB database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.

    Calls the DatabaseLocation constructor with the database name fixed
    to 'PDB'.

    @param db_code passed on as the database code.
    @param version passed on unchanged.
    @param details passed on unchanged.
    """
    DatabaseLocation.__init__(self, 'PDB', db_code, version, details)
158 
160  """Something stored in the MassIVE database."""
def __init__(self, db_code, version=None, details=None):
    """Constructor.

    Calls the DatabaseLocation constructor with the database name fixed
    to 'MASSIVE'.

    @param db_code passed on as the database code.
    @param version passed on unchanged.
    @param details passed on unchanged.
    """
    DatabaseLocation.__init__(self, 'MASSIVE', db_code, version, details)
163 
class FileLocation(Location):
    """An individual file or directory.
    This may be in a repository (if `repo` is not None) or only on the
    local disk (if `repo` is None)."""

    # Two file locations are equal when both repository and path match.
    _eq_keys = Location._eq_keys + ['repo', 'path']

    def __init__(self, path, repo=None, details=None):
        """Constructor.
        @param path the location of the file or directory.
        @param repo a Repository object that describes the repository
               containing the file (if any).
        @param details passed on to the parent class constructor.
        @exception ValueError if no repository is given and `path` does
                   not exist on the local disk.
        """
        super(FileLocation, self).__init__(details)
        self.repo = repo
        if repo:
            # The path is stored exactly as given; its existence on the
            # local disk is not checked.
            self.path = path
        else:
            if not os.path.exists(path):
                raise ValueError("%s does not exist" % path)
            # Store absolute path in case the working directory changes later
            self.path = os.path.abspath(path)
186 
188  """A repository containing modeling files.
189  This can be used if the PMI script plus input files are part of a
190  repository, which has been archived somewhere with a DOI.
191  This will be used to construct permanent references to files
192  used in this modeling, even if they haven't been uploaded to
193  a database such as PDB or EMDB.
194 
195  @see FileLocation."""
196 
197  # Two repositories compare equal if their DOIs and URLs are the same
198  def __eq__(self, other):
199  return self.doi == other.doi and self.url == other.url
200  def __hash__(self):
201  return hash((self.doi, self.url))
202 
203  def __init__(self, doi, root=None, url=None,
204  top_directory=None):
205  """Constructor.
206  @param doi the Digital Object Identifier for the repository.
207  @param root the relative path to the top-level directory
208  of the repository from the working directory of the script,
209  or None if files in this repository aren't checked out.
210  @param url If given, a location that this repository can be
211  downloaded from.
212  @param top_directory If given, prefix all paths for files in this
213  repository with this value. This is useful when the archived
214  version of the repository is found in a subdirectory at the
215  URL or DOI (for example, GitHub repositories archived at
216  Zenodo get placed in a subdirectory named for the repository
217  and git hash).
218  """
219  # todo: DOI should be optional (could also use URL, local path)
220  self.doi = doi
221  self.url, self.top_directory = url, top_directory
222  if root:
223  # Store absolute path in case the working directory changes later
224  self._root = os.path.abspath(root)
225 
226  @staticmethod
227  def update_in_repos(fileloc, repos):
228  """If the given FileLocation maps to somewhere within one of the
229  passed repositories, update it to reflect that."""
230  if fileloc.repo:
231  return
232  orig_path = fileloc.path
233  for repo in repos:
234  relpath = os.path.relpath(orig_path, repo._root)
235  if not relpath.startswith('..'):
236  # Prefer the shortest paths if multiple repositories can match
237  if fileloc.repo is None or len(fileloc.path) > len(relpath):
238  fileloc.repo = repo
239  fileloc.path = relpath
240 
241  def _get_full_path(self, path):
242  """Prefix the given path with our top-level directory"""
243  return os.path.join(self.top_directory or "", path)
Something stored in the EMDB database.
Definition: metadata.py:149
Something stored in the MassIVE database.
Definition: metadata.py:159
Processed crosslinks from a CX-MS experiment.
Definition: metadata.py:81
Metadata that only makes sense for the top-level PMI object.
Definition: metadata.py:13
A set of input data, for example, a crystal structure or EM map.
Definition: metadata.py:45
A Python script used as part of the modeling.
Definition: metadata.py:39
A 3D structure determined by comparative modeling.
Definition: metadata.py:102
A repository containing modeling files.
Definition: metadata.py:187
An experimentally-determined 3D structure as a set of coordinates, usually in a PDB file...
Definition: metadata.py:97
Raw 2D electron micrographs.
Definition: metadata.py:107
Raw mass spectrometry files such as peaklists.
Definition: metadata.py:86
Software (other than IMP) used as part of the modeling protocol.
Definition: metadata.py:18
def __init__
Constructor.
Definition: metadata.py:203
A publication that describes the modeling.
Definition: metadata.py:30
A dataset stored in an official database (PDB, EMDB, PRIDE, etc.)
Definition: metadata.py:138
A 3D electron microscopy dataset.
Definition: metadata.py:91
Base class for all metadata.
Definition: metadata.py:8
def add_primary
Add another Dataset from which the ultimate parent of this one was derived.
Definition: metadata.py:70
def __init__
Constructor.
Definition: metadata.py:171
Identifies the location where a resource can be found.
Definition: metadata.py:117
An individual file or directory.
Definition: metadata.py:166
Something stored in the PDB database.
Definition: metadata.py:154
def add_parent
Add another Dataset from which this one was derived.
Definition: metadata.py:65
def update_in_repos
If the given FileLocation maps to somewhere within one of the passed repositories, update it to reflect that.
Definition: metadata.py:226