1 """@namespace IMP.pmi.metadata
2 Classes for attaching metadata to PMI objects.
5 from __future__
import print_function, division
9 """Base class for all metadata"""
14 """Metadata that only makes sense for the top-level PMI object."""
19 """Software (other than IMP) used as part of the modeling protocol."""
20 def __init__(self, name, classification, description, url, type='program',
23 self.classification = classification
24 self.description = description
27 self.version = version
31 """A publication that describes the modeling."""
32 def __init__(self, pmid, title, journal, volume, page_range, year, authors,
34 self.title, self.journal, self.volume = title, journal, volume
35 self.page_range, self.year = page_range, year
36 self.pmid, self.authors, self.doi = pmid, authors, doi
40 """A Python script used as part of the modeling."""
41 def __init__(self, location):
42 self.location = location
46 """A set of input data, for example, a crystal structure or EM map."""
48 _eq_keys = [
'location']
53 return tuple([self.__class__]
54 + [getattr(self, x)
for x
in self._eq_keys])
55 def __eq__(self, other):
56 return self._eq_vals() == other._eq_vals()
58 return hash(self._eq_vals())
60 _data_type =
'unspecified'
61 def __init__(self, location):
62 self.location = location
66 """Add another Dataset from which this one was derived.
67 For example, a 3D EM map may be derived from a set of 2D images."""
68 self._parents[dataset] =
None
71 """Add another Dataset from which the ultimate parent of this one
73 if len(self._parents) == 0:
75 elif len(self._parents) == 1:
76 list(self._parents.keys())[0].
add_parent(dataset)
78 raise ValueError(
"This dataset has multiple parents - don't "
79 "know which one to add to")
82 """Processed crosslinks from a CX-MS experiment"""
83 _data_type =
'CX-MS data'
86 """Raw mass spectrometry files such as peaklists"""
87 _data_type =
'Mass Spectrometry data'
90 """A 3D electron microscopy dataset"""
91 _data_type =
'3DEM volume'
94 """An experimentally-determined 3D structure as a set of coordinates,
95 usually in a PDB file"""
96 _data_type =
'Experimental model'
99 """A 3D structure determined by comparative modeling"""
100 _data_type =
'Comparative model'
103 """Raw 2D electron micrographs"""
104 _eq_keys = Dataset._eq_keys + [
'number']
106 _data_type =
'EM raw micrographs'
def __init__(self, location, number):
    """Create the dataset.

    @param location passed through to the base class constructor.
    @param number stored as the `number` attribute (presumably the
           number of micrographs - confirm against callers).
    """
    super(EMMicrographsDataset, self).__init__(location)
    # 'number' is listed in _eq_keys, so the attribute must be set for
    # equality comparison and hashing to work.
    self.number = number
112 """2DEM class average"""
113 _data_type =
'2DEM class average'
116 """Identifies the location where a resource can be found."""
120 _allow_duplicates =
False
122 def __init__(self, details=None):
123 self.details = details
128 if self._allow_duplicates:
131 return tuple([self.__class__]
132 + [getattr(self, x)
for x
in self._eq_keys])
133 def __eq__(self, other):
134 return self._eq_vals() == other._eq_vals()
136 return hash(self._eq_vals())
139 """A dataset stored in an official database (PDB, EMDB, PRIDE, etc.)"""
141 _eq_keys = Location._eq_keys + [
'db_name',
'access_code',
'version']
def __init__(self, db_name, db_code, version=None, details=None):
    """Describe an entry in a named database.

    @param db_name the name of the database; stored as `db_name`.
    @param db_code the identifier of the entry in that database. Note
           that it is stored under the attribute name `access_code`.
    @param version optional version of the entry; stored as `version`.
    @param details passed through to the base class constructor.
    """
    super(DatabaseLocation, self).__init__(details)
    self.db_name, self.access_code, self.version = (db_name, db_code,
                                                    version)
150 """Something stored in the EMDB database."""
def __init__(self, db_code, version=None, details=None):
    """Describe an entry in the EMDB database.

    All arguments are forwarded to DatabaseLocation.__init__, with the
    database name fixed to 'EMDB'.

    @param db_code the identifier of the entry.
    @param version optional version of the entry.
    @param details optional additional information.
    """
    DatabaseLocation.__init__(self, 'EMDB', db_code,
                              version=version, details=details)
155 """Something stored in the PDB database."""
def __init__(self, db_code, version=None, details=None):
    """Describe an entry in the PDB database.

    All arguments are forwarded to DatabaseLocation.__init__, with the
    database name fixed to 'PDB'.

    @param db_code the identifier of the entry.
    @param version optional version of the entry.
    @param details optional additional information.
    """
    DatabaseLocation.__init__(self, 'PDB', db_code,
                              version=version, details=details)
160 """Something stored in the MassIVE database."""
def __init__(self, db_code, version=None, details=None):
    """Describe an entry in the MassIVE database.

    All arguments are forwarded to DatabaseLocation.__init__, with the
    database name fixed to 'MASSIVE'.

    @param db_code the identifier of the entry.
    @param version optional version of the entry.
    @param details optional additional information.
    """
    DatabaseLocation.__init__(self, 'MASSIVE', db_code,
                              version=version, details=details)
165 """An individual file or directory.
166 This may be in a repository (if `repo` is not None) or only on the
167 local disk (if `repo` is None)."""
169 _eq_keys = Location._eq_keys + [
'repo',
'path']
173 @param path the location of the file or directory.
174 @param repo a Repository object that describes the repository
175 containing the file (if any).
177 super(FileLocation, self).
__init__(details)
182 if not os.path.exists(path):
183 raise ValueError(
"%s does not exist" % path)
185 self.path = os.path.abspath(path)
188 """A repository containing modeling files.
189 This can be used if the PMI script plus input files are part of a
190 repository, which has been archived somewhere with a DOI.
191 This will be used to construct permanent references to files
192 used in this modeling, even if they haven't been uploaded to
193 a database such as PDB or EMDB.
195 @see FileLocation."""
198 def __eq__(self, other):
199 return self.doi == other.doi
and self.url == other.url
201 return hash((self.doi, self.url))
206 @param doi the Digital Object Identifier for the repository.
207 @param root the relative path to the top-level directory
208 of the repository from the working directory of the script,
209 or None if files in this repository aren't checked out.
210 @param url If given, a location that this repository can be
212 @param top_directory If given, prefix all paths for files in this
213 repository with this value. This is useful when the archived
214 version of the repository is found in a subdirectory at the
215 URL or DOI (for example, GitHub repositories archived at
216 Zenodo get placed in a subdirectory named for the repository
221 self.url, self.top_directory = url, top_directory
224 self._root = os.path.abspath(root)
228 """If the given FileLocation maps to somewhere within one of the
229 passed repositories, update it to reflect that."""
232 orig_path = fileloc.path
234 relpath = os.path.relpath(orig_path, repo._root)
235 if not relpath.startswith(
'..'):
237 if fileloc.repo
is None or len(fileloc.path) > len(relpath):
239 fileloc.path = relpath
241 def _get_full_path(self, path):
242 """Prefix the given path with our top-level directory"""
243 return os.path.join(self.top_directory
or "", path)