IMP logo
IMP Reference Guide  develop.d97d4ead1f,2024/11/21
The Integrative Modeling Platform
proteomics_reader.h
Go to the documentation of this file.
1 /**
2  * \file IMP/multifit/proteomics_reader.h
3  * \brief handles reading of proteomics data
4  *
5  * Copyright 2007-2022 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPMULTIFIT_PROTEOMICS_READER_H
10 #define IMPMULTIFIT_PROTEOMICS_READER_H
11 
14 #include <IMP/Model.h>
15 #include <IMP/core/Hierarchy.h>
16 #include <IMP/file.h>
17 #include <IMP/multifit/multifit_config.h>
18 #include <cereal/access.hpp>
19 
20 IMPMULTIFIT_BEGIN_NAMESPACE
21 class ProteinRecordData {
22  public:
23  void reset_all() {
24  name_ = "";
25  start_res_ = 0;
26  end_res_ = 0;
27  filename_ = "";
28  surface_filename_ = "";
29  ref_filename_ = "";
30  }
31  ProteinRecordData() { reset_all(); }
32  ProteinRecordData(const std::string &name) {
33  reset_all();
34  name_ = name;
35  }
36  ProteinRecordData(const std::string &name, const std::string fn) {
37  reset_all();
38  name_ = name;
39  filename_ = fn;
40  }
41  ProteinRecordData(const std::string &name, int start_res, int end_res,
42  const std::string fn) {
43  reset_all();
44  name_ = name;
45  start_res_ = start_res;
46  end_res_ = end_res;
47  filename_ = fn;
48  }
49  ProteinRecordData(const std::string &name, int start_res, int end_res,
50  const std::string &fn, const std::string &surface_fn,
51  const std::string &ref_fn) {
52  reset_all();
53  name_ = name;
54  start_res_ = start_res;
55  end_res_ = end_res;
56  filename_ = fn;
57  surface_filename_ = surface_fn;
58  ref_filename_ = ref_fn;
59  }
60 
61  IMP_SHOWABLE_INLINE(ProteinRecordData, { out << name_; });
62 
63  std::string name_;
64  int start_res_, end_res_;
65  std::string filename_;
66  std::string surface_filename_;
67  std::string ref_filename_;
68 
69 private:
70  friend class cereal::access;
71 
72  template<class Archive> void serialize(Archive &ar) {
73  ar(name_, start_res_, end_res_, filename_, surface_filename_,
74  ref_filename_);
75  }
76 };
77 
//! Define ProteinRecordDataList, the type for storing sets of ProteinRecordData values.
78 IMP_VALUES(ProteinRecordData, ProteinRecordDataList);
79 
80 //! Storage of proteomics data.
81 class IMPMULTIFITEXPORT ProteomicsData : public Object {
82  protected:
83  public:
84  ProteomicsData() : Object("ProteomicsData%1%") {}
85  /** return the assigned index
86  */
87  int add_protein(std::string name, int start_res, int end_res,
88  const std::string &mol_fn, const std::string &surface_fn,
89  const std::string &ref_fn) {
90  prot_data_.push_back(ProteinRecordData(name, start_res, end_res, mol_fn,
91  surface_fn, ref_fn));
92  prot_map_[name] = prot_data_.size() - 1;
93  return prot_map_[name];
94  }
95  /** return the assigned index
96  */
97  int add_protein(const ProteinRecordData &rec) {
98  IMP_INTERNAL_CHECK(prot_map_.find(rec.name_) == prot_map_.end(),
99  "protein with name" << rec.name_
100  << " was added already");
101  prot_data_.push_back(rec);
102  prot_map_[rec.name_] = prot_data_.size() - 1;
103  return prot_map_[rec.name_];
104  }
105  // if not found -1 is returned
106  int find(const std::string &name) const {
107  if (prot_map_.find(name) == prot_map_.end()) return -1;
108  return prot_map_.find(name)->second;
109  }
110  void add_interaction(const Ints &ii, bool used_for_filter, float linker_len) {
111  interactions_.push_back(ii);
112  interaction_in_filter_.push_back(used_for_filter);
113  interaction_linker_len_.push_back(linker_len);
114  }
115  void add_cross_link_interaction(Int prot1, Int res1, Int prot2, Int res2,
116  bool used_in_filter, Float linker_len) {
117  xlinks_.push_back(
118  std::make_pair(IntPair(prot1, res1), IntPair(prot2, res2)));
119  xlink_in_filter_.push_back(used_in_filter);
120  xlink_len_.push_back(linker_len);
121  }
122  void add_ev_pair(Int prot1, Int prot2) {
123  ev_.push_back(std::make_pair(prot1, prot2));
124  }
125  int get_number_of_proteins() const { return prot_data_.size(); }
126  int get_number_of_interactions() const { return interactions_.size(); }
127  Ints get_interaction(int interaction_ind) const {
128  IMP_USAGE_CHECK(interaction_ind < (int)interactions_.size(),
129  "index out of range\n");
130  return interactions_[interaction_ind];
131  }
132  bool get_interaction_part_of_filter(int interaction_ind) const {
133  IMP_USAGE_CHECK(interaction_ind < (int)interaction_in_filter_.size(),
134  "index out of range\n");
135  return interaction_in_filter_[interaction_ind];
136  }
137  int get_interaction_linker_length(int interaction_ind) const {
138  IMP_USAGE_CHECK(interaction_ind < (int)interaction_linker_len_.size(),
139  "index out of range\n");
140  return interaction_linker_len_[interaction_ind];
141  }
142  int get_number_of_cross_links() const { return xlinks_.size(); }
143  std::pair<IntPair, IntPair> get_cross_link(int xlink_ind) const {
144  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
145  return xlinks_[xlink_ind];
146  }
147  bool get_cross_link_part_of_filter(int xlink_ind) const {
148  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
149  return xlink_in_filter_[xlink_ind];
150  }
151  float get_cross_link_length(int xlink_ind) const {
152  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
153  return xlink_len_[xlink_ind];
154  }
155 
156  //======== ev access functions
157  int get_number_of_ev_pairs() const { return ev_.size(); }
158  IntPair get_ev_pair(int ev_ind) const {
159  IMP_USAGE_CHECK(ev_ind < (int)ev_.size(), "index out of range\n");
160  return ev_[ev_ind];
161  }
162 
163  std::string get_protein_name(int protein_ind) const {
164  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
165  "index out of range\n");
166  return prot_data_[protein_ind].name_;
167  }
168  int get_end_res(int protein_ind) const {
169  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
170  "index out of range\n");
171  return prot_data_[protein_ind].end_res_;
172  }
173  int get_start_res(int protein_ind) const {
174  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
175  "index out of range\n");
176  return prot_data_[protein_ind].start_res_;
177  }
178  std::string get_protein_filename(int protein_ind) const {
179  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
180  "index out of range\n");
181  return prot_data_[protein_ind].filename_;
182  }
183  std::string get_reference_filename(int protein_ind) const {
184  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
185  "index out of range\n");
186  return prot_data_[protein_ind].ref_filename_;
187  }
188  std::string get_surface_filename(int protein_ind) const {
189  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
190  "index out of range\n");
191  return prot_data_[protein_ind].surface_filename_;
192  }
193  ProteinRecordData get_protein_data(int protein_ind) const {
194  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
195  "index out of range\n");
196  return prot_data_[protein_ind];
197  }
198 
200  /*IMP_OBJECT_INLINE(ProteomicsData, {
201  out<<"Proteins:";
202  for(std::vector<ProteinRecordData>::const_iterator
203  it = prot_data_.begin(); it != prot_data_.end();it++){
204  out<<it->name_<<",";
205  }
206  out<<std::endl;
207  out<<"Interactions:"<<std::endl;
208  for(IntsList::const_iterator
209  it = interactions_.begin();it != interactions_.end();it++){
210  for(Ints::const_iterator it1 = it->begin();
211  it1 != it->end();it1++){
212  out<<prot_data_[*it1].name_<<",";
213  }
214  out<<std::endl;
215  } }, {});*/
216  int get_num_allowed_violated_interactions() const {
217  return num_allowed_violated_interactions_;
218  }
219  void set_num_allowed_violated_interactions(int n) {
220  num_allowed_violated_interactions_ = n;
221  }
222  int get_num_allowed_violated_cross_links() const {
223  return num_allowed_violated_xlinks_;
224  }
225  void set_num_allowed_violated_cross_links(int n) {
226  num_allowed_violated_xlinks_ = n;
227  }
228  int get_num_allowed_violated_ev() const { return num_allowed_violated_ev_; }
229  void set_num_allowed_violated_ev(int n) { num_allowed_violated_ev_ = n; }
230 
231  protected:
232  std::vector<ProteinRecordData> prot_data_;
233  std::map<std::string, int> prot_map_;
234  IntsList interactions_;
235  std::vector<bool> interaction_in_filter_; // for each interaction
236  Floats interaction_linker_len_; // for each interaction in interactions_
237  int num_allowed_violated_interactions_;
238  // decide if it used to build the JT or just for scoring
239  //=========residue cross links
240  std::vector<std::pair<IntPair, IntPair> > xlinks_;
241  std::vector<bool> xlink_in_filter_;
242  Floats xlink_len_;
243  int num_allowed_violated_xlinks_;
244  //=========
245  IntPairs ev_; // pairs of proteins to calculate EV between
246  int num_allowed_violated_ev_;
247 };
248 
249 //! Proteomics reader
250 /**
    Only the declaration lives in this header; the definition is elsewhere.
    \param[in] proteomics_fn presumably the name of the proteomics file to
               read (inferred from the parameter name -- confirm against the
               definition)
    \return a pointer to a ProteomicsData object.
            NOTE(review): who owns the returned pointer is not visible from
            this header -- confirm before storing it in a raw pointer.
251 \todo consider using TextInput
252  */
253 IMPMULTIFITEXPORT ProteomicsData *read_proteomics_data(
254  const char *proteomics_fn);
//! Get proteomics data for a subset of proteins.
/** Only the declaration lives in this header; the definition is elsewhere.
    \param[in] pd         the proteomics data to read from (not modified
                          through this pointer, as it is pointer-to-const)
    \param[in] prot_names presumably the names of the proteins to keep
                          (inferred from the function and parameter names --
                          confirm against the definition)
    \return a pointer to a ProteomicsData object.
            NOTE(review): ownership of the returned pointer is not visible
            from this header -- confirm.
 */
255 IMPMULTIFITEXPORT
256 ProteomicsData *get_partial_proteomics_data(const ProteomicsData *pd,
257  const Strings &prot_names);
258 IMPMULTIFIT_END_NAMESPACE
259 #endif /* IMPMULTIFIT_PROTEOMICS_READER_H */
Decorator for helping deal with a hierarchy.
#define IMP_SHOWABLE_INLINE(Name, how_to_show)
Declare the methods needed by an object that can be printed.
Storage of proteomics data.
IMP::Vector< String > Strings
Standard way to pass a bunch of String values.
Definition: types.h:50
IMP::Vector< Float > Floats
Standard way to pass a bunch of Float values.
Definition: types.h:46
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
Definition: object_macros.h:25
Storage of a model, its restraints, constraints and particles.
Handling of file input/output.
int add_protein(std::string name, int start_res, int end_res, const std::string &mol_fn, const std::string &surface_fn, const std::string &ref_fn)
A more IMP-like version of the std::vector.
Definition: Vector.h:50
#define IMP_INTERNAL_CHECK(expr, message)
An assertion to check for internal errors in IMP. An IMP::ErrorException will be thrown.
Definition: check_macros.h:139
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
Definition: value_macros.h:23
Common base class for heavy weight IMP objects.
Definition: Object.h:111
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
IMP::Vector< Ints > IntsList
Standard way to pass a bunch of Ints values.
Definition: types.h:55
Simple 3D transformation class.
IMP::Vector< IntPair > IntPairs
Definition: types.h:40
Object(std::string name)
Construct an object with the given name.
double Float
Basic floating-point value (could be float, double...)
Definition: types.h:19
#define IMP_USAGE_CHECK(expr, message)
A runtime test for incorrect usage of a class or method.
Definition: check_macros.h:168
int Int
Basic integer value.
Definition: types.h:34
IMP::Vector< Int > Ints
Standard way to pass a bunch of Int values.
Definition: types.h:48
int add_protein(const ProteinRecordData &rec)
Sample best solutions using Domino.