IMP  2.4.0
The Integrative Modeling Platform
proteomics_reader.h
Go to the documentation of this file.
1 /**
2  * \file IMP/multifit/proteomics_reader.h
3  * \brief handles reading of proteomics data
4  *
5  * Copyright 2007-2015 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPMULTIFIT_PROTEOMICS_READER_H
10 #define IMPMULTIFIT_PROTEOMICS_READER_H
11 
14 #include <IMP/kernel/Model.h>
15 #include <IMP/core/Hierarchy.h>
16 #include <IMP/base/file.h>
17 #include <IMP/multifit/multifit_config.h>
18 IMPMULTIFIT_BEGIN_NAMESPACE
19 class ProteinRecordData {
20  public:
21  void reset_all() {
22  name_ = "";
23  start_res_ = 0;
24  end_res_ = 0;
25  filename_ = "";
26  surface_filename_ = "";
27  ref_filename_ = "";
28  }
29  ProteinRecordData() { reset_all(); }
30  ProteinRecordData(const std::string &name) {
31  reset_all();
32  name_ = name;
33  }
34  ProteinRecordData(const std::string &name, const std::string fn) {
35  reset_all();
36  name_ = name;
37  filename_ = fn;
38  }
39  ProteinRecordData(const std::string &name, int start_res, int end_res,
40  const std::string fn) {
41  reset_all();
42  name_ = name;
43  start_res_ = start_res;
44  end_res_ = end_res;
45  filename_ = fn;
46  }
47  ProteinRecordData(const std::string &name, int start_res, int end_res,
48  const std::string &fn, const std::string &surface_fn,
49  const std::string &ref_fn) {
50  reset_all();
51  name_ = name;
52  start_res_ = start_res;
53  end_res_ = end_res;
54  filename_ = fn;
55  surface_filename_ = surface_fn;
56  ref_filename_ = ref_fn;
57  }
58 
59  IMP_SHOWABLE_INLINE(ProteinRecordData, { out << name_; });
60 
61  std::string name_;
62  int start_res_, end_res_;
63  std::string filename_;
64  std::string surface_filename_;
65  std::string ref_filename_;
66 };
67 
68 IMP_VALUES(ProteinRecordData, ProteinRecordDataList);
69 
70 //! Storage of proteomics data.
71 class IMPMULTIFITEXPORT ProteomicsData : public base::Object {
72  protected:
73  public:
74  ProteomicsData() : Object("ProteomicsData%1%") {}
75  /** return the assigned index
76  */
77  int add_protein(std::string name, int start_res, int end_res,
78  const std::string &mol_fn, const std::string &surface_fn,
79  const std::string &ref_fn) {
80  prot_data_.push_back(ProteinRecordData(name, start_res, end_res, mol_fn,
81  surface_fn, ref_fn));
82  prot_map_[name] = prot_data_.size() - 1;
83  return prot_map_[name];
84  }
85  /** return the assigned index
86  */
87  int add_protein(const ProteinRecordData &rec) {
88  IMP_INTERNAL_CHECK(prot_map_.find(rec.name_) == prot_map_.end(),
89  "protein with name" << rec.name_
90  << " was added already");
91  prot_data_.push_back(rec);
92  prot_map_[rec.name_] = prot_data_.size() - 1;
93  return prot_map_[rec.name_];
94  }
95  // if not found -1 is returned
96  int find(const std::string &name) const {
97  if (prot_map_.find(name) == prot_map_.end()) return -1;
98  return prot_map_.find(name)->second;
99  }
100  void add_interaction(const Ints &ii, bool used_for_filter, float linker_len) {
101  interactions_.push_back(ii);
102  interaction_in_filter_.push_back(used_for_filter);
103  interaction_linker_len_.push_back(linker_len);
104  }
105  void add_cross_link_interaction(Int prot1, Int res1, Int prot2, Int res2,
106  bool used_in_filter, Float linker_len) {
107  xlinks_.push_back(
108  std::make_pair(IntPair(prot1, res1), IntPair(prot2, res2)));
109  xlink_in_filter_.push_back(used_in_filter);
110  xlink_len_.push_back(linker_len);
111  }
112  void add_ev_pair(Int prot1, Int prot2) {
113  ev_.push_back(std::make_pair(prot1, prot2));
114  }
115  int get_number_of_proteins() const { return prot_data_.size(); }
116  int get_number_of_interactions() const { return interactions_.size(); }
117  Ints get_interaction(int interaction_ind) const {
118  IMP_USAGE_CHECK(interaction_ind < (int)interactions_.size(),
119  "index out of range\n");
120  return interactions_[interaction_ind];
121  }
122  bool get_interaction_part_of_filter(int interaction_ind) const {
123  IMP_USAGE_CHECK(interaction_ind < (int)interaction_in_filter_.size(),
124  "index out of range\n");
125  return interaction_in_filter_[interaction_ind];
126  }
127  int get_interaction_linker_length(int interaction_ind) const {
128  IMP_USAGE_CHECK(interaction_ind < (int)interaction_linker_len_.size(),
129  "index out of range\n");
130  return interaction_linker_len_[interaction_ind];
131  }
132  int get_number_of_cross_links() const { return xlinks_.size(); }
133  std::pair<IntPair, IntPair> get_cross_link(int xlink_ind) const {
134  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
135  return xlinks_[xlink_ind];
136  }
137  bool get_cross_link_part_of_filter(int xlink_ind) const {
138  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
139  return xlink_in_filter_[xlink_ind];
140  }
141  float get_cross_link_length(int xlink_ind) const {
142  IMP_USAGE_CHECK(xlink_ind < (int)xlinks_.size(), "index out of range\n");
143  return xlink_len_[xlink_ind];
144  }
145 
146  //======== ev access functions
147  int get_number_of_ev_pairs() const { return ev_.size(); }
148  IntPair get_ev_pair(int ev_ind) const {
149  IMP_USAGE_CHECK(ev_ind < (int)ev_.size(), "index out of range\n");
150  return ev_[ev_ind];
151  }
152 
153  std::string get_protein_name(int protein_ind) const {
154  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
155  "index out of range\n");
156  return prot_data_[protein_ind].name_;
157  }
158  int get_end_res(int protein_ind) const {
159  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
160  "index out of range\n");
161  return prot_data_[protein_ind].end_res_;
162  }
163  int get_start_res(int protein_ind) const {
164  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
165  "index out of range\n");
166  return prot_data_[protein_ind].start_res_;
167  }
168  std::string get_protein_filename(int protein_ind) const {
169  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
170  "index out of range\n");
171  return prot_data_[protein_ind].filename_;
172  }
173  std::string get_reference_filename(int protein_ind) const {
174  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
175  "index out of range\n");
176  return prot_data_[protein_ind].ref_filename_;
177  }
178  std::string get_surface_filename(int protein_ind) const {
179  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
180  "index out of range\n");
181  return prot_data_[protein_ind].surface_filename_;
182  }
183  ProteinRecordData get_protein_data(int protein_ind) const {
184  IMP_USAGE_CHECK(protein_ind < (int)prot_data_.size(),
185  "index out of range\n");
186  return prot_data_[protein_ind];
187  }
188 
190  /*IMP_OBJECT_INLINE(ProteomicsData, {
191  out<<"Proteins:";
192  for(std::vector<ProteinRecordData>::const_iterator
193  it = prot_data_.begin(); it != prot_data_.end();it++){
194  out<<it->name_<<",";
195  }
196  out<<std::endl;
197  out<<"Interactions:"<<std::endl;
198  for(IntsList::const_iterator
199  it = interactions_.begin();it != interactions_.end();it++){
200  for(Ints::const_iterator it1 = it->begin();
201  it1 != it->end();it1++){
202  out<<prot_data_[*it1].name_<<",";
203  }
204  out<<std::endl;
205  } }, {});*/
206  int get_num_allowed_violated_interactions() const {
207  return num_allowed_violated_interactions_;
208  }
209  void set_num_allowed_violated_interactions(int n) {
210  num_allowed_violated_interactions_ = n;
211  }
212  int get_num_allowed_violated_cross_links() const {
213  return num_allowed_violated_xlinks_;
214  }
215  void set_num_allowed_violated_cross_links(int n) {
216  num_allowed_violated_xlinks_ = n;
217  }
218  int get_num_allowed_violated_ev() const { return num_allowed_violated_ev_; }
219  void set_num_allowed_violated_ev(int n) { num_allowed_violated_ev_ = n; }
220 
221  protected:
222  std::vector<ProteinRecordData> prot_data_;
223  std::map<std::string, int> prot_map_;
224  IntsList interactions_;
225  std::vector<bool> interaction_in_filter_; // for each interaction
226  Floats interaction_linker_len_; // for each interaction in interactions_
227  int num_allowed_violated_interactions_;
228  // decide if it used to build the JT or just for scoring
229  //=========residue cross links
230  std::vector<std::pair<IntPair, IntPair> > xlinks_;
231  std::vector<bool> xlink_in_filter_;
232  Floats xlink_len_;
233  int num_allowed_violated_xlinks_;
234  //=========
235  IntPairs ev_; // pairs of proteins to calculate EV between
236  int num_allowed_violated_ev_;
237 };
238 
239 //! Proteomics reader
240 /**
    Only declared in this header; the definition lives elsewhere.
    \param proteomics_fn C string, presumably the name of the proteomics
           file to read (the file format is not visible from here).
    \return pointer to a ProteomicsData object.
            NOTE(review): who owns the returned object is not visible from
            this declaration -- confirm against the implementation.
241 \todo consider using TextInput
242  */
243 IMPMULTIFITEXPORT ProteomicsData *read_proteomics_data(
244  const char *proteomics_fn);
//! Only declared in this header; the definition lives elsewhere.
/** Takes an existing ProteomicsData and a list of protein names and returns
    a pointer to a ProteomicsData.
    NOTE(review): presumably the result is restricted to the proteins named
    in prot_names; that behavior and the ownership of the returned object
    are not visible from this declaration -- confirm against the
    implementation.
 */
245 IMPMULTIFITEXPORT
246 ProteomicsData *get_partial_proteomics_data(const ProteomicsData *pd,
247  const Strings &prot_names);
248 IMPMULTIFIT_END_NAMESPACE
249 #endif /* IMPMULTIFIT_PROTEOMICS_READER_H */
Decorator for helping deal with a hierarchy.
#define IMP_SHOWABLE_INLINE(Name, how_to_show)
Declare the methods needed by an object that can be printed.
Storage of proteomics data.
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
Definition: object_macros.h:25
IMP::base::Vector< String > Strings
Standard way to pass a bunch of String values.
Definition: types.h:51
Handling of file input/output.
int add_protein(std::string name, int start_res, int end_res, const std::string &mol_fn, const std::string &surface_fn, const std::string &ref_fn)
#define IMP_INTERNAL_CHECK(expr, message)
An assertion to check for internal errors in IMP. An IMP::ErrorException will be thrown.
Definition: check_macros.h:141
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
Definition: value_macros.h:23
Object(std::string name)
Construct an object with the given name.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
Storage of a model, its restraints, constraints and particles.
Common base class for heavy weight IMP objects.
Definition: Object.h:106
Simple 3D transformation class.
IMP::base::Vector< Ints > IntsList
Standard way to pass a bunch of Ints values.
Definition: types.h:56
IMP::base::Vector< Float > Floats
Standard way to pass a bunch of Float values.
Definition: types.h:47
IMP::base::Vector< IntPair > IntPairs
Definition: types.h:41
double Float
Basic floating-point value (could be float, double...)
Definition: types.h:20
#define IMP_USAGE_CHECK(expr, message)
A runtime test for incorrect usage of a class or method.
Definition: check_macros.h:170
int Int
Basic integer value.
Definition: types.h:35
int add_protein(const ProteinRecordData &rec)
Sample best solutions using Domino.
IMP::base::Vector< Int > Ints
Standard way to pass a bunch of Int values.
Definition: types.h:49