IMP  2.0.0
The Integrative Modeling Platform
proteomics_reader.h
Go to the documentation of this file.
1 /**
2  * \file IMP/multifit/proteomics_reader.h
3  * \brief handles reading of proteomics data
4  *
5  * Copyright 2007-2013 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPMULTIFIT_PROTEOMICS_READER_H
10 #define IMPMULTIFIT_PROTEOMICS_READER_H
11 
14 #include <IMP/Model.h>
15 #include <IMP/core/Hierarchy.h>
16 #include <IMP/file.h>
17 #include <IMP/multifit/multifit_config.h>
18 IMPMULTIFIT_BEGIN_NAMESPACE
19 class ProteinRecordData {
20 public:
21  void reset_all(){
22  name_="";
23  start_res_=0;
24  end_res_=0;
25  filename_="";
26  surface_filename_="";
27  ref_filename_="";
28  }
29  ProteinRecordData() {reset_all();
30  }
31  ProteinRecordData(const std::string &name){
32  reset_all();
33  name_=name;
34  }
35  ProteinRecordData(const std::string &name,const std::string fn){
36  reset_all();
37  name_=name;filename_=fn;
38  }
39  ProteinRecordData(const std::string &name,
40  int start_res,int end_res,const std::string fn){
41  reset_all();
42  name_=name;
43  start_res_=start_res;
44  end_res_=end_res;
45  filename_=fn;
46  }
47  ProteinRecordData(const std::string &name,
48  int start_res,int end_res,const std::string &fn,
49  const std::string &surface_fn,
50  const std::string &ref_fn){
51  reset_all();
52  name_=name;
53  start_res_=start_res;
54  end_res_=end_res;
55  filename_=fn;
56  surface_filename_=surface_fn;
57  ref_filename_=ref_fn;
58  }
59 
60  IMP_SHOWABLE_INLINE(ProteinRecordData, { out << name_; });
61 
62  std::string name_;
63  int start_res_,end_res_;
64  std::string filename_;
65  std::string surface_filename_;
66  std::string ref_filename_;
67  };
68 
69 IMP_VALUES(ProteinRecordData, ProteinRecordDataList);
70 
71 class IMPMULTIFITEXPORT ProteomicsData : public base::Object {
72  protected:
73  public:
74  ProteomicsData() : Object("ProteomicsData%1%") {}
75  /** return the assigned index
76  */
77  int add_protein(std::string name,int start_res,
78  int end_res,const std::string &mol_fn,
79  const std::string &surface_fn,
80  const std::string &ref_fn){
81  prot_data_.push_back(ProteinRecordData(name,start_res,end_res,mol_fn,
82  surface_fn,ref_fn));
83  prot_map_[name]=prot_data_.size()-1;
84  return prot_map_[name];
85  }
86  /** return the assigned index
87  */
88  int add_protein(const ProteinRecordData &rec) {
89  IMP_INTERNAL_CHECK(prot_map_.find(rec.name_)==prot_map_.end(),
90  "protein with name"<<rec.name_<<" was added already");
91  prot_data_.push_back(rec);
92  prot_map_[rec.name_]=prot_data_.size()-1;
93  return prot_map_[rec.name_];
94  }
95  //if not found -1 is returned
96  int find(const std::string &name) const {
97  if (prot_map_.find(name) == prot_map_.end()) return -1;
98  return prot_map_.find(name)->second;
99  }
100  void add_interaction(const Ints &ii,bool used_for_filter,
101  float linker_len) {
102  interactions_.push_back(ii);
103  interaction_in_filter_.push_back(used_for_filter);
104  interaction_linker_len_.push_back(linker_len);
105  }
106  void add_cross_link_interaction(Int prot1,Int res1,Int prot2,Int res2,
107  bool used_in_filter,Float linker_len){
108  xlinks_.push_back(std::make_pair(IntPair(prot1,res1),IntPair(prot2,res2)));
109  xlink_in_filter_.push_back(used_in_filter);
110  xlink_len_.push_back(linker_len);
111  }
112  void add_ev_pair(Int prot1,Int prot2){
113  ev_.push_back(std::make_pair(prot1,prot2));
114  }
115  int get_number_of_proteins() const {return prot_data_.size();}
116  int get_number_of_interactions() const {return interactions_.size();}
117  Ints get_interaction(int interaction_ind) const {
118  IMP_USAGE_CHECK(interaction_ind<(int)interactions_.size(),
119  "index out of range\n");
120  return interactions_[interaction_ind];}
121  bool get_interaction_part_of_filter(int interaction_ind) const {
122  IMP_USAGE_CHECK(interaction_ind<(int)interaction_in_filter_.size(),
123  "index out of range\n");
124  return interaction_in_filter_[interaction_ind];
125  }
126  int get_interaction_linker_length(int interaction_ind) const {
127  IMP_USAGE_CHECK(interaction_ind<(int)interaction_linker_len_.size(),
128  "index out of range\n");
129  return interaction_linker_len_[interaction_ind];
130  }
131  int get_number_of_cross_links() const {return xlinks_.size();}
132  std::pair<IntPair,IntPair> get_cross_link(int xlink_ind) const {
133  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
134  "index out of range\n");
135  return xlinks_[xlink_ind];
136  }
137  bool get_cross_link_part_of_filter(int xlink_ind) const {
138  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
139  "index out of range\n");
140  return xlink_in_filter_[xlink_ind];
141  }
142  float get_cross_link_length(int xlink_ind) const {
143  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
144  "index out of range\n");
145  return xlink_len_[xlink_ind];
146  }
147 
148  //======== ev access functions
149  int get_number_of_ev_pairs() const {return ev_.size();}
150  IntPair get_ev_pair(int ev_ind) const {
151  IMP_USAGE_CHECK(ev_ind<(int)ev_.size(),
152  "index out of range\n");
153  return ev_[ev_ind];
154  }
155 
156 
157  std::string get_protein_name(int protein_ind) const {
158  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
159  "index out of range\n");
160  return prot_data_[protein_ind].name_;
161  }
162  int get_end_res(int protein_ind) const {
163  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
164  "index out of range\n");
165  return prot_data_[protein_ind].end_res_;
166  }
167  int get_start_res(int protein_ind) const {
168  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
169  "index out of range\n");
170  return prot_data_[protein_ind].start_res_;
171  }
172  std::string get_protein_filename(int protein_ind) const {
173  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
174  "index out of range\n");
175  return prot_data_[protein_ind].filename_;
176  }
177  std::string get_reference_filename(int protein_ind) const {
178  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
179  "index out of range\n");
180  return prot_data_[protein_ind].ref_filename_;
181  }
182  std::string get_surface_filename(int protein_ind) const {
183  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
184  "index out of range\n");
185  return prot_data_[protein_ind].surface_filename_;
186  }
187  ProteinRecordData get_protein_data(int protein_ind) const {
188  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),"index out of range\n");
189  return prot_data_[protein_ind];}
190 
191  IMP_OBJECT_INLINE(ProteomicsData, {
192  out<<"Proteins:";
193  for(std::vector<ProteinRecordData>::const_iterator
194  it = prot_data_.begin(); it != prot_data_.end();it++){
195  out<<it->name_<<",";
196  }
197  out<<std::endl;
198  out<<"Interactions:"<<std::endl;
199  for(IntsList::const_iterator
200  it = interactions_.begin();it != interactions_.end();it++){
201  for(Ints::const_iterator it1 = it->begin();
202  it1 != it->end();it1++){
203  out<<prot_data_[*it1].name_<<",";
204  }
205  out<<std::endl;
206  } }, {});
207  int get_num_allowed_violated_interactions() const {
208  return num_allowed_violated_interactions_;}
209  void set_num_allowed_violated_interactions(int n) {
210  num_allowed_violated_interactions_=n;}
211  int get_num_allowed_violated_cross_links() const {
212  return num_allowed_violated_xlinks_;}
213  void set_num_allowed_violated_cross_links(int n) {
214  num_allowed_violated_xlinks_=n;}
215  int get_num_allowed_violated_ev() const {
216  return num_allowed_violated_ev_;}
217  void set_num_allowed_violated_ev(int n) {
218  num_allowed_violated_ev_=n;}
219 
220  protected:
221  std::vector<ProteinRecordData> prot_data_;
222  std::map<std::string,int> prot_map_;
223  IntsList interactions_;
224  std::vector<bool> interaction_in_filter_;//for each interaction
225  Floats interaction_linker_len_;//for each interaction in interactions_
226  int num_allowed_violated_interactions_;
227  //decide if it used to build the JT or just for scoring
228  //=========residue cross links
229  std::vector<std::pair<IntPair,IntPair> > xlinks_;
230  std::vector<bool> xlink_in_filter_;
231  Floats xlink_len_;
232  int num_allowed_violated_xlinks_;
233  //=========
234  IntPairs ev_; //pairs of proteins to calcualte EV between
235  int num_allowed_violated_ev_;
236 };
237 
238 //! Proteomics reader
239 /**
240 \todo consider using TextInput
241  */
242 IMPMULTIFITEXPORT ProteomicsData *read_proteomics_data(
243  const char *proteomics_fn);
244 IMPMULTIFITEXPORT
245 ProteomicsData *get_partial_proteomics_data(
246  const ProteomicsData *pd,
247  const Strings &prot_names);
248 IMPMULTIFIT_END_NAMESPACE
249 #endif /* IMPMULTIFIT_PROTEOMICS_READER_H */