IMP  2.1.0
The Integrative Modeling Platform
proteomics_reader.h
Go to the documentation of this file.
1 /**
2  * \file IMP/multifit/proteomics_reader.h
3  * \brief handles reading of proteomics data
4  *
5  * Copyright 2007-2013 IMP Inventors. All rights reserved.
6  *
7  */
8 
9 #ifndef IMPMULTIFIT_PROTEOMICS_READER_H
10 #define IMPMULTIFIT_PROTEOMICS_READER_H
11 
14 #include <IMP/kernel/Model.h>
15 #include <IMP/core/Hierarchy.h>
16 #include <IMP/file.h>
17 #include <IMP/multifit/multifit_config.h>
18 IMPMULTIFIT_BEGIN_NAMESPACE
19 class ProteinRecordData {
20 public:
21  void reset_all(){
22  name_="";
23  start_res_=0;
24  end_res_=0;
25  filename_="";
26  surface_filename_="";
27  ref_filename_="";
28  }
29  ProteinRecordData() {reset_all();
30  }
31  ProteinRecordData(const std::string &name){
32  reset_all();
33  name_=name;
34  }
35  ProteinRecordData(const std::string &name,const std::string fn){
36  reset_all();
37  name_=name;filename_=fn;
38  }
39  ProteinRecordData(const std::string &name,
40  int start_res,int end_res,const std::string fn){
41  reset_all();
42  name_=name;
43  start_res_=start_res;
44  end_res_=end_res;
45  filename_=fn;
46  }
47  ProteinRecordData(const std::string &name,
48  int start_res,int end_res,const std::string &fn,
49  const std::string &surface_fn,
50  const std::string &ref_fn){
51  reset_all();
52  name_=name;
53  start_res_=start_res;
54  end_res_=end_res;
55  filename_=fn;
56  surface_filename_=surface_fn;
57  ref_filename_=ref_fn;
58  }
59 
60  IMP_SHOWABLE_INLINE(ProteinRecordData, { out << name_; });
61 
62  std::string name_;
63  int start_res_,end_res_;
64  std::string filename_;
65  std::string surface_filename_;
66  std::string ref_filename_;
67  };
68 
69 IMP_VALUES(ProteinRecordData, ProteinRecordDataList);
70 
71 //! Storage of proteomics data.
72 class IMPMULTIFITEXPORT ProteomicsData : public base::Object {
73  protected:
74  public:
75  ProteomicsData() : Object("ProteomicsData%1%") {}
76  /** return the assigned index
77  */
78  int add_protein(std::string name,int start_res,
79  int end_res,const std::string &mol_fn,
80  const std::string &surface_fn,
81  const std::string &ref_fn){
82  prot_data_.push_back(ProteinRecordData(name,start_res,end_res,mol_fn,
83  surface_fn,ref_fn));
84  prot_map_[name]=prot_data_.size()-1;
85  return prot_map_[name];
86  }
87  /** return the assigned index
88  */
89  int add_protein(const ProteinRecordData &rec) {
90  IMP_INTERNAL_CHECK(prot_map_.find(rec.name_)==prot_map_.end(),
91  "protein with name"<<rec.name_<<" was added already");
92  prot_data_.push_back(rec);
93  prot_map_[rec.name_]=prot_data_.size()-1;
94  return prot_map_[rec.name_];
95  }
96  //if not found -1 is returned
97  int find(const std::string &name) const {
98  if (prot_map_.find(name) == prot_map_.end()) return -1;
99  return prot_map_.find(name)->second;
100  }
101  void add_interaction(const Ints &ii,bool used_for_filter,
102  float linker_len) {
103  interactions_.push_back(ii);
104  interaction_in_filter_.push_back(used_for_filter);
105  interaction_linker_len_.push_back(linker_len);
106  }
107  void add_cross_link_interaction(Int prot1,Int res1,Int prot2,Int res2,
108  bool used_in_filter,Float linker_len){
109  xlinks_.push_back(std::make_pair(IntPair(prot1,res1),IntPair(prot2,res2)));
110  xlink_in_filter_.push_back(used_in_filter);
111  xlink_len_.push_back(linker_len);
112  }
113  void add_ev_pair(Int prot1,Int prot2){
114  ev_.push_back(std::make_pair(prot1,prot2));
115  }
116  int get_number_of_proteins() const {return prot_data_.size();}
117  int get_number_of_interactions() const {return interactions_.size();}
118  Ints get_interaction(int interaction_ind) const {
119  IMP_USAGE_CHECK(interaction_ind<(int)interactions_.size(),
120  "index out of range\n");
121  return interactions_[interaction_ind];}
122  bool get_interaction_part_of_filter(int interaction_ind) const {
123  IMP_USAGE_CHECK(interaction_ind<(int)interaction_in_filter_.size(),
124  "index out of range\n");
125  return interaction_in_filter_[interaction_ind];
126  }
127  int get_interaction_linker_length(int interaction_ind) const {
128  IMP_USAGE_CHECK(interaction_ind<(int)interaction_linker_len_.size(),
129  "index out of range\n");
130  return interaction_linker_len_[interaction_ind];
131  }
132  int get_number_of_cross_links() const {return xlinks_.size();}
133  std::pair<IntPair,IntPair> get_cross_link(int xlink_ind) const {
134  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
135  "index out of range\n");
136  return xlinks_[xlink_ind];
137  }
138  bool get_cross_link_part_of_filter(int xlink_ind) const {
139  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
140  "index out of range\n");
141  return xlink_in_filter_[xlink_ind];
142  }
143  float get_cross_link_length(int xlink_ind) const {
144  IMP_USAGE_CHECK(xlink_ind<(int)xlinks_.size(),
145  "index out of range\n");
146  return xlink_len_[xlink_ind];
147  }
148 
149  //======== ev access functions
150  int get_number_of_ev_pairs() const {return ev_.size();}
151  IntPair get_ev_pair(int ev_ind) const {
152  IMP_USAGE_CHECK(ev_ind<(int)ev_.size(),
153  "index out of range\n");
154  return ev_[ev_ind];
155  }
156 
157 
158  std::string get_protein_name(int protein_ind) const {
159  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
160  "index out of range\n");
161  return prot_data_[protein_ind].name_;
162  }
163  int get_end_res(int protein_ind) const {
164  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
165  "index out of range\n");
166  return prot_data_[protein_ind].end_res_;
167  }
168  int get_start_res(int protein_ind) const {
169  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
170  "index out of range\n");
171  return prot_data_[protein_ind].start_res_;
172  }
173  std::string get_protein_filename(int protein_ind) const {
174  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
175  "index out of range\n");
176  return prot_data_[protein_ind].filename_;
177  }
178  std::string get_reference_filename(int protein_ind) const {
179  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
180  "index out of range\n");
181  return prot_data_[protein_ind].ref_filename_;
182  }
183  std::string get_surface_filename(int protein_ind) const {
184  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),
185  "index out of range\n");
186  return prot_data_[protein_ind].surface_filename_;
187  }
188  ProteinRecordData get_protein_data(int protein_ind) const {
189  IMP_USAGE_CHECK(protein_ind<(int)prot_data_.size(),"index out of range\n");
190  return prot_data_[protein_ind];}
191 
193  /*IMP_OBJECT_INLINE(ProteomicsData, {
194  out<<"Proteins:";
195  for(std::vector<ProteinRecordData>::const_iterator
196  it = prot_data_.begin(); it != prot_data_.end();it++){
197  out<<it->name_<<",";
198  }
199  out<<std::endl;
200  out<<"Interactions:"<<std::endl;
201  for(IntsList::const_iterator
202  it = interactions_.begin();it != interactions_.end();it++){
203  for(Ints::const_iterator it1 = it->begin();
204  it1 != it->end();it1++){
205  out<<prot_data_[*it1].name_<<",";
206  }
207  out<<std::endl;
208  } }, {});*/
209  int get_num_allowed_violated_interactions() const {
210  return num_allowed_violated_interactions_;}
211  void set_num_allowed_violated_interactions(int n) {
212  num_allowed_violated_interactions_=n;}
213  int get_num_allowed_violated_cross_links() const {
214  return num_allowed_violated_xlinks_;}
215  void set_num_allowed_violated_cross_links(int n) {
216  num_allowed_violated_xlinks_=n;}
217  int get_num_allowed_violated_ev() const {
218  return num_allowed_violated_ev_;}
219  void set_num_allowed_violated_ev(int n) {
220  num_allowed_violated_ev_=n;}
221 
222  protected:
223  std::vector<ProteinRecordData> prot_data_;
224  std::map<std::string,int> prot_map_;
225  IntsList interactions_;
226  std::vector<bool> interaction_in_filter_;//for each interaction
227  Floats interaction_linker_len_;//for each interaction in interactions_
228  int num_allowed_violated_interactions_;
229  //decide if it used to build the JT or just for scoring
230  //=========residue cross links
231  std::vector<std::pair<IntPair,IntPair> > xlinks_;
232  std::vector<bool> xlink_in_filter_;
233  Floats xlink_len_;
234  int num_allowed_violated_xlinks_;
235  //=========
236  IntPairs ev_; //pairs of proteins to calcualte EV between
237  int num_allowed_violated_ev_;
238 };
239 
240 //! Proteomics reader
241 /**
242 \todo consider using TextInput
243  */
244 IMPMULTIFITEXPORT ProteomicsData *read_proteomics_data(
245  const char *proteomics_fn);
246 IMPMULTIFITEXPORT
247 ProteomicsData *get_partial_proteomics_data(
248  const ProteomicsData *pd,
249  const Strings &prot_names);
250 IMPMULTIFIT_END_NAMESPACE
251 #endif /* IMPMULTIFIT_PROTEOMICS_READER_H */
Decorator for helping deal with a hierarchy.
Storage of proteomics data.
#define IMP_VALUES(Name, PluralName)
Define the type for storing sets of values.
IMP::base::Vector< String > Strings
Standard way to pass a bunch of String values.
Definition: base/types.h:51
#define IMP_SHOWABLE_INLINE(Name, how_to_show)
Declare the methods needed by an object that can be printed.
Import IMP/kernel/file.h in the namespace.
int add_protein(std::string name, int start_res, int end_res, const std::string &mol_fn, const std::string &surface_fn, const std::string &ref_fn)
#define IMP_USAGE_CHECK(expr, message)
A runtime test for incorrect usage of a class or method.
#define IMP_INTERNAL_CHECK(expr, message)
An assertion to check for internal errors in IMP. An IMP::ErrorException will be thrown.
#define IMP_OBJECT_METHODS(Name)
Define the basic things needed by any Object.
ProteomicsData * read_proteomics_data(const char *proteomics_fn)
Proteomics reader.
Storage of a model, its restraints, constraints and particles.
Common base class for heavy weight IMP objects.
Simple 3D transformation class.
IMP::base::Vector< Ints > IntsList
Standard way to pass a bunch of Ints values.
Definition: base/types.h:56
IMP::base::Vector< Float > Floats
Standard way to pass a bunch of Float values.
Definition: base/types.h:47
IMP::base::Vector< IntPair > IntPairs
Definition: base/types.h:41
double Float
Basic floating-point value (could be float, double...)
Definition: base/types.h:20
int Int
Basic integer value.
Definition: base/types.h:35
int add_protein(const ProteinRecordData &rec)
A Bayesian inference-based sampler.
IMP::base::Vector< Int > Ints
Standard way to pass a bunch of Int values.
Definition: base/types.h:49