IMP logo
IMP Reference Guide  develop.031dafb4d2,2024/05/16
The Integrative Modeling Platform
composition_scoring.py
1 """@namespace IMP.spatiotemporal.composition_scoring
2  Functions for weighting graphNode objects based on stoichiometry data.
3 """
4 import numpy as np
5 import warnings
6 import os
7 
8 
def get_state(subcomplex_components, prot):
    """
    Count how many entries of subcomplex_components contain prot.

    Containment is tested with the `in` operator on each entry, so an entry
    is counted whenever `prot in entry` is true.

    @param subcomplex_components: subcomplexes or components in a given node,
           which can be accessed by graphNode.get_subcomplex_components()
    @param prot: string, protein or subcomplex we are interested in finding
    @return state, int, number of entries of subcomplex_components in which
            the protein or subcomplex appears
    """
    return sum(1 for component in subcomplex_components if prot in component)
26 
27 
def composition_likelihood_function(mean, std, prots, node):
    """Function that calculates the likelihood of an individual node, used by
    calc_likelihood().

    For every protein in prots the following term is added to the weight:
    0.5 * ((x - mean) / std)**2 + log(std * sqrt(2 * pi)),
    where x is the number of times the protein appears in the node and
    mean / std are looked up at the time of the node.

    @param mean: dictionary of dictionaries where the first key is the protein,
           the second key is the time, and the expected mean copy number
           from experiment is returned.
    @param std: dictionary of dictionaries where the first key is the protein,
           the second key is the time, and the expected standard deviation
           of protein copy number from experiment is returned.
    @param prots: list of proteins or subcomplexes which will be scored
           according to this likelihood function
    @param node: the graphNode object for which the likelihood will be
           calculated.
    @return w: float, the weight of the graphNode according to the composition
            likelihood function (0 if prots is empty).
    """
    # get time
    t = node.get_time()
    # the components do not depend on the protein, so fetch them once
    components = node.get_subcomplex_components()
    w = 0
    for prot in prots:
        # x counts the number of proteins of a given type in the node
        x = get_state(components, prot)
        sigma = std[prot][t]
        mu = mean[prot][t]
        # Warn, but keep going, if std is not strictly positive. The negated
        # test also catches NaN. The message is formatted rather than
        # concatenated so that a non-string time cannot raise a TypeError.
        if not sigma > 0:
            warnings.warn(
                f'WARNING!!! Standard deviation of protein {prot}'
                f' 0 or less at time {t}'
                '. May lead to illogical results.')
        w += (0.5 * ((x - mu) / sigma)**2
              + np.log(sigma * np.sqrt(2 * np.pi)))
    return w
62 
63 
def calc_likelihood(exp_comp_map, nodes):
    """
    Function that adds a score for the compositional likelihood for all
    states represented as nodes in the graph. The composition likelihood
    assumes a Gaussian distribution for copy number of each protein or
    subcomplex with means and standard deviations derived from experiment.
    Returns the nodes, with the new weights added.

    @param exp_comp_map: dictionary, which describes protein stoichiometry.
           The key describes the protein, which should correspond to names
           within the expected_subcomplexes. Only copy numbers for proteins
           or subcomplexes included in this dictionary will be scored. For
           each of these proteins, a csv file should be provided with protein
           copy number data. The csv file should have 3 columns,
           1) "Time", which matches up to the possible times in the graph,
           2) "mean", the average protein copy number at that time point
           from experiment, and 3) "std", the standard deviation of that
           protein copy number from experiment.
    @param nodes: list of graphNode objects, which have already been
           initiated with static scores
    @return nodes: edited list of graphNode objects, which now have static
            and composition scores
    @exception FileNotFoundError if a composition file listed in
               exp_comp_map does not exist.
    """
    import pandas as pd
    # Data is stored as a dictionary of dictionaries. The first key is the
    # protein, the second key is the time. The value is the mean or the
    # standard deviation of the protein copy number.
    mean = {}
    std = {}
    for prot, comp_file in exp_comp_map.items():
        if not os.path.exists(comp_file):
            # Formatted (not concatenated) so that path-like objects are
            # reported instead of triggering a TypeError.
            raise FileNotFoundError(
                "Error!!! Check exp_comp_map. Unable to find composition "
                f"file: {comp_file}\nClosing...")
        # import csv file as pandas data frame
        exp = pd.read_csv(comp_file)
        # Pair every time with its mean / std. The columns are converted to
        # arrays first so the stored numbers are the array's own scalars
        # rather than whatever iterating the Series directly would yield.
        times = exp['Time'].to_numpy()
        mean[prot] = dict(zip(times, exp['mean'].to_numpy()))
        std[prot] = dict(zip(times, exp['std'].to_numpy()))
    # list of all proteins to score
    prots = list(exp_comp_map)
    # loop over all nodes and calculate the likelihood for each node
    for node in nodes:
        # compute the compositional likelihood of the node
        weight = composition_likelihood_function(mean, std, prots, node)
        # add state weight to node
        node.add_score(float(weight))
    return nodes
def composition_likelihood_function
Function that calculates the likelihood of an individual node, used by calc_likelihood().
def get_state
function to calculate how many times a protein appears in a list of proteins, which can be accessed f...
def calc_likelihood
Function that adds a score for the compositional likelihood for all states represented as nodes in th...
The general base class for IMP exceptions.
Definition: exception.h:48