7 from statsmodels.distributions.empirical_distribution
import ECDF
8 from statsmodels.distributions.empirical_distribution
import StepFunction
15 def compute_cdf(dmap, exclude_zero=True):
19 for i
in range(dmap.get_number_of_voxels()):
20 val = dmap.get_value(i)
21 if not exclude_zero
or val != 0:
24 raise ValueError(
"The input density map contains no valid "
30 def compute_inverse_cdf(dmap, rn=(0, 1), exclude_zero=
True):
33 f = compute_cdf(dmap, exclude_zero=exclude_zero)
34 x = np.arange(rn[0], rn[1], 0.0001)
36 fi = StepFunction(y, x)
40 def compare_stats_hist(ref, exp):
42 expcdf = compute_cdf(exp)
43 refinvcdf = compute_inverse_cdf(ref)
44 refcdf = compute_cdf(ref)
45 emax = exp.get_max_value()
46 emin = exp.get_min_value()
47 print(
"Val ecdf refval refcdf")
48 for i
in np.arange(emin, emax, 0.01):
50 refval = refinvcdf(cc)
51 print(i, cc, refval, refinvcdf(refval))
53 print(
"Val ecdf refval refcdf")
54 for i
in np.arange(0, 1, 0.01):
55 print(i, refcdf(i), refinvcdf(i))
58 def convert_map_to_ndarray(dmap):
60 for i
in range(dmap.get_number_of_voxels()):
61 val = dmap.get_value(i)
63 map_array = np.array(vals)
67 def hist_match(source_map, template_map):
69 Adjust the pixel values of a Map such that its histogram
70 matches that of a target Map
74 source: IMP.map object
75 Map to transform; the histogram is computed over the flattened
77 template: IMP.map object
78 Template map; can have different dimensions to source
81 source = convert_map_to_ndarray(source_map)
82 template = convert_map_to_ndarray(template_map)
86 s_values, bin_idx, s_counts = np.unique(source, return_inverse=
True,
88 t_values, t_counts = np.unique(template, return_counts=
True)
93 s_quantiles = np.cumsum(s_counts).astype(np.float64)
94 s_quantiles /= s_quantiles[-1]
95 t_quantiles = np.cumsum(t_counts).astype(np.float64)
96 t_quantiles /= t_quantiles[-1]
98 of = open(
"hmatch_new.dat",
"w")
100 for i
in range(source_map.get_number_of_voxels()):
101 expval = source_map.get_value(i)
103 id_ = np.where(s_values == expval)
107 interp_t_value = np.interp(s_quantiles[id_], t_quantiles, t_values)
108 of.write(str(i)+
" "+str(expval)+
" "+str(interp_t_value[0])+
"\n")
109 source_map.set_value(i, interp_t_value)
112 def histogram_match_map(ref, exp, em_map, df=None):
114 Match the histogram of exp to ref
117 expcdf = compute_cdf(exp)
120 refinvcdf = compute_inverse_cdf(ref)
121 refcdf = compute_cdf(ref)
124 of = open(os.path.join(os.path.dirname(em_map),
"hmatch.dat"),
"w")
125 for i
in range(exp.get_number_of_voxels()):
126 expval = exp.get_value(i)
129 expc = expcdf(expval)
131 refval = refinvcdf(expc)
134 of.write(str(i)+
" "+str(expval)+
" "+str(expc)+
" "
135 + str(refval)+
" "+str(refcdf(refval))+
"\n")
136 exp.set_value(i, refval)
139 def get_percentile_of_value(dmap, value, thresh=-100000):
145 for i
in range(dmap.get_number_of_voxels()):
146 val = dmap.get_value(i)
148 dmap_vals.append(val)
149 pct = (np.array(dmap_vals) < value).sum() / len(dmap_vals)
153 parser = argparse.ArgumentParser(
154 description=
'Given an EMDB id, normalize the density map using '
155 'histogram matching')
156 parser.add_argument(
'emdb', metavar=
'emdb', type=str,
157 help=
'Provide an EMDB id')
158 parser.add_argument(
'--thresh', metavar=
'thresh', type=str,
159 help=
'map threshold value')
161 "--database_home", dest=
"database_home", type=str,
162 help=
"Directory containing all data files used in the protocol",
165 "--reference_map", dest=
"reference_map", type=str,
166 help=
"reference map for voxel data extraction",
167 default=
"reference/ref.mrc")
169 args = parser.parse_args()
170 curr_dir = os.getcwd()
174 em_map = os.path.join(args.database_home, str(emdb),
"0system",
"emdb.map")
175 norm_em_map = os.path.join(args.database_home, str(emdb),
"0system",
176 "emdb_normalized_new.map")
177 pdb_file = os.path.join(args.database_home, str(emdb),
"0system",
179 data_file = os.path.join(args.database_home, str(emdb),
"0system",
180 "normalization_new.dat")
183 if not os.path.exists(em_map):
184 print(
"NO em map; check path", em_map)
187 df = open(data_file,
"w")
188 df.write(
"voxel exp_val exp_cdf ref_val ref_cdf")
199 dmap_c = dmap.get_cropped(particles, 14.0)
202 xml = config.get_xml(emdb)
203 thresh = tools.get_threshold_from_xml(xml)
206 thresh = float(args.thresh)
207 print(
"thresh value {} for the emdb {}".format(str(thresh), emdb))
213 histogram_match_map(ref_dmap, dmap_t, em_map, df=df)
double get_mass_from_number_of_residues(unsigned int num_aa)
Estimate the mass of a protein from the number of amino acids.
DensityMap * get_threshold_map(const DensityMap *orig_map, float threshold)
Return a map with 0 for all voxels below the threshold.
void read_pdb(TextInput input, int model, Hierarchy h)
Class for storing model, its restraints, constraints, and particles.
Basic utilities for handling cryo-electron microscopy 3D density maps.
Select all CA ATOM records.
Select hierarchy particles identified by the biological name.