IMP  2.1.1
The Integrative Modeling Platform
solutions_io.py
1 """@namespace IMP.em2d.solutions_io
2  Utility functions to store and retrieve solution information.
3 """
4 
5 import IMP.em2d.imp_general.io as io
6 import IMP.em2d.Database as Database
7 
8 import sys
9 import heapq
10 import math
11 import os
12 import csv
13 import time
14 import logging
15 import glob
16 import numpy as np
17 
# Compatibility shim: keep the builtin set when it exists, otherwise fall
# back to the Set class of the sets module under the same name.
try:
    set = set
except NameError:
    from sets import Set as set

# Logger shared by every function and method in this module
log = logging.getLogger("solutions_io")

unit_delim = "/" # separate units within a field (eg, reference frames).
# separates the column names when a list of fields is joined for a SQL command
field_delim = ","
27 
class ClusterRecord(tuple):
    """
    Simple named tuple class with exactly five fields.

    The fields can be read by position or through the read-only
    properties cluster_id, n_elements, representative, elements and
    solutions_ids (in that order).
    """

    class _itemgetter(object):
        """Callable that returns the item stored at a fixed index"""
        def __init__(self, ind):
            self.__ind = ind

        def __call__(self, obj):
            return obj[self.__ind]

    __n_fields = 5

    def __new__(cls, iterable):
        """
        Build the record.
        @param iterable Any iterable providing the 5 values of the fields
        @exception TypeError if the number of values is not 5
        """
        # A tuple is immutable, so the contents must be validated here:
        # by the time __init__ runs the object is already built, and
        # forwarding arguments to tuple.__init__ is a TypeError on Python 3.
        # Materializing first also accepts iterables without len().
        values = tuple(iterable)
        if len(values) != cls.__n_fields:
            raise TypeError("Expected %d arguments, got %d"
                            % (cls.__n_fields, len(values)))
        return tuple.__new__(cls, values)

    cluster_id = property(_itemgetter(0))
    n_elements = property(_itemgetter(1))
    representative = property(_itemgetter(2))
    elements = property(_itemgetter(3))
    solutions_ids = property(_itemgetter(4))
49 
50 
51 #################################
52 
53 # INPUT/OUTPUT OF SOLUTIONS OBTAINED WITH DominoModel
54 
55 #################################
56 
class HeapRecord(tuple):
    """
    The heapq algorithm is a min-heap. I want a max-heap, that pops the
    larger values out of the heap.
    For that I have to modify the comparison function and also set the
    index that is used for the comparison. The index corresponds to
    the restraint that we desired to order by
    """
    def __new__(cls, x, i):
        """
        Build from a tuple and the index used to compare
        @param x Iterable with the values of the record
        @param i Index of the value used in the comparisons
        """
        record = tuple.__new__(cls, x)
        # Keep the index on the instance. The first argument of __new__ is
        # the class, so assigning to it would share one index between all
        # the records ever created.
        record.i = i
        return record

    def __lt__(self, other):
        """
        Compare. To convert the min-heap into a max-heap, the lower than
        comparison is transformed into a greater-than
        """
        i = self.i
        return self[i] > other[i]

    # Need __le__ as well for older Pythons
    def __le__(self, other):
        i = self.i
        return self[i] >= other[i]
86 
87 
def gather_best_solution_results(fns, fn_output, max_number=50000,
                                 raisef=0.1, orderby="em2d"):
    """
    Reads a set of database files and merge them into a single file.

    @param fns List of files with databases
    @param fn_output The database to create
    @param max_number Maximum number of records to keep, sorted according
            to orderby
    @param raisef Ratio of problematic database files tolerated before
            raising an error. This option is to tolerate some files
            of the databases being broken because the cluster fails,
            fill the disks, etc
    @param orderby Criterion used to sort the records
    @exception IOError if the ratio of files that could not be read is
            larger than raisef
    NOTE:
    Makes sure to reorder all column names if necessary before merging
    The record for the native solution is only added once (from first file).
    """
    tbl = "results"
    # Get names and types of the columns from first database file
    db = Database.Database2()
    db.connect(fns[0])
    try:
        names = db.get_table_column_names(tbl)
        types = db.get_table_types(tbl)
        indices = get_sorting_indices(names)
        sorted_names = [names[i] for i in indices]
        sorted_types = [types[i] for i in indices]
        # Every SELECT below asks for the columns in sorted order, so this
        # is the position of the sorting value inside each retrieved record
        ind = sorted_names.index(orderby)
        they_are_sorted = field_delim.join(sorted_names)
        # Get the native structure data from the first database
        sql_command = """SELECT %s FROM %s
                      WHERE assignment="native" LIMIT 1 """ % (
                                                they_are_sorted, tbl)
        native_data = db.retrieve_data(sql_command)
    finally:
        db.close()
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = Database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    try:
        out_db.create_table(tbl, sorted_names, sorted_types)

        best_records = []
        n_problems = 0
        for fn in fns:
            try:
                log.info("Reading %s", fn)
                db.connect(fn)
                try:
                    sql_command = """SELECT %s FROM %s
                                  WHERE assignment<>"native"
                                  ORDER BY %s ASC LIMIT %s """ % (
                                  they_are_sorted, tbl, orderby, max_number)
                    data = db.retrieve_data(sql_command)
                finally:
                    # release the file also when the query fails
                    db.close()
                log.info("%s records read from %s", len(data), fn)
                # Fill heap
                for d in data:
                    a = HeapRecord(d, ind)
                    if len(best_records) < max_number:
                        heapq.heappush(best_records, a)
                    # remember that < here compares for greater value of
                    # the sorting column, as a HeapRecord is used
                    elif best_records[0] < a:
                        heapq.heapreplace(best_records, a)
            except Exception as e:
                log.error("Error for %s: %s", fn, e)
                n_problems += 1

        # If the number of problematic files is too high, report that
        # something big is going on. Otherwise tolerate some errors from
        # some tasks that failed (memory errors, locks, writing errors ...)
        ratio = float(n_problems)/float(len(fns))
        if ratio > raisef:
            raise IOError("There are %8.1f %s of the database "\
                "files to merge with problems! " % (ratio*100,"%"))
        # Add the native data to the best records. It is a plain tuple, so
        # it is appended instead of pushed: pushing would compare it with
        # the HeapRecords using a different ordering.
        records = list(best_records)
        if native_data:
            records.append(native_data[0])
        else:
            log.warning("No native record found in %s", fns[0])
        out_db.store_data(tbl, records)
    finally:
        out_db.close()
169 
def gather_solution_results(fns, fn_output, raisef=0.1):
    """
    Reads a set of database files and puts them in a single file
    Makes sure to reorder all column names if necessary before merging
    @param fns List of database files
    @param fn_output Name of the output database
    @param raisef Ratio of problematic database files tolerated before
            raising an error
    @exception IOError if the ratio of files that could not be read is
            larger than raisef
    """
    tbl = "results"
    # Get names and types of the columns from first database file
    db = Database.Database2()
    db.connect(fns[0])
    try:
        names = db.get_table_column_names(tbl)
        types = db.get_table_types(tbl)
    finally:
        # the same object reconnects to every file below, so release this
        # first connection instead of leaving it open
        db.close()
    indices = get_sorting_indices(names)
    sorted_names = [names[i] for i in indices]
    sorted_types = [types[i] for i in indices]
    log.info("Gathering results. Saving to %s", fn_output)
    out_db = Database.Database2()
    out_db.create(fn_output, overwrite=True)
    out_db.connect(fn_output)
    try:
        out_db.create_table(tbl, sorted_names, sorted_types)

        n_problems = 0
        for fn in fns:
            try:
                log.info("Reading %s", fn)
                db.connect(fn)
                try:
                    # the columns are requested in sorted order to match
                    # the layout of the output table
                    names = db.get_table_column_names(tbl)
                    names.sort()
                    they_are_sorted = field_delim.join(names)
                    log.debug("Retrieving %s", they_are_sorted)
                    sql_command = "SELECT %s FROM %s" % (they_are_sorted,
                                                         tbl)
                    data = db.retrieve_data(sql_command)
                    out_db.store_data(tbl, data)
                finally:
                    db.close()
            except Exception as e:
                log.error("Error for file %s: %s", fn, e)
                n_problems += 1
        ratio = float(n_problems)/float(len(fns))
        if ratio > raisef:
            raise IOError("There are %8.1f %s of the database "\
                "files to merge with problems! " % (ratio*100,"%"))
    finally:
        out_db.close()
214 
def get_sorting_indices(l):
    """
    Return indices that sort the list l
    @param l List of mutually comparable elements
    @return A list of positions: taking the elements of l in that order
            gives them sorted in ascending order
    """
    # Sorting (element, position) pairs breaks the ties between equal
    # elements by their original position
    pairs = [(element, i) for i, element in enumerate(l)]
    pairs.sort()
    return [p[1] for p in pairs]
221 
def get_best_solution(fn_database, Nth, fields=False, orderby=False,
                      tbl="results"):
    """
    Recover the n-th best solution from a database.
    The index Nth starts at 0
    @param fn_database File with the database
    @param Nth Position of the requested record once the table is sorted
    @param fields Fields to recover. If empty, all of them are requested
    @param orderby Column used for sorting in ascending order. If it is
            not given, no ORDER BY clause is used
    @param tbl Name of the table to read
    @return The first field of the requested record
    @exception ValueError if the table has no record at that position
    """
    f = get_fields_string(fields)
    sql_command = " SELECT %s FROM %s" % (f, tbl)
    if orderby:
        sql_command += " ORDER BY %s ASC" % orderby
    sql_command += " LIMIT 1 OFFSET %d " % Nth
    data = Database.read_data(fn_database, sql_command)
    if len(data) == 0:
        # Nothing at offset Nth means that the table has Nth records at
        # most. len(data) is always 0 here, so it says nothing useful.
        raise ValueError("The requested %s-th best solution does not exist. "\
                         "At most %s solutions found" % (Nth, Nth))
    # the first field of the only record is the solution requested
    return data[0][0]
238 
def get_pca(string, delimiter="/"):
    """
    Convert a text with delimited numbers into a list of floats
    @param string Text with the values, e.g. "0.5/1.2/3"
    @param delimiter Separator between consecutive values
    @return A list with one float per value
    """
    return [float(token) for token in string.split(delimiter)]
243 
def get_fields_string(fields):
    """
    Build the text with the fields requested in a SQL command.
    @param fields A list of strings. If it is empty (or any other false
            value), all the fields are requested
    @return The names joined with the field delimiter, or "*"
    """
    if not fields:
        return "*"
    return field_delim.join(fields)
255 
256 
258  """
259  Class for managing the results of the experiments
260  """
261  def __init__(self, ):
262  self.records = []
263  self.native_table_name = "native"
264  self.results_table = "results"
265  self.placements_table = "placements"
266  self.ccc_table_name = "ccc"
267  self.cluster_records = []
268 
269  # columns describing a solution in the results
270  self.results_description_columns = ["solution_id", "assignment",
271  "reference_frames"]
272  self.results_description_types = [int, str, str]
273  # columns describing measures for a result
274  self.results_measures_columns = ["drms", "cdrms", "crmsd"]
275  self.results_measures_types = [float, float, float]
276 
277  def add_results_table(self,restraints_names, add_measures=False):
278  """
279  Build the table of results
280  @param restraints_names The names given to the columns of the table
281  @param add_measures If True, add fields for comparing models
282  and native conformation
283  """
284  table_fields = self.results_description_columns + \
285  ["total_score"] + restraints_names
286  table_types = self.results_description_types + \
287  [float] + [float for r in restraints_names]
288  if add_measures:
289  # Add columns for measures
290  table_fields += self.results_measures_columns
291  table_types += self.results_measures_types
292  log.debug("Creating table %s\n%s",table_fields,table_types)
293  self.create_table(self.results_table, table_fields, table_types)
294  # create a table for the native assembly if we are benchmarking
295  if add_measures :
296  self.create_table(self.native_table_name, table_fields, table_types)
297 
298  def get_solutions_results_table(self, fields=False,
299  max_number=None, orderby=False):
300  """
301  Recovers solutions
302  @param fields Fields to recover from the table
303  @param max_number Maximum number of solutions to recover
304  @param orderby Name of the restraint used for sorting the states
305  """
306  self.check_if_is_connected()
307  log.info("Getting %s from solutions", fields)
308  f = self.get_fields_string(fields)
309  sql_command = "SELECT %s FROM %s " % (f, self.results_table)
310  if orderby:
311  sql_command += " ORDER BY %s ASC" % orderby
312  if max_number not in (None,False):
313  sql_command += " LIMIT %d" % (max_number)
314  log.debug("Using %s", sql_command )
315  data = self.retrieve_data(sql_command)
316  return data
317 
318  def get_solutions(self, fields=False, max_number=None, orderby=False):
319  """
320  Get solutions from the database.
321  @param fields Fields requested. If the fields are in different
322  tables, a left join is done. Otherwise get_solutions_results_table()
323  is called. See get_solutions_results_table() for the meaning
324  of the parameters.
325  @param max_number
326  @param orderby
327  """
328  tables = self.get_tables_names()
329  log.debug("tables %s", tables)
330  required_tables = set()
331  pairs_table_field = []
332 # fields_string = self.get_fields_string(fields)
333  if not fields:
334  fields = ["*",]
335  for f,t in [(f,t) for f in fields for t in tables]:
336  if t == "native" or f == "solution_id":
337  continue
338  columns = self.get_table_column_names(t)
339  if f in columns:
340  required_tables.add(t)
341  pairs_table_field.append((t,f))
342  required_tables = list(required_tables)
343  log.debug("required_tables %s", required_tables)
344  log.debug("pairs_table_field %s", pairs_table_field)
345  if len(required_tables) == 0:
346  data = self.get_solutions_results_table(fields,
347  max_number, orderby)
348  return data
349  elif len(required_tables) == 1 and required_tables[0] == "results":
350  data = self.get_solutions_results_table(fields,
351  max_number, orderby)
352  return data
353  elif len(required_tables) > 1:
354  sql_command = self.get_left_join_command( pairs_table_field,
355  required_tables)
356  if orderby:
357  sql_command += " ORDER BY %s ASC" % orderby
358  log.debug("Using %s", sql_command )
359  data = self.retrieve_data(sql_command)
360  return data
361  else:
362  raise ValueError("Fields not found in the database")
363 
364  def get_native_solution(self, fields=False):
365  """
366  Recover data for the native solution
367  @param fields Fields to recover
368  """
369 
370  f = self.get_fields_string(fields)
371  sql_command = "SELECT %s FROM %s " % (f, self.native_table_name)
372  data = self.retrieve_data(sql_command)
373  return data
374 
375  def add_record(self, solution_id, assignment, RFs, total_score,
376  restraints_scores, measures):
377  """
378  Add a recorde to the database
379  @param solution_id The key for the solution
380  @param assignment The assigment for the solution provided by
381  domino
382  @param RFs Reference frames of the rigid bodies of the components
383  of the assembly in the solution
384  @param total_score Total value of the scoring function
385  @param restraints_scores A list with all the values for the
386  restraints
387  @param measures A list with the values of all the measures for
388  benchmark
389  """
390  words = [io.ReferenceFrameToText(ref).get_text() for ref in RFs]
391  RFs_txt = unit_delim.join(words)
392  record = [solution_id, assignment, RFs_txt, total_score] + \
393  restraints_scores
394  if measures != None:
395  record = record + measures
396  self.records.append(record)
397 
398  def add_native_record(self, assignment, RFs, total_score,
399  restraints_scores):
400  """
401  Add a record for the native structure to the database
402  see add_record() for the meaning of the parameters
403  """
404  words = [io.ReferenceFrameToText(ref).get_text() for ref in RFs]
405  RFs_txt = unit_delim.join(words)
406  solution_id = 0
407  record = [solution_id, assignment, RFs_txt, total_score] + \
408  restraints_scores
409  measures = [0,0,0] # ["drms", "cdrms", "crmsd"]
410  record = record + measures
411  self.store_data(self.native_table_name, [record])
412 
413  def save_records(self,table="results"):
414  self.store_data(table, self.records)
415 
416  def format_placement_record(self, solution_id, distances, angles):
417  """ both distances and angles are expected to be a list of floats """
418  return [solution_id] + distances + angles
419 
420 
421  def add_placement_scores_table(self, names):
422  """
423  Creates a table to store the values of the placement scores for the
424  models.
425  @param names Names of the components of the assembly
426  """
427  self.check_if_is_connected()
428  self.placement_table_name = self.placements_table
429  table_fields = ["solution_id"]
430  table_fields += ["distance_%s" % name for name in names]
431  table_fields += ["angle_%s" % name for name in names]
432  table_types = [int] + [float for f in table_fields]
433  self.drop_table(self.placement_table_name)
434  self.create_table(self.placement_table_name, table_fields, table_types)
435  self.add_columns(self.native_table_name,
436  table_fields, table_types,check=True)
437  # update all placements scores to 0 for the native assembly
438  native_values = [0 for t in table_fields]
439  log.debug("%s", self.native_table_name)
440  log.debug("table fields %s", table_fields)
441  self.update_data(self.native_table_name,
442  table_fields, native_values,
443  ["assignment"], ["\"native\""])
444 
446  """
447  Return the names of the placement score fields in the database
448  """
449  columns = self.get_table_column_names(self.placements_table)
450  fields = [col for col in columns if "distance" in col or "angle" in col]
451  return fields
452 
453  def add_ccc_table(self):
454  """
455  Add a table to the database for store the values of the cross
456  correlation coefficient between a model and the native configuration
457  """
458 
459  self.check_if_is_connected()
460  table_fields = ["solution_id", "ccc"]
461  table_types = [int, float]
462  self.drop_table(self.ccc_table_name)
463  self.create_table(self.ccc_table_name, table_fields, table_types)
464  # update values for the native assembly
465  self.add_columns(self.native_table_name,
466  table_fields, table_types,check=True)
467  self.update_data(self.native_table_name,
468  table_fields, [0,1.00], ["assignment"], ["\"native\""])
469 
470  def format_ccc_record(self, solution_id, ccc):
471  """ Format for the record to store in the ccc table """
472  return [solution_id, ccc]
473 
474  def get_ccc(self, solution_id):
475  """
476  Recover the cross-correlation coefficient for a solution
477  @param solution_id
478  """
479  sql_command = """ SELECT ccc FROM %s
480  WHERE solution_id=%d """ % (self.ccc_table_name,
481  solution_id)
482  data = self.retrieve_data(sql_command)
483  return data[0][0]
484 
485  def store_ccc_data(self, ccc_data):
486  self.store_data(self.ccc_table_name, ccc_data)
487 
488  def store_placement_data(self, data):
489  log.debug("store placement table %s",data)
490  self.store_data(self.placement_table_name,data)
491 
492  def get_left_join_command(self, pairs_table_field, tables_names):
493  """
494  Format a left join SQL command that recovers all fileds from the
495  tables given
496  @param pairs_table_field Pairs of (table,field)
497  @param tables_names Names of the tables
498 
499  E.g. If pairs_table_filed = ((table1,a), (table2,b), (table3,c),
500  (table2,d)) and tables_names = (table1, table2, table3)
501 
502  The SQL command is:
503  SELECT table1.a, table2.b, table3.c, table2.d FROM table1
504  LEFT JOIN table2 ON table1.solution_id = table2.solution_id
505  LEFT JOIN table3 ON table1.solution_id = table3.solution_id
506  WHERE table1.solution_id IS NOT NULL AND
507  table2.solution_id IS NOT NULL AND
508  table3.solution_id IS NOT NULL
509  """
510 
511  txt = [ "%s.%s" % (p[0],p[1]) for p in pairs_table_field]
512  fields_requested = field_delim.join(txt)
513  sql_command = " SELECT %s FROM %s " % (fields_requested,tables_names[0])
514  n_tables = len(tables_names)
515  for i in range(1, n_tables):
516  a = tables_names[i-1]
517  b = tables_names[i]
518  sql_command += " LEFT JOIN %s " \
519  "ON %s.solution_id = %s.solution_id " % (b,a,b)
520  # add the condition of solution_id being not null, so there are not
521  # problems if some solutions are missing in one table
522  for i in range(n_tables-1):
523  sql_command += "WHERE %s.solution_id " \
524  "IS NOT NULL AND " % tables_names[i]
525  sql_command += " %s.solution_id IS NOT NULL " % tables_names[n_tables-1]
526  log.debug("%s" %sql_command)
527  return sql_command
528 
529  def add_clusters_table(self, name):
530  """
531  Add a table to store information about the clusters of structures
532  @param name Name of the table
533  """
534  self.cluster_table_name = name
535  self.check_if_is_connected()
536  table_fields = ("cluster_id","n_elements",
537  "representative","elements", "solutions_ids")
538  table_types = (int, int, int, str, str)
539  self.drop_table(name)
540  self.create_table(name, table_fields, table_types)
541 
542  def add_cluster_record(self, cluster_id, n_elements, representative,
543  elements, solutions_ids):
544  """
545  Add a record to the cluster database. Actually, only stores it
546  in a list (that will be added later)
547  @param cluster_id Number with the id of the cluster
548  @param n_elements Number of elements in the cluster
549  @param representative Number with the id of the representative
550  element
551  @param elements List with the number of the elements of the cluster
552  @param solutions_ids The numbers above are provided by the
553  clustering algorithm. The solutions_ids are the ids of the models
554  in "elements".
555  """
556 
557  record = (cluster_id, n_elements, representative, elements,
558  solutions_ids)
559  log.debug("Adding cluster record: %s", record)
560  self.cluster_records.append(record)
561 
563  """
564  Store the data for the clusters
565  """
566  log.info("Storing data of clusters. Number of records %s",
567  len(self.cluster_records) )
568  self.store_data(self.cluster_table_name, self.cluster_records)
569 
570  def get_solutions_from_list(self, fields=False, solutions_ids=[]):
571  """
572  Recover solutions for a specific list of results
573  @param fields Fields to recover fro the database
574  @param solutions_ids A list with the desired solutions. E.g. [0,3,6]
575  """
576  sql_command = """ SELECT %s FROM %s WHERE solution_id IN (%s) """
577  f = self.get_fields_string(fields)
578  str_ids = ",".join(map(str,solutions_ids))
579  data = self.retrieve_data( sql_command % (f, self.results_table, str_ids ) )
580  return data
581 
582  def get_native_rank(self, orderby):
583  """
584  Get the position of the native configuration
585  @param orderby Criterium used to sort the solutions
586  """
587  import numpy as np
588 
589  data = self.get_native_solution([orderby,])
590  native_value = data[0][0]
591  data = self.get_solutions_results_table(fields=[orderby,],
592  orderby=orderby)
593  values = [row[0] for row in data]
594  rank = np.searchsorted(values,native_value)
595  return rank
596 
597  def get_nth_largest_cluster(self, position, table_name="clusters"):
598  """
599  Recover the the information about the n-th largest cluster
600  @param position Cluster position (by size) requested
601  (1 is the largest cluster)
602  @param table_name Table where the information about the
603  clusters is stored
604  """
605  s = """ SELECT * FROM %s ORDER BY n_elements DESC """ % table_name
606  data = self.retrieve_data(s)
607  record = ClusterRecord(data[position-1])
608  return record
609 
610 
611  def get_individual_placement_statistics(self, solutions_ids):
612  """
613  Recovers from the database the placement scores for a set of
614  solutions, and returns the mean and standard deviation of the
615  placement score for each of the components of the complex being
616  scored. This function will be typical used to compute the variation
617  of the placement of each component within a cluster of solutions
618  @param solutions_ids The ids of the solutions used to compute
619  the statistics
620  @return The output are 4 numpy vectors:
621  placement_distances_mean - The mean placement distance for each
622  component
623  placement_distances_stddev - The standardd deviation of the
624  placement distance for each component
625  placement_angles_mean - The mean placement angle for each
626  component
627  placement_angles_stddev - The standard deviation of the placement
628  angle for each component,
629  """
630 
631  self.check_if_is_connected()
632  table = self.placements_table
633  fields = self.get_table_column_names(table)
634  distance_fields = filter(lambda x: 'distance' in x, fields)
635  angle_fields = filter(lambda x: 'angle' in x, fields)
636  sql_command = """ SELECT %s FROM %s WHERE solution_id IN (%s) """
637  # string with the solution ids to pass to the sql_command
638  str_ids = ",".join(map(str,solutions_ids))
639  log.debug("Solutions considered %s", solutions_ids)
640  s = sql_command % (",".join(distance_fields), table, str_ids )
641  data_distances = self.retrieve_data(s)
642  s = sql_command % (",".join(angle_fields), table, str_ids )
643  data_angles = self.retrieve_data(s)
644  D = np.array(data_distances)
645  placement_distances_mean = D.mean(axis=0)
646  placement_distances_stddev = D.std(axis=0)
647  A = np.array(data_angles)
648  placement_angles_mean = A.mean(axis=0)
649  placement_angles_stddev = A.std(axis=0)
650  return [placement_distances_mean,placement_distances_stddev,
651  placement_angles_mean, placement_angles_stddev]
652 
653 
654  def get_placement_statistics(self, solutions_ids):
655  """
656  Calculate the placement score and its standard deviation for
657  the complexes in a set of solutions. The values returned are
658  averages, as the placement score for a complex is the average
659  of the placement scores of the components. This function is used
660  to obtain global placement for a cluster of solutions.
661  @param solutions_ids The ids of the solutions used to compute
662  the statistics
663  @return The output are 4 values:
664  plcd_mean - Average of the placement distance for the entire
665  complex over all the solutions.
666  plcd_std - Standard deviation of the placement distance for
667  the entire complex over all the solutions.
668  plca_mean - Average of the placement angle for the entire
669  complex over all the solutions.
670  plca_std - Standard deviation of the placement angle for
671  the entire complex over all the solutions.
672  """
673  [placement_distances_mean,placement_distances_stddev,
674  placement_angles_mean, placement_angles_stddev] = \
675  self.get_individual_placement_statistics(solutions_ids)
676  plcd_mean = placement_distances_mean.mean(axis=0)
677  plcd_std = placement_distances_stddev.mean(axis=0)
678  plca_mean = placement_angles_mean.mean(axis=0)
679  plca_std = placement_angles_stddev.mean(axis=0)
680  return [plcd_mean, plcd_std, plca_mean, plca_std]
def format_placement_record
both distances and angles are expected to be a list of floats
def add_placement_scores_table
Creates a table to store the values of the placement scores for the models.
def check_if_is_connected
Checks if the class is connected to the database filename.
Definition: Database.py:37
Utility functions to manage SQL databases with sqlite3.
Definition: Database.py:1
def get_solutions
Get solutions from the database.
def get_solutions_results_table
Recovers solutions.
def get_best_solution
Recover the reference frame of the n-th best solution from a database.
def store_data
Inserts information in a given table of the database.
Definition: Database.py:91
def get_sorting_indices
Return indices that sort the list l.
def retrieve_data
Retrieves data from the database using the sql_command returns the records as a list of tuples...
Definition: Database.py:113
def get_table_column_names
Get the names of the columns for a given table.
Definition: Database.py:219
def update_data
updates the register in the table identified by the condition values for the condition fields ...
Definition: Database.py:121
def format_ccc_record
Format for the record to store in the ccc table.
def add_ccc_table
Add a table to the database for store the values of the cross correlation coefficient between a model...
def get_placement_statistics
Calculate the placement score and its standard deviation for the complexes in a set of solutions...
Simple named tuple class.
Definition: solutions_io.py:28
def add_results_table
Build the table of results.
def get_nth_largest_cluster
Recover the the information about the n-th largest cluster.
def add_columns
Add columns to the database.
Definition: Database.py:245
def drop_table
Delete a table if it exists.
Definition: Database.py:59
def get_solutions_from_list
Recover solutions for a specific list of results.
Class to manage a SQL database built with sqlite3.
Definition: Database.py:13
def add_native_record
Add a record for the native structure to the database see add_record() for the meaning of the paramet...
def add_record
Add a recorde to the database.
def store_cluster_data
Store the data for the clusters.
def gather_solution_results
Reads a set of database files and puts them in a single file Makes sure to reorder all column names i...
Class for managing the results of the experiments.
def get_native_rank
Get the position of the native configuration.
def create_table
Creates a table.
Definition: Database.py:43
def get_individual_placement_statistics
Recovers from the database the placement scores for a set of solutions, and returns the mean and stan...
def get_fields_string
Get a list of fields and return a string with them.
The heapq algorithm is a min-heap.
Definition: solutions_io.py:57
def get_native_solution
Recover data for the native solution.
def get_ccc
Recover the cross-correlation coefficient for a solution.
def get_left_join_command
Format a left join SQL command that recovers all fileds from the tables given.
def get_placement_fields
Return the names of the placement score fields in the database.
def add_clusters_table
Add a table to store information about the clusters of structures.
def add_cluster_record
Add a record to the cluster database.
def gather_best_solution_results
Reads a set of database files and merge them into a single file.
Definition: solutions_io.py:88