IMP logo
IMP Reference Guide  2.13.0
The Integrative Modeling Platform
database.py
1 """@namespace IMP.EMageFit.database
2  Utility functions to manage SQL databases with sqlite3.
3 """
4 
5 import sqlite3 as sqlite
6 import os
7 import csv
8 import logging
9 
10 log = logging.getLogger("Database")
11 
12 
13 class Database2:
14 
15  """ Class to manage a SQL database built with sqlite3 """
16 
17  def __init__(self):
18  # Connection to the database
19  self.connection = None
20  # Cursor of actions
21  self.cursor = None
22  # Dictionary of tablenames and types (used to convert values when
23  # storing data)
24 
25  def create(self, filename, overwrite=False):
26  """ Creates a database by simply connecting to the file """
27  log.info("Creating database")
28  if overwrite and os.path.exists(filename):
29  os.remove(filename)
30  sqlite.connect(filename)
31 
32  def connect(self, filename):
33  """ Connects to the database in filename """
34  if not os.path.isfile(filename):
35  raise IOError("Database file not found: %s" % filename)
36  self.connection = sqlite.connect(filename)
37  self.cursor = self.connection.cursor()
38 
40  """ Checks if the class is connected to the database filename """
41  if self.connection is None:
42  raise ValueError("The database has not been created "
43  "or connection not established ")
44 
45  def create_table(self, table_name, column_names, column_types):
46  """ Creates a table. It expects a sorted dictionary
47  of (data_field,typename) entries """
48  log.info("Creating table %s", table_name)
50  sql_command = "CREATE TABLE %s (" % (table_name)
51  for name, data_type in zip(column_names, column_types):
52  sql_typename = get_sql_type_name(data_type)
53  sql_command += "%s %s," % (name, sql_typename)
54  # replace last comma for a parenthesis
55  n = len(sql_command)
56  sql_command = sql_command[0:n - 1] + ")"
57  log.debug(sql_command)
58  self.cursor.execute(sql_command)
59  self.connection.commit()
60 
61  def drop_table(self, table_name):
62  """
63  Delete a table if it exists
64  """
65  log.info("Deleting table %s", table_name)
67  sql_command = "DROP TABLE IF EXISTS %s" % (table_name)
68  log.debug(sql_command)
69  self.cursor.execute(sql_command)
70  self.connection.commit()
71 
72  def store_dataV1(self, table_name, data):
73  """ Inserts information in a given table of the database.
74  The info must be a list of tuples containing as many values
75  as columns in the table
76  Conversion of values is done AUTOMATICALLY after checking the
77  types stored in the table
78  """
80  n = len(data[0]) # number of columns for each row inserted
81  tuple_format = "(" + "?," * (n - 1) + "?)"
82  sql_command = "INSERT INTO %s VALUES %s " % (table_name, tuple_format)
83  # Fill the table with the info in the tuples
84  types = self.get_table_types(table_name)
85 # log.debug("Storing types: %s", types)
86  for x in data:
87 # log.debug("DATA %s", x)
88  # convert (applies the types stored in the table dictionary to each
89  # value in x
90  y = [apply_type(i) for i, apply_type in zip(x, types)]
91  self.cursor.execute(sql_command, y)
92  self.connection.commit()
93 
94  def store_data(self, table_name, data):
95  """ Inserts information in a given table of the database.
96  The info must be a list of tuples containing as many values
97  as columns in the table
98  Conversion of values is done AUTOMATICALLY after checking the
99  types stored in the table
100  """
101  if len(data) == 0:
102  log.warning("Inserting empty data")
103  return
104  self.check_if_is_connected()
105  n = len(data[0]) # number of columns for each row inserted
106  tuple_format = "(" + "?," * (n - 1) + "?)"
107  sql_command = "INSERT INTO %s VALUES %s " % (table_name, tuple_format)
108  # Fill the table with the info in the tuples
109  types = self.get_table_types(table_name)
110 # log.debug("Storing types: %s", types)
111  for i in range(len(data)):
112  data[i] = [apply_type(d) for d, apply_type in zip(data[i], types)]
113  self.cursor.executemany(sql_command, data)
114  self.connection.commit()
115 
116  def retrieve_data(self, sql_command):
117  """ Retrieves data from the database using the sql_command
118  returns the records as a list of tuples"""
119  self.check_if_is_connected()
120  log.debug("Retrieving data: %s" % sql_command)
121  self.cursor.execute(sql_command)
122  return self.cursor.fetchall()
123 
124  def update_data(self, table_name,
125  updated_fields,
126  updated_values,
127  condition_fields,
128  condition_values):
129  """ updates the register in the table identified by the condition
130  values for the condition fields
131  """
132  self.check_if_is_connected()
133  sql_command = "UPDATE %s SET " % (table_name)
134  for field, value in zip(updated_fields, updated_values):
135  sql_command += "%s=%s," % (field, value)
136  sql_command = sql_command.rstrip(",") + " WHERE "
137  s = self.get_condition_string(condition_fields, condition_values)
138  sql_command = sql_command + s
139  # print sql_command
140  log.debug("Updating %s: %s", table_name, sql_command)
141  self.cursor.execute(sql_command)
142  self.connection.commit()
143 
144  def create_view(self, view_name, table_name,
145  condition_fields, condition_values):
146  """ creates a view of the given table where the values are selected
147  using the condition values. See the help for update_data()
148  """
149  try: # if this fails is because the view already exist
150  self.drop_view(view_name)
151  except:
152  pass
153  sql_command = 'CREATE VIEW %s AS SELECT * FROM %s WHERE ' % (
154  view_name, table_name)
155  condition = self.get_condition_string(
156  condition_fields, condition_values)
157  sql_command += condition
158  log.info("Creating view %s", sql_command)
159  self.cursor.execute(sql_command)
160 
161  def create_view_of_best_records(
162  self,
163  view_name,
164  table_name,
165  orderby,
166  n_records):
167  try: # if this fails is because the view already exist
168  self.drop_view(view_name)
169  except:
170  pass
171  sql_command = """CREATE VIEW %s AS SELECT * FROM %s
172  ORDER BY %s ASC LIMIT %d """ % (view_name, table_name, orderby, n_records)
173  log.info("Creating view %s", sql_command)
174  self.cursor.execute(sql_command)
175 
176  def drop_view(self, view_name):
177  """ Removes a view from the database """
178  self.cursor.execute('DROP VIEW %s' % view_name)
179 
180  def get_table(self, table_name, fields=False, orderby=False):
181  """ Returns th fields requested from the table """
182  fields = self.get_fields_string(fields)
183  sql_command = "SELECT %s FROM %s " % (fields, table_name)
184  if orderby:
185  sql_command += " ORDER BY %s ASC" % orderby
186  data = self.retrieve_data(sql_command)
187  return data
188 
189  def get_fields_string(self, fields, field_delim=","):
190  if fields:
191  return field_delim.join(fields)
192  return "*"
193 
194  def close(self):
195  """ Closes the database """
196  self.check_if_is_connected()
197  self.cursor.close()
198  self.connection.close()
199 
200  def get_condition_string(self, fields, values):
201  """ creates a condition applying each value to each field
202  """
203  s = ""
204  for field, value in zip(fields, values):
205  s += "%s=%s AND " % (field, value)
206  # remove last AND
207  n = len(s)
208  s = s[0:n - 5]
209  return s
210 
211  def get_table_types(self, name):
212  """
213  Gets info about a table and retuns all the types in it
214  """
215  self.check_if_is_connected()
216  sql_command = "PRAGMA table_info(%s)" % name
217  self.cursor.execute(sql_command)
218  info = self.cursor.fetchall()
219  types = []
220  for row in info:
221  if row[2] == "INT":
222  types.append(int)
223  elif row[2] == "DOUBLE":
224  types.append(float)
225  elif row[2][0:7] == "VARCHAR":
226  types.append(str)
227  return types
228 
229  def get_table_column_names(self, name):
230  """
231  Get the names of the columns for a given table
232  """
233  self.check_if_is_connected()
234  sql_command = "PRAGMA table_info(%s)" % name
235  self.cursor.execute(sql_command)
236  info = self.cursor.fetchall()
237  return [row[1] for row in info]
238 
239  def execute_sql_command(self, sql_command):
240  self.check_if_is_connected()
241  self.cursor.execute(sql_command)
242  self.connection.commit()
243 
244  def add_column(self, table, column, data_type):
245  """
246  Add a column to a table
247  column - the name of the column.
248  data_type - the type: int, float, str
249  """
250  sql_typename = get_sql_type_name(data_type)
251  sql_command = "ALTER TABLE %s ADD %s %s" % (
252  table, column, sql_typename)
253  self.execute_sql_command(sql_command)
254 
255  def add_columns(self, table, names, types, check=True):
256  """
257  Add columns to the database. If check=True, columns with names
258  already in the database are skipped. If check=False no check
259  is done and trying to add a column that already exists will
260  raise and exception
261  """
262  col_names = self.get_table_column_names(table)
263  if check:
264  for name, dtype in zip(names, types):
265  if name not in col_names:
266  self.add_column(table, name, dtype)
267  else:
268  for name, dtype in zip(names, types):
269  self.add_column(table, name, dtype)
270 
271  def get_tables_names(self):
272  sql_command = """ SELECT tbl_name FROM sqlite_master """
273  data = self.retrieve_data(sql_command)
274  names = [d[0] for d in data]
275  return names
276 
277  def select_table(self):
278  """
279  Prompt for tables so the user can choose one
280  """
281  table_name = ""
282  self.check_if_is_connected()
283  tables = self.get_tables_names()
284  for t in tables:
285  say = ''
286  while say not in ('n', 'y'):
287  say = raw_input("Use table %s (y/n) " % t)
288  if say == 'y':
289  table_name = t
290  columns = self.get_table_column_names(t)
291  break
292  return table_name, columns
293 
294  def drop_columns(self, table, columns):
295 
296  cnames = self.get_table_column_names(table)
297  for name in columns:
298  cnames.remove(name)
299  names_txt = ", ".join(cnames)
300  sql_command = [
301  "CREATE TEMPORARY TABLE backup(%s);" % names_txt,
302  "INSERT INTO backup SELECT %s FROM %s" % (names_txt, table),
303  "DROP TABLE %s;" % table,
304  "CREATE TABLE %s(%s);" % (table, names_txt),
305  "INSERT INTO %s SELECT * FROM backup;" % table,
306  "DROP TABLE backup;",
307  ]
308  for command in sql_command:
309  log.debug(command)
310 # print command
311  self.cursor.execute(command)
312 
313 
314 def print_data(data, delimiter=" "):
315  """ Prints the data recovered from a database """
316  for row in data:
317  line = delimiter.join([str(x) for x in row])
318  print(line)
319 
320 
321 def write_data(data, output_file, delimiter=" "):
322  """writes data to a file. The output file is expected to be a python
323  file object """
324  w = csv.writer(output_file, delimiter=delimiter)
325  for row in data:
326  w.writerow(row)
327 
328 
329 def get_sql_type_name(data_type):
330  if(data_type == int):
331  return "INT"
332  elif(data_type == float):
333  return "DOUBLE"
334  elif(data_type == str):
335  return (
336  # 10 is a random number, SQLITE does not chop strings
337  "VARCHAR(10)"
338  )
339 
340 
341 def open(fn_database):
342  db = Database2()
343  db.connect(fn_database)
344  return db
345 
346 
347 def read_data(fn_database, sql_command):
348  db = Database2()
349  db.connect(fn_database)
350  data = db.retrieve_data(sql_command)
351  db.close()
352  return data
353 
354 
356  """ Return indices that sort the list l """
357  pairs = sorted([(element, i) for i, element in enumerate(l)])
358  indices = [p[1] for p in pairs]
359  return indices
360 
361 
362 def merge_databases(fns, fn_output, tbl):
363  """
364  Reads a table from a set of database files into a single file
365  Makes sure to reorder all column names if necessary before merging
366  """
367  # Get names and types of the columns from first database file
368  db = Database2()
369  db.connect(fns[0])
370  names = db.get_table_column_names(tbl)
371  types = db.get_table_types(tbl)
372  indices = get_sorting_indices(names)
373  sorted_names = [names[i] for i in indices]
374  sorted_types = [types[i] for i in indices]
375  log.info("Merging databases. Saving to %s", fn_output)
376  out_db = Database2()
377  out_db.create(fn_output, overwrite=True)
378  out_db.connect(fn_output)
379  out_db.create_table(tbl, sorted_names, sorted_types)
380  for fn in fns:
381  log.debug("Reading %s", fn)
382  db.connect(fn)
383  names = sorted(db.get_table_column_names(tbl))
384  they_are_sorted = ",".join(names)
385  log.debug("Retrieving %s", they_are_sorted)
386  sql_command = "SELECT %s FROM %s" % (they_are_sorted, tbl)
387  data = db.retrieve_data(sql_command)
388  out_db.store_data(tbl, data)
389  db.close()
390  out_db.close()
def get_table_column_names
Get the names of the columns for a given table.
Definition: database.py:229
def close
Closes the database.
Definition: database.py:194
Class to manage a SQL database built with sqlite3.
Definition: database.py:13
def get_table_types
Gets info about a table and retuns all the types in it.
Definition: database.py:211
def get_sorting_indices
Return indices that sort the list l.
Definition: database.py:355
def store_data
Inserts information in a given table of the database.
Definition: database.py:94
def get_condition_string
creates a condition applying each value to each field
Definition: database.py:200
def create_table
Creates a table.
Definition: database.py:45
def retrieve_data
Retrieves data from the database using the sql_command returns the records as a list of tuples...
Definition: database.py:116
def create
Creates a database by simply connecting to the file.
Definition: database.py:25
def print_data
Prints the data recovered from a database.
Definition: database.py:314
def get_table
Returns th fields requested from the table.
Definition: database.py:180
def drop_table
Delete a table if it exists.
Definition: database.py:61
def create_view
creates a view of the given table where the values are selected using the condition values...
Definition: database.py:144
def connect
Connects to the database in filename.
Definition: database.py:32
def add_column
Add a column to a table column - the name of the column.
Definition: database.py:244
def store_dataV1
Inserts information in a given table of the database.
Definition: database.py:72
def select_table
Prompt for tables so the user can choose one.
Definition: database.py:277
def merge_databases
Reads a table from a set of database files into a single file Makes sure to reorder all column names ...
Definition: database.py:362
def add_columns
Add columns to the database.
Definition: database.py:255
def drop_view
Removes a view from the database.
Definition: database.py:176
def update_data
updates the register in the table identified by the condition values for the condition fields ...
Definition: database.py:124
def check_if_is_connected
Checks if the class is connected to the database filename.
Definition: database.py:39
def write_data
writes data to a file.
Definition: database.py:321