IMP logo
IMP Reference Guide  2.20.0
The Integrative Modeling Platform
database.py
1 """@namespace IMP.EMageFit.database
2  Utility functions to manage SQL databases with sqlite3.
3 """
4 
5 import sqlite3 as sqlite
6 import os
7 import csv
8 import sys
9 import logging
10 if sys.version_info[0] == 2:
11  input = raw_input # noqa: F821
12 
13 log = logging.getLogger("Database")
14 
15 
16 class Database2:
17 
18  """ Class to manage a SQL database built with sqlite3 """
19 
20  def __init__(self):
21  # Connection to the database
22  self.connection = None
23  # Cursor of actions
24  self.cursor = None
25  # Dictionary of tablenames and types (used to convert values when
26  # storing data)
27 
28  def create(self, filename, overwrite=False):
29  """ Creates a database by simply connecting to the file """
30  log.info("Creating database")
31  if overwrite and os.path.exists(filename):
32  os.remove(filename)
33  sqlite.connect(filename)
34 
35  def connect(self, filename):
36  """ Connects to the database in filename """
37  if not os.path.isfile(filename):
38  raise IOError("Database file not found: %s" % filename)
39  self.connection = sqlite.connect(filename)
40  self.cursor = self.connection.cursor()
41 
43  """ Checks if the class is connected to the database filename """
44  if self.connection is None:
45  raise ValueError("The database has not been created "
46  "or connection not established ")
47 
48  def create_table(self, table_name, column_names, column_types):
49  """ Creates a table. It expects a sorted dictionary
50  of (data_field,typename) entries """
51  log.info("Creating table %s", table_name)
53  sql_command = "CREATE TABLE %s (" % (table_name)
54  for name, data_type in zip(column_names, column_types):
55  sql_typename = get_sql_type_name(data_type)
56  sql_command += "%s %s," % (name, sql_typename)
57  # replace last comma for a parenthesis
58  n = len(sql_command)
59  sql_command = sql_command[0:n - 1] + ")"
60  log.debug(sql_command)
61  self.cursor.execute(sql_command)
62  self.connection.commit()
63 
64  def drop_table(self, table_name):
65  """
66  Delete a table if it exists
67  """
68  log.info("Deleting table %s", table_name)
70  sql_command = "DROP TABLE IF EXISTS %s" % (table_name)
71  log.debug(sql_command)
72  self.cursor.execute(sql_command)
73  self.connection.commit()
74 
75  def store_dataV1(self, table_name, data):
76  """ Inserts information in a given table of the database.
77  The info must be a list of tuples containing as many values
78  as columns in the table
79  Conversion of values is done AUTOMATICALLY after checking the
80  types stored in the table
81  """
83  n = len(data[0]) # number of columns for each row inserted
84  tuple_format = "(" + "?," * (n - 1) + "?)"
85  sql_command = "INSERT INTO %s VALUES %s " % (table_name, tuple_format)
86  # Fill the table with the info in the tuples
87  types = self.get_table_types(table_name)
88  for x in data:
89  # convert (applies the types stored in the table dictionary to each
90  # value in x
91  y = [apply_type(i) for i, apply_type in zip(x, types)]
92  self.cursor.execute(sql_command, y)
93  self.connection.commit()
94 
95  def store_data(self, table_name, data):
96  """ Inserts information in a given table of the database.
97  The info must be a list of tuples containing as many values
98  as columns in the table
99  Conversion of values is done AUTOMATICALLY after checking the
100  types stored in the table
101  """
102  if len(data) == 0:
103  log.warning("Inserting empty data")
104  return
105  self.check_if_is_connected()
106  n = len(data[0]) # number of columns for each row inserted
107  tuple_format = "(" + "?," * (n - 1) + "?)"
108  sql_command = "INSERT INTO %s VALUES %s " % (table_name, tuple_format)
109  # Fill the table with the info in the tuples
110  types = self.get_table_types(table_name)
111 # log.debug("Storing types: %s", types)
112  for i in range(len(data)):
113  data[i] = [apply_type(d) for d, apply_type in zip(data[i], types)]
114  self.cursor.executemany(sql_command, data)
115  self.connection.commit()
116 
117  def retrieve_data(self, sql_command):
118  """ Retrieves data from the database using the sql_command
119  returns the records as a list of tuples"""
120  self.check_if_is_connected()
121  log.debug("Retrieving data: %s" % sql_command)
122  self.cursor.execute(sql_command)
123  return self.cursor.fetchall()
124 
125  def update_data(self, table_name,
126  updated_fields,
127  updated_values,
128  condition_fields,
129  condition_values):
130  """ updates the register in the table identified by the condition
131  values for the condition fields
132  """
133  self.check_if_is_connected()
134  sql_command = "UPDATE %s SET " % (table_name)
135  for field, value in zip(updated_fields, updated_values):
136  sql_command += "%s=%s," % (field, value)
137  sql_command = sql_command.rstrip(",") + " WHERE "
138  s = self.get_condition_string(condition_fields, condition_values)
139  sql_command = sql_command + s
140  # print sql_command
141  log.debug("Updating %s: %s", table_name, sql_command)
142  self.cursor.execute(sql_command)
143  self.connection.commit()
144 
145  def create_view(self, view_name, table_name,
146  condition_fields, condition_values):
147  """ creates a view of the given table where the values are selected
148  using the condition values. See the help for update_data()
149  """
150  try: # if this fails is because the view already exist
151  self.drop_view(view_name)
152  except: # noqa: E722
153  pass
154  sql_command = 'CREATE VIEW %s AS SELECT * FROM %s WHERE ' % (
155  view_name, table_name)
156  condition = self.get_condition_string(
157  condition_fields, condition_values)
158  sql_command += condition
159  log.info("Creating view %s", sql_command)
160  self.cursor.execute(sql_command)
161 
162  def create_view_of_best_records(
163  self,
164  view_name,
165  table_name,
166  orderby,
167  n_records):
168  try: # if this fails is because the view already exist
169  self.drop_view(view_name)
170  except: # noqa: E722
171  pass
172  sql_command = """CREATE VIEW %s AS SELECT * FROM %s
173  ORDER BY %s ASC LIMIT %d """ % (view_name, table_name,
174  orderby, n_records)
175  log.info("Creating view %s", sql_command)
176  self.cursor.execute(sql_command)
177 
178  def drop_view(self, view_name):
179  """ Removes a view from the database """
180  self.cursor.execute('DROP VIEW %s' % view_name)
181 
182  def get_table(self, table_name, fields=False, orderby=False):
183  """ Returns th fields requested from the table """
184  fields = self.get_fields_string(fields)
185  sql_command = "SELECT %s FROM %s " % (fields, table_name)
186  if orderby:
187  sql_command += " ORDER BY %s ASC" % orderby
188  data = self.retrieve_data(sql_command)
189  return data
190 
191  def get_fields_string(self, fields, field_delim=","):
192  if fields:
193  return field_delim.join(fields)
194  return "*"
195 
196  def close(self):
197  """ Closes the database """
198  self.check_if_is_connected()
199  self.cursor.close()
200  self.connection.close()
201 
202  def get_condition_string(self, fields, values):
203  """ creates a condition applying each value to each field
204  """
205  s = ""
206  for field, value in zip(fields, values):
207  s += "%s=%s AND " % (field, value)
208  # remove last AND
209  n = len(s)
210  s = s[0:n - 5]
211  return s
212 
213  def get_table_types(self, name):
214  """
215  Gets info about a table and returns all the types in it
216  """
217  self.check_if_is_connected()
218  sql_command = "PRAGMA table_info(%s)" % name
219  self.cursor.execute(sql_command)
220  info = self.cursor.fetchall()
221  types = []
222  for row in info:
223  if row[2] == "INT":
224  types.append(int)
225  elif row[2] == "DOUBLE":
226  types.append(float)
227  elif row[2][0:7] == "VARCHAR":
228  types.append(str)
229  return types
230 
231  def get_table_column_names(self, name):
232  """
233  Get the names of the columns for a given table
234  """
235  self.check_if_is_connected()
236  sql_command = "PRAGMA table_info(%s)" % name
237  self.cursor.execute(sql_command)
238  info = self.cursor.fetchall()
239  return [row[1] for row in info]
240 
241  def execute_sql_command(self, sql_command):
242  self.check_if_is_connected()
243  self.cursor.execute(sql_command)
244  self.connection.commit()
245 
246  def add_column(self, table, column, data_type):
247  """
248  Add a column to a table
249  column - the name of the column.
250  data_type - the type: int, float, str
251  """
252  sql_typename = get_sql_type_name(data_type)
253  sql_command = "ALTER TABLE %s ADD %s %s" % (
254  table, column, sql_typename)
255  self.execute_sql_command(sql_command)
256 
257  def add_columns(self, table, names, types, check=True):
258  """
259  Add columns to the database. If check=True, columns with names
260  already in the database are skipped. If check=False no check
261  is done and trying to add a column that already exists will
262  raise and exception
263  """
264  col_names = self.get_table_column_names(table)
265  if check:
266  for name, dtype in zip(names, types):
267  if name not in col_names:
268  self.add_column(table, name, dtype)
269  else:
270  for name, dtype in zip(names, types):
271  self.add_column(table, name, dtype)
272 
273  def get_tables_names(self):
274  sql_command = """ SELECT tbl_name FROM sqlite_master """
275  data = self.retrieve_data(sql_command)
276  names = [d[0] for d in data]
277  return names
278 
279  def select_table(self):
280  """
281  Prompt for tables so the user can choose one
282  """
283  table_name = ""
284  self.check_if_is_connected()
285  tables = self.get_tables_names()
286  for t in tables:
287  say = ''
288  while say not in ('n', 'y'):
289  say = input("Use table %s (y/n) " % t)
290  if say == 'y':
291  table_name = t
292  columns = self.get_table_column_names(t)
293  break
294  return table_name, columns
295 
296  def drop_columns(self, table, columns):
297 
298  cnames = self.get_table_column_names(table)
299  for name in columns:
300  cnames.remove(name)
301  names_txt = ", ".join(cnames)
302  sql_command = [
303  "CREATE TEMPORARY TABLE backup(%s);" % names_txt,
304  "INSERT INTO backup SELECT %s FROM %s" % (names_txt, table),
305  "DROP TABLE %s;" % table,
306  "CREATE TABLE %s(%s);" % (table, names_txt),
307  "INSERT INTO %s SELECT * FROM backup;" % table,
308  "DROP TABLE backup;",
309  ]
310  for command in sql_command:
311  log.debug(command)
312 # print command
313  self.cursor.execute(command)
314 
315 
316 def print_data(data, delimiter=" "):
317  """ Prints the data recovered from a database """
318  for row in data:
319  line = delimiter.join([str(x) for x in row])
320  print(line)
321 
322 
323 def write_data(data, output_file, delimiter=" "):
324  """writes data to a file. The output file is expected to be a python
325  file object """
326  w = csv.writer(output_file, delimiter=delimiter)
327  for row in data:
328  w.writerow(row)
329 
330 
331 def get_sql_type_name(data_type):
332  if data_type == int:
333  return "INT"
334  elif data_type == float:
335  return "DOUBLE"
336  elif data_type == str:
337  return (
338  # 10 is a random number, SQLITE does not chop strings
339  "VARCHAR(10)"
340  )
341 
342 
343 def open(fn_database):
344  db = Database2()
345  db.connect(fn_database)
346  return db
347 
348 
349 def read_data(fn_database, sql_command):
350  db = Database2()
351  db.connect(fn_database)
352  data = db.retrieve_data(sql_command)
353  db.close()
354  return data
355 
356 
358  """ Return indices that sort the list ls"""
359  pairs = sorted([(element, i) for i, element in enumerate(ls)])
360  indices = [p[1] for p in pairs]
361  return indices
362 
363 
364 def merge_databases(fns, fn_output, tbl):
365  """
366  Reads a table from a set of database files into a single file
367  Makes sure to reorder all column names if necessary before merging
368  """
369  # Get names and types of the columns from first database file
370  db = Database2()
371  db.connect(fns[0])
372  names = db.get_table_column_names(tbl)
373  types = db.get_table_types(tbl)
374  indices = get_sorting_indices(names)
375  sorted_names = [names[i] for i in indices]
376  sorted_types = [types[i] for i in indices]
377  log.info("Merging databases. Saving to %s", fn_output)
378  out_db = Database2()
379  out_db.create(fn_output, overwrite=True)
380  out_db.connect(fn_output)
381  out_db.create_table(tbl, sorted_names, sorted_types)
382  for fn in fns:
383  log.debug("Reading %s", fn)
384  db.connect(fn)
385  names = sorted(db.get_table_column_names(tbl))
386  they_are_sorted = ",".join(names)
387  log.debug("Retrieving %s", they_are_sorted)
388  sql_command = "SELECT %s FROM %s" % (they_are_sorted, tbl)
389  data = db.retrieve_data(sql_command)
390  out_db.store_data(tbl, data)
391  db.close()
392  out_db.close()
def get_table_column_names
Get the names of the columns for a given table.
Definition: database.py:231
def close
Closes the database.
Definition: database.py:196
Class to manage a SQL database built with sqlite3.
Definition: database.py:16
def get_table_types
Gets info about a table and returns all the types in it.
Definition: database.py:213
def get_sorting_indices
Return indices that sort the list ls.
Definition: database.py:357
def store_data
Inserts information in a given table of the database.
Definition: database.py:95
def get_condition_string
creates a condition applying each value to each field
Definition: database.py:202
def create_table
Creates a table.
Definition: database.py:48
def retrieve_data
Retrieves data from the database using the sql_command returns the records as a list of tuples...
Definition: database.py:117
def create
Creates a database by simply connecting to the file.
Definition: database.py:28
def print_data
Prints the data recovered from a database.
Definition: database.py:316
def get_table
Returns th fields requested from the table.
Definition: database.py:182
def drop_table
Delete a table if it exists.
Definition: database.py:64
def create_view
creates a view of the given table where the values are selected using the condition values...
Definition: database.py:145
def connect
Connects to the database in filename.
Definition: database.py:35
def add_column
Add a column to a table column - the name of the column.
Definition: database.py:246
def store_dataV1
Inserts information in a given table of the database.
Definition: database.py:75
def select_table
Prompt for tables so the user can choose one.
Definition: database.py:279
def merge_databases
Reads a table from a set of database files into a single file Makes sure to reorder all column names ...
Definition: database.py:364
def add_columns
Add columns to the database.
Definition: database.py:257
def drop_view
Removes a view from the database.
Definition: database.py:178
def update_data
updates the register in the table identified by the condition values for the condition fields ...
Definition: database.py:125
def check_if_is_connected
Checks if the class is connected to the database filename.
Definition: database.py:42
def write_data
writes data to a file.
Definition: database.py:323