IMP  2.1.0
The Integrative Modeling Platform
Database.py
1 """@namespace IMP.em2d.Database
2  Utility functions to manage SQL databases with sqlite3.
3 """
4 
5 import sqlite3 as sqlite
6 import os
7 import csv
8 import logging
9 
10 log = logging.getLogger("Database")
11 
12 
13 class Database2:
14  """ Class to manage a SQL database built with sqlite3 """
15 
16  def __init__(self):
17  # Connection to the database
18  self.connection = None
19  # Cursor of actions
20  self.cursor = None
21  # Dictionary of tablenames and types (used to convert values when storing data)
22 
23  def create(self,filename,overwrite=False):
24  """ Creates a database by simply connecting to the file """
25  log.info("Creating database")
26  if overwrite and os.path.exists(filename):
27  os.remove(filename)
28  sqlite.connect(filename)
29 
30  def connect(self,filename):
31  """ Connects to the database in filename """
32  if not os.path.isfile(filename):
33  raise IOError,"Database file not found: %s" % filename
34  self.connection = sqlite.connect(filename)
35  self.cursor = self.connection.cursor()
36 
38  """ Checks if the class is connected to the database filename """
39  if self.connection == None:
40  raise ValueError,"The database has not been created " \
41  "or connection not established "
42 
43  def create_table(self, table_name, column_names, column_types):
44  """ Creates a table. It expects a sorted dictionary
45  of (data_field,typename) entries """
46  log.info("Creating table %s",table_name)
48  sql_command = "CREATE TABLE %s (" % (table_name)
49  for name, data_type in zip(column_names, column_types):
50  sql_typename = get_sql_type_name(data_type)
51  sql_command += "%s %s," % (name, sql_typename)
52  # replace last comma for a parenthesis
53  n = len(sql_command)
54  sql_command = sql_command[0:n-1] + ")"
55  log.debug(sql_command)
56  self.cursor.execute(sql_command)
57  self.connection.commit()
58 
59  def drop_table(self, table_name):
60  """
61  Delete a table if it exists
62  """
63  log.info("Deleting table %s",table_name)
65  sql_command = "DROP TABLE IF EXISTS %s" % (table_name)
66  log.debug(sql_command)
67  self.cursor.execute(sql_command)
68  self.connection.commit()
69 
70  def store_dataV1(self,table_name,data):
71  """ Inserts information in a given table of the database.
72  The info must be a list of tuples containing as many values
73  as columns in the table
74  Conversion of values is done AUTOMATICALLY after checking the
75  types stored in the table
76  """
78  n = len(data[0]) # number of columns for each row inserted
79  tuple_format="("+"?,"*(n-1)+"?)"
80  sql_command="INSERT INTO %s VALUES %s " % (table_name, tuple_format)
81  # Fill the table with the info in the tuples
82  types = self.get_table_types(table_name)
83 # log.debug("Storing types: %s", types)
84  for x in data:
85 # log.debug("DATA %s", x)
86  # convert (applies the types stored in the table dictionary to each value in x
87  y = [apply_type(i) for i,apply_type in zip(x, types)]
88  self.cursor.execute(sql_command, y)
89  self.connection.commit()
90 
91  def store_data(self,table_name,data):
92  """ Inserts information in a given table of the database.
93  The info must be a list of tuples containing as many values
94  as columns in the table
95  Conversion of values is done AUTOMATICALLY after checking the
96  types stored in the table
97  """
98  if len(data) == 0:
99  log.warning("Inserting empty data")
100  return
101  self.check_if_is_connected()
102  n = len(data[0]) # number of columns for each row inserted
103  tuple_format="("+"?,"*(n-1)+"?)"
104  sql_command="INSERT INTO %s VALUES %s " % (table_name, tuple_format)
105  # Fill the table with the info in the tuples
106  types = self.get_table_types(table_name)
107 # log.debug("Storing types: %s", types)
108  for i in xrange(len(data)):
109  data[i] = [apply_type(d) for d,apply_type in zip(data[i], types)]
110  self.cursor.executemany(sql_command, data)
111  self.connection.commit()
112 
113  def retrieve_data(self,sql_command):
114  """ Retrieves data from the database using the sql_command
115  returns the records as a list of tuples"""
116  self.check_if_is_connected()
117  log.debug("Retrieving data: %s" % sql_command)
118  self.cursor.execute(sql_command)
119  return self.cursor.fetchall()
120 
121  def update_data(self, table_name,
122  updated_fields,
123  updated_values,
124  condition_fields,
125  condition_values):
126  """ updates the register in the table identified by the condition
127  values for the condition fields
128  """
129  self.check_if_is_connected()
130  sql_command = "UPDATE %s SET " % (table_name)
131  for field, value in zip(updated_fields, updated_values):
132  sql_command += "%s=%s," % (field, value)
133  sql_command = sql_command.rstrip(",") + " WHERE "
134  s = self.get_condition_string(condition_fields, condition_values)
135  sql_command = sql_command + s
136  #print sql_command
137  log.debug("Updating %s: %s",table_name, sql_command)
138  self.cursor.execute(sql_command)
139  self.connection.commit()
140 
141  def create_view(self,view_name,table_name,
142  condition_fields, condition_values):
143  """ creates a view of the given table where the values are selected
144  using the condition values. See the help for update_data()
145  """
146  try: # if this fails is because the view already exist
147  self.drop_view(view_name)
148  except:
149  pass
150  sql_command = 'CREATE VIEW %s AS SELECT * FROM %s WHERE ' % (view_name, table_name)
151  condition = self.get_condition_string(condition_fields, condition_values)
152  sql_command += condition
153  log.info("Creating view %s", sql_command)
154  self.cursor.execute(sql_command)
155 
156  def create_view_of_best_records(self, view_name, table_name, orderby, n_records):
157  try: # if this fails is because the view already exist
158  self.drop_view(view_name)
159  except:
160  pass
161  sql_command = """CREATE VIEW %s AS SELECT * FROM %s
162  ORDER BY %s ASC LIMIT %d """ % (view_name, table_name, orderby, n_records)
163  log.info("Creating view %s", sql_command)
164  self.cursor.execute(sql_command)
165 
166  def drop_view(self,view_name):
167  """ Removes a view from the database """
168  self.cursor.execute('DROP VIEW %s' % view_name)
169 
170  def get_table(self, table_name, fields=False, orderby=False):
171  """ Returns th fields requested from the table """
172  fields = self.get_fields_string(fields)
173  sql_command = "SELECT %s FROM %s " % (fields, table_name)
174  if orderby:
175  sql_command += " ORDER BY %s ASC" % orderby
176  data = self.retrieve_data(sql_command)
177  return data
178 
179  def get_fields_string(self, fields, field_delim=","):
180  if fields:
181  return field_delim.join(fields)
182  return "*"
183 
184  def close(self):
185  """ Closes the database """
186  self.check_if_is_connected()
187  self.cursor.close()
188  self.connection.close()
189 
190  def get_condition_string(self, fields, values):
191  """ creates a condition applying each value to each field
192  """
193  s = ""
194  for field,value in zip(fields,values):
195  s += "%s=%s AND " % (field, value)
196  # remove last AND
197  n = len(s)
198  s = s[0:n-5]
199  return s
200 
201  def get_table_types(self, name):
202  """
203  Gets info about a table and retuns all the types in it
204  """
205  self.check_if_is_connected()
206  sql_command = "PRAGMA table_info(%s)" % name
207  self.cursor.execute(sql_command)
208  info = self.cursor.fetchall()
209  types = []
210  for row in info:
211  if row[2] == "INT":
212  types.append(int)
213  elif row[2] == "DOUBLE":
214  types.append(float)
215  elif row[2][0:7] == "VARCHAR":
216  types.append(str)
217  return types
218 
219  def get_table_column_names(self, name):
220  """
221  Get the names of the columns for a given table
222  """
223  self.check_if_is_connected()
224  sql_command = "PRAGMA table_info(%s)" % name
225  self.cursor.execute(sql_command)
226  info = self.cursor.fetchall()
227  return [ row[1] for row in info]
228 
229  def execute_sql_command(self, sql_command):
230  self.check_if_is_connected()
231  self.cursor.execute(sql_command)
232  self.connection.commit()
233 
234 
235  def add_column(self,table,column, data_type):
236  """
237  Add a column to a table
238  column - the name of the column.
239  data_type - the type: int, float, str
240  """
241  sql_typename = get_sql_type_name(data_type)
242  sql_command = "ALTER TABLE %s ADD %s %s" % (table, column, sql_typename)
243  self.execute_sql_command(sql_command)
244 
245  def add_columns(self, table, names, types, check=True):
246  """
247  Add columns to the database. If check=True, columns with names
248  already in the database are skipped. If check=False no check
249  is done and trying to add a column that already exists will
250  raise and exception
251  """
252  col_names = self.get_table_column_names(table)
253  if check:
254  for name, dtype in zip(names, types):
255  if name not in col_names:
256  self.add_column(table, name, dtype)
257  else:
258  for name, dtype in zip(names, types):
259  self.add_column(table, name, dtype)
260 
261  def get_tables_names(self):
262  sql_command = """ SELECT tbl_name FROM sqlite_master """
263  data = self.retrieve_data(sql_command)
264  names = [d[0] for d in data]
265  return names
266 
267 
268  def select_table(self):
269  """
270  Prompt for tables so the user can choose one
271  """
272  table_name = ""
273  self.check_if_is_connected()
274  tables = self.get_tables_names()
275  for t in tables:
276  say = ''
277  while say not in ('n','y'):
278  say = raw_input("Use table %s (y/n) " % t)
279  if say == 'y':
280  table_name = t
281  columns = self.get_table_column_names(t)
282  break
283  return table_name, columns
284 
285 
286  def drop_columns(self, table, columns):
287 
288  cnames = self.get_table_column_names(table)
289  for name in columns:
290  cnames.remove(name)
291  names_txt = ", ".join(cnames)
292  sql_command = [
293  "CREATE TEMPORARY TABLE backup(%s);" % names_txt,
294  "INSERT INTO backup SELECT %s FROM %s" % (names_txt, table),
295  "DROP TABLE %s;" % table,
296  "CREATE TABLE %s(%s);" % (table, names_txt),
297  "INSERT INTO %s SELECT * FROM backup;" % table,
298  "DROP TABLE backup;",
299  ]
300  for command in sql_command:
301  log.debug(command)
302 # print command
303  self.cursor.execute(command)
304 
305 def print_data(data, delimiter=" "):
306  """ Prints the data recovered from a database """
307  for row in data:
308  line = delimiter.join([str(x) for x in row])
309  print line
310 
311 def write_data(data,output_file,delimiter=" "):
312  """writes data to a file. The output file is expected to be a python
313  file object """
314  w = csv.writer(output_file, delimiter=delimiter)
315  for row in data:
316  w.writerow(row)
317 
318 def get_sql_type_name(data_type):
319  if(data_type == int):
320  return "INT"
321  elif(data_type == float):
322  return "DOUBLE"
323  elif(data_type == str):
324  return "VARCHAR(10)" # 10 is a random number, SQLITE does not chop strings
325 
326 def open(fn_database):
327  db = Database2()
328  db.connect(fn_database)
329  return db
330 
331 def read_data(fn_database, sql_command):
332  db = Database2()
333  db.connect(fn_database)
334  data = db.retrieve_data(sql_command)
335  db.close()
336  return data
337 
339  """ Return indices that sort the list l """
340  pairs = [(element, i) for i,element in enumerate(l)]
341  pairs.sort()
342  indices = [p[1] for p in pairs]
343  return indices
344 
345 def merge_databases(fns, fn_output, tbl):
346  """
347  Reads a table from a set of database files into a single file
348  Makes sure to reorder all column names if neccesary before merging
349  """
350  # Get names and types of the columns from first database file
351  db = Database2()
352  db.connect(fns[0])
353  names = db.get_table_column_names(tbl)
354  types = db.get_table_types(tbl)
355  indices = get_sorting_indices(names)
356  sorted_names = [ names[i] for i in indices]
357  sorted_types = [ types[i] for i in indices]
358  log.info("Merging databases. Saving to %s", fn_output)
359  out_db = Database2()
360  out_db.create(fn_output, overwrite=True)
361  out_db.connect(fn_output)
362  out_db.create_table(tbl, sorted_names, sorted_types)
363  for fn in fns:
364  log.debug("Reading %s",fn)
365  db.connect(fn)
366  names = db.get_table_column_names(tbl)
367  names.sort()
368  they_are_sorted = ",".join(names)
369  log.debug("Retrieving %s", they_are_sorted)
370  sql_command = "SELECT %s FROM %s" % (they_are_sorted, tbl)
371  data = db.retrieve_data(sql_command)
372  out_db.store_data(tbl, data)
373  db.close()
374  out_db.close()
def connect
Connects to the database in filename.
Definition: Database.py:30
def check_if_is_connected
Checks if the class is connected to the database filename.
Definition: Database.py:37
def store_dataV1
Inserts information in a given table of the database.
Definition: Database.py:70
def get_table
Returns th fields requested from the table.
Definition: Database.py:170
def select_table
Prompt for tables so the user can choose one.
Definition: Database.py:268
def write_data
writes data to a file.
Definition: Database.py:311
def store_data
Inserts information in a given table of the database.
Definition: Database.py:91
def get_condition_string
creates a condition applying each value to each field
Definition: Database.py:190
def merge_databases
Reads a table from a set of database files into a single file Makes sure to reorder all column names ...
Definition: Database.py:345
def drop_view
Removes a view from the database.
Definition: Database.py:166
def add_column
Add a column to a table column - the name of the column.
Definition: Database.py:235
def retrieve_data
Retrieves data from the database using the sql_command returns the records as a list of tuples...
Definition: Database.py:113
def get_table_column_names
Get the names of the columns for a given table.
Definition: Database.py:219
def update_data
updates the register in the table identified by the condition values for the condition fields ...
Definition: Database.py:121
def print_data
Prints the data recovered from a database.
Definition: Database.py:305
def add_columns
Add columns to the database.
Definition: Database.py:245
def drop_table
Delete a table if it exists.
Definition: Database.py:59
def close
Closes the database.
Definition: Database.py:184
def get_sorting_indices
Return indices that sort the list l.
Definition: Database.py:338
Class to manage a SQL database built with sqlite3.
Definition: Database.py:13
def create_table
Creates a table.
Definition: Database.py:43
def create_view
creates a view of the given table where the values are selected using the condition values...
Definition: Database.py:141
def create
Creates a database by simply connecting to the file.
Definition: Database.py:23
def get_table_types
Gets info about a table and retuns all the types in it.
Definition: Database.py:201