""" Module : csvDump.py License : BSD License (see LICENSE.txt) This module dumps the contents of a database table to a csv file Arguments; 1 - database to connect to (Oracle, Sqlite, MySQL) 2 - db connection string 3 - filename to output to 4 - table name This module requires Python 2.3 or greater as it makes use of generators and the csv library module. To get round the problem of large tables, I've utilised the ResultIter function from the Python Cookbook (http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/137270). Many thanks to Christopher Prinos for the marvelous code. """ __version__ = (1, 2, 0) __date__ = (2006, 4, 13) __author__ = "Andy Todd " import sys, getopt, csv from utilities.Log import get_log def resultIter(cursor, arraySize=1000, log=None): "An iterator that uses fetchmany to keep memory usage down" done = False iterationCount = 0 while not done: results = cursor.fetchmany(arraySize) iterationCount += 1 if not results: done = True # log.debug("Fetched %d rows" % (iterationCount*arraySize)) for result in results: yield result def get_conn(dbType, connectionString): if dbType == 'Sqlite': from pysqlite2 import dbapi2 as db elif dbType == 'Oracle': import cx_Oracle as db elif dbType == 'MySQL': import MySQLdb as db return db.connect(connectionString) def dump_to_file(results, fileName, headerRow=None, log=None): if not log: log = get_log() outputFile = open(fileName, "w") # The next two lines added quoting to the output (non-number) data csvDialect = csv.get_dialect('excel') csvDialect.quoting = csv.QUOTE_NONNUMERIC csvWriter = csv.writer(outputFile, csvDialect) if headerRow: csvWriter.writerow(headerRow) log.info("Created file %s and written heading row" % fileName) for row in results: csvWriter.writerow(row) outputFile.close() log.info("Finished dumping to file %s" % fileName) def dump_statement(connection, fileName, statement, log=None): "Execute statement against connection and write the results to fileName as csv" cursor = connection.cursor() cursor.execute(statement) columns = [col[0] for col in cursor.description] dump_to_file(resultIter(cursor, log=log), fileName, columns, log) def dump(connection, fileName, tableName, whereClause=None, columnList=None, log=None): if not log: log = get_log() log.debug("Creating cursor object") cursor = connection.cursor() stmt = "SELECT " if columnList: stmt += ','.join(columnList) else: stmt += "*" stmt += " FROM %s " % tableName if whereClause: stmt += "WHERE " + whereClause log.debug("Executing %s" % stmt) cursor.execute(stmt) columns = [col[0] for col in cursor.description] dump_to_file(resultIter(cursor, log=log), fileName, columns, log) def main(argv=None): if argv is None: argv = sys.argv # parse command line options try: opts, args = getopt.getopt(argv[1:], "dh", ["debug", "help",]) except getopt.error, msg: print msg print "for help use --help" return 2 # process options skip = False check = False log = None for o, a in opts: if o in ("-h", "--help"): print __doc__ return 0 if o in ("-d", "--debug"): log = get_log(level='DEBUG') # process arguments if not log: log = get_log() log.debug("database %s" % args[0]) log.debug("connection string %s" % args[1]) log.debug("file name %s" % args[2]) log.debug("table name %s" % args[3]) connection = get_conn(args[0], args[1]) dump(connection, args[2], args[3], log) if __name__ == "__main__": sys.exit(main())