#!/usr/bin/python
"""
A module to take a data model from specification in an Excel spreadsheet and turn
it into DB2 ddl scripts.

This program ONLY works on Windows dummy.

The ddl scripts produced can be run from a DOS window (or AIX shell) with the 
command;

  c:\>db2 -f <script file> -t

Our general approach is to take each worksheet in a workbook and turn its contents
into a seperate ddl file. Each worksheet can contain the definitions of one or
more tables. (Or possible none, but we are ignoring that case for the moment).

On a worksheet we identify the starting row of a new table by the presence of
'TABLE' in column A of the row and the end of a table by the presence
of ')' in column A.

Valid command line options (argv to main) are;
  -s spreadsheet-file, --sheet=spreadsheet-file
      use spreadsheet-file as the source dictionary
  -o output-directory, --output=output-directory
      place the generated DDL scripts in output-directory
  -d, --debug
      change the logging level so that debug messages are recorded
  -w sheet-name, --worksheet sheet-name
      generate output only for the sheet named sheet-name

NB the format of the main() function comes from Guido's blog post of 15.05.2003
(http://www.artima.com/weblogs/viewpost.jsp?thread=4829)

Future versions of this script may use my Schema class (halfcooked.utilities.Schema) as an intermediate format. This will make adapting this code for multiple back ends a lot easier.
"""
__author__ = "Andy Todd <andy.todd@javelingroup.com>"
__date__ = "$Date: $"
__version__ = "$Revision: $"

import os
import sys
import getopt
import win32com.client

# A little light logging. 
from halfcooked.utilities import Log
log=Log.log('schemaGen', 'schemaGen.log')

SPREADSHEET="C:\\Home\\Andy\\ExpressGifts\\DataModel\\DataDictionaryDetail v0.5.xls"
OUTPUTDIR="C:\\\Work\\Projects\\EGL\\SQL"
FILEEXTENSION=".db2"
DEFAULTSCHEMA="EGL"
SCHEMAVERSION=('0','0','5')
FILEHEADER="""----------------------------------------------------------------------
-- DDL Script for EGL QH&WSS Websphere Application
--
-- File    : %FILENAME%
-- Author  : $Author$
-- Date    : $Date$
-- Version : $Revision$
--
----------------------------------------------------------------------
"""

class Table:
    def __init__(self, name, worksheet, shortName=None):
        self.name=name
        self.schema=None
        self.worksheet=worksheet
        self.shortName=shortName
        self.columns=[]
        self.pkColumns=[]
        self.foreignKeys={}

class Column:
    def __init__(self, name):
        self.name=name

class ForeignKey:
    # Dummy class to hold our foreign key details
    def __init__(self, name, table):
        self.name=name
        self.table=table
        self.schema=None # The schema of the target table, natch
        self.sourceCols=[]
        self.targetCols=[]

class Spreadsheet:
    def __init__(self, file, outputDir):
        "Open the file and create an object to reference it"
        self._outputDir=outputDir
        excel=win32com.client.Dispatch("Excel.Application")
        self._workbook=excel.Workbooks.Open(file)
        log.info("workbook=%s" % self._workbook.Name)
        self.worksheets=self._getWorksheets()
        self.tables={}
        self.capture() # Should probably be optional

    def close(self):
        self._workbook.Close()

    def _getWorksheets(self):
        """Return a mapping of sheet names and worksheets
        
        This enables us to refer to individual worksheets by name, not just index
        """
        worksheets={}
        for ws in self._workbook.Worksheets:
            if ws.Name not in ['Notes', 'empty']:
                worksheets[ws.Name]=ws
        return worksheets

    def capture(self):
        "Capture the details from the spreadsheet"
        for worksheet in self.worksheets.keys():
            cells=self.worksheets[worksheet].Cells
            # Start at A3
            yPos=3
            while cells(yPos, 1).Text.find("TABLE") != -1:
                # we have a new table, get the name from column 8 - BODGE!
                table=Table(cells(yPos, 2).Text, worksheet, cells(yPos, 10).Text)
                # the schema name is on the next line
                yPos+=1
                if cells(yPos, 1).Text.find("SCHEMA") != -1:
                    table.schema=cells(yPos, 2).Text
                else:
                    table.schema=DEFAULTSCHEMA
                # Foreign key definitions (may) start on the next line
                yPos+=1
                while cells(yPos,1).Text.find("FK") != -1:
                    # We have at least one foreign key
                    fk=ForeignKey(cells(yPos,2).Text, cells(yPos,3).Text)
                    table.foreignKeys[fk.name]=fk
                    yPos+=1
                newCols=1
                while newCols:
                    yPos+=1
                    if cells(yPos, 1).Text == ")":
                        # We've reached the end of this table definition
                        newCols=0
                    else:
                        column=Column(cells(yPos,2).Text)
                        column.dataType=cells(yPos,3).Text
                        column.nullable=cells(yPos,4).Text
                        if cells(yPos,5).Text != "":           # Default
                            column.defaultValue=cells(yPos,5).Text
                        if cells(yPos,6).Text!="":
                            table.pkColumns.append(column.name)
                        if cells(yPos,11).Text != "":         # Sequence
                            column.sequence='Y'
                        else:
                            column.sequence=''
                        fkString=cells(yPos,7).Text
                        # A column can be in more than one foreign key, if this is
                        # the case then the names of the keys should be seperated
                        # by a comma ","
                        if fkString!="":
                            for fkName in fkString.split(","):
                                if fkName in table.foreignKeys.keys():
                                    table.foreignKeys[fkName].sourceCols.append(column.name)
                                    table.foreignKeys[fkName].targetCols.append(cells(yPos,8).Text)
                                else:
                                    log.error("I do not have a foreign key called %s for the table %s" % ( fkName, table.name ))
                        table.columns.append(column)
                yPos+=2 # To take us to the next table definition
                self.tables[table.name]=table

    def generate(self, worksheet=None):
        "Generate the DDL files from the spreadsheet, or for <worksheet>"
        if not worksheet:
            for ws in self.worksheets.keys():
                if (ws != "empty" and ws != "Notes"): # BODGE!
                    self.generate(ws)
        else:
            log.info("Generating worksheet %s" % worksheet)
            filename=os.path.join(self._outputDir, worksheet+"_tab"+FILEEXTENSION)
            consname=os.path.join(self._outputDir, worksheet+"_con"+FILEEXTENSION)
            log.debug("Generating into files %s and %s" % (filename, consname))
            ddlFile=file(filename, "w")
            conFile=file(consname, "w")
            ddlFile.write(FILEHEADER.replace('%FILENAME%', os.path.split(filename)[1]))
            conFile.write(FILEHEADER.replace('%FILENAME%', os.path.split(consname)[1]))
            ddlFile.write("CONNECT TO EGL;\n\n")
            conFile.write("CONNECT TO EGL;\n\n")
            for table in self.tables.values():
                if table.worksheet==worksheet:
                    # Output the table
                    log.info("Building table %s" % table.name)
                    tableStr="CREATE TABLE "+table.schema+"."+table.name+"\n  (\n"
                    seqStr=""
                    commaStr="     "
                    for column in table.columns:
                        tableStr+=commaStr+"%-30s" % column.name
                        tableStr+=" %-15s" % column.dataType
                        tableStr+=" "+column.nullable
                        if hasattr(column, 'defaultValue'):
                            tableStr+=" WITH DEFAULT "+column.defaultValue
                        tableStr+="\n"
                        commaStr="    ,"
                        if column.sequence:
                            seqStr+="CREATE SEQUENCE "+table.schema+"."+column.name+"_seq START WITH 1;\n"
                    if len(table.pkColumns) > 0:
                        tableStr+="    ,CONSTRAINT "+table.shortName+"_pk PRIMARY KEY ("
                        tableStr+=",".join(table.pkColumns)+")\n"
                    for fk in table.foreignKeys.values():
                        if fk.table in self.tables.keys():
                            fkStr="ALTER TABLE "+table.schema+"."+table.name
                            fkStr+=" ADD CONSTRAINT "+fk.name+"\n"
                            fkStr+="    FOREIGN KEY ("+",".join(fk.sourceCols)
                            fkStr+=")\n    REFERENCES "
                            fkStr+=self.tables[fk.table].schema+"."+fk.table+" ("
                            fkStr+=",".join(fk.targetCols)+");\n"
                            conFile.write(fkStr)
                        else:
                            log.error("Unknown foreign key table %s from %s" % (fk.table, table.name))
                    tableStr+="  );\n\n"
                    log.debug("%s : %s" % (filename, tableStr))
                    ddlFile.write(tableStr)
                    ddlFile.write(seqStr)
            if worksheet=="Configuration":
                ddlString="INSERT INTO conf.schema_controls\n  (schema_name, version_major, version_minor, version_patch, version_notes)\nVALUES\n  ('" +DEFAULTSCHEMA+"',"+",".join(SCHEMAVERSION) + ", 'Initial Schema');\n"
                ddlFile.write(ddlString)
            ddlFile.write("-- End of File --\n")
            ddlFile.close()

def main(argv=None):
    if argv is None:
        argv = sys.argv
    try:
        opts, args = getopt.getopt(argv[1:], "dihs:o:w:", ["debug", "info", "help", "sheet", "output", "worksheet"])
    except getopt.error, msg:
        print msg
        print "for help use --help"
        return 2
    # Set defaults
    spreadsheetFile=SPREADSHEET
    outputDir=OUTPUTDIR
    allWorksheets=1
    for o, a in opts:
        if o in ("-h", "--help"):
            print __doc__
            return 0
        if o in ("-s", "--sheet"):
            spreadsheetFile=a
        if o in ("-o", "--output"):
            outputDir=a
        if o in ("-d", "--debug"):
            log.setLevel(Log.logging.DEBUG)
        elif o in ("-i", "--info"):
            log.setLevel(Log.logging.INFO)
        if o in ("-w", "--worksheet"):
            allWorksheets=0
            worksheet=a
    log.info("spreadsheetFile=%s" % spreadsheetFile)
    log.info("outputDir=%s" % outputDir)
    mySheet = Spreadsheet(spreadsheetFile, outputDir)
    if allWorksheets:
        mySheet.generate()
    else:
        mySheet.generate(worksheet)
    mySheet.close()
    return 0 # Not strictly necessary

if __name__ == "__main__":
    sys.exit(main())
