Loading Fact Tables with PyGramETL

The previous post described how to load dimension tables with PyGramETL. This one is about fact tables.

The source CVS file has the following structure:

cust_id;res_date;future_guests;future_revenue
105;2008/04/01 00:00:00.000;2;300
106;2007/04/17 00:00:00.000;4;380
106;2007/04/17 00:00:00.000;4;768
...

Fact Table

CREATE TABLE reservation_fact(
    customer_sk INT,
    reservation_date DATETIME,
    future_guests FLOAT,
    future_revenue FLOAT
)

Kettle

PyGramETL

import MySQLdb
import pygrametl
from pygrametl.datasources import CSVSource
from pygrametl.tables import SlowlyChangingDimension, BatchFactTable

myconn = MySQLdb.connect(user='test', passwd='test', db='test')

connection = pygrametl.ConnectionWrapper(myconn)
connection.setasdefault()

customer_dim = SlowlyChangingDimension(
    name='customer_dim',
    key='customer_sk',
    attributes=['customer_id', 'valid_from', 'valid_to', 'version',
                'first_name', 'last_name', 'age', 'phone_number', 'address'],
    lookupatts=['customer_id'],
    versionatt='version',
    fromatt='valid_from',
    toatt='valid_to',
    type1atts=('first_name', 'last_name'),
    cachesize=-1)

customer_dim.defaultidvalue = 0

reservation_fact = BatchFactTable(
    name='reservation_fact',
    keyrefs=['customer_sk'],
    measures=['reservation_date', 'future_guests', 'future_revenue'])

def load_customer_dim():
    customer = CSVSource(file('./customer.txt', 'r', 16384), delimiter=';')
    for row in customer:
        customer_dim.scdensure(row)
    connection.commit()

def load_reservation_fact():
    reservation = CSVSource(file('./reservation.txt', 'r', 16384), delimiter=';')
    for row in reservation:
        row['customer_sk'] = customer_dim.lookup(row, {'customer_id':'cust_id'})
        row['reservation_date'] = row['res_date']
        reservation_fact.insert(row)
    connection.commit()

def main():
    #load_customer_dim()
    load_reservation_fact()

if __name__ == '__main__':
    main()