#!/usr/bin/python import psycopg2, psycopg2.extras import psycopg2.extensions psycopg2.extensions.register_type(psycopg2.extensions.UNICODE) psycopg2.extensions.register_type(psycopg2.extensions.UNICODEARRAY) import re import time import datetime from datetime import datetime import sys import os DB_DEBUG = False #DB_DEBUG = True ############################################################################## db = psycopg2.connect("dbname=nsalog user=nsabot") def close_db(): db.close() def exec_db(query, args=()): cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) if (DB_DEBUG): print "query: '{0}'".format(query) if (DB_DEBUG): print "args: '{0}'".format(args) if (DB_DEBUG): print "EXEC:" cur.execute(query, args) def rollback_db(): cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) cur.execute("ROLLBACK") db.commit() def commit_db(): cur = db.cursor(cursor_factory=psycopg2.extras.RealDictCursor) db.commit() ############################################################################## # Eat individual line of a Phf-style log dump def eat_logline(line, chan, era): match = re.search("(\d+)\;(\d+)\;([^;]+)\;(.*$)", line) if match: g = match.groups() self_speak = False try: idx = int(g[0]) # Serial Number of Log Line time = int(g[1]) # Unix Epochal Time of Log Line except Exception, e: print("Malformed Line! '" + line +"' ! : " + e) close_db() exit(1) speaker = g[2] # Name of Speaker payload = g[3] # Payload (remainder of line) ## If spoken line is of form "* user ..." : if speaker == "*": spl = payload.split(' ', 1) speaker = spl[0] payload = spl[1] self_speak = True ## Put in DB: try: exec_db('''insert into loglines (idx, t, chan, era, speaker, self, payload) values (%s, %s, %s, %s, %s, %s, %s) ; ''', [int(idx), datetime.fromtimestamp(time), str(chan), int(era), str(speaker), bool(self_speak), str(payload)]) commit_db() except psycopg2.IntegrityError as e: rollback_db() print "Dupe Ignored, Idx=", idx else: print("Malformed Line! '" + line +"' !") close_db() exit(1) # Eat Phf-style log dump at given path def eat_dump(path, chan, era): with open(path) as fp: for line in fp: eat_logline(line, chan, era) ############################################################################## if (len(sys.argv) == 4): logdump = sys.argv[1] # Path to Phf-style log dump chan = sys.argv[2] # Chan Name era = sys.argv[3] # Era (integer) # Eat: eat_dump(logdump, chan, era) close_db() else: print "Usage: ./eat_dump LOGFILE CHAN ERA" exit(0)