[svn-commits] r178 - in branches/advisor/src/front: . analyzedb analyzedb/iianalyze
thial01 at ingres.com
thial01 at ingres.com
Thu Aug 7 01:58:05 PDT 2008
Author: thial01
Date: 2008-08-07 01:58:05 -0700 (Thu, 07 Aug 2008)
New Revision: 178
Added:
branches/advisor/src/front/analyzedb/
branches/advisor/src/front/analyzedb/analyzedb
branches/advisor/src/front/analyzedb/iianalyze/
branches/advisor/src/front/analyzedb/iianalyze/Analyze.py
branches/advisor/src/front/analyzedb/iianalyze/Config.py
branches/advisor/src/front/analyzedb/iianalyze/Containers.py
branches/advisor/src/front/analyzedb/iianalyze/Db.py
branches/advisor/src/front/analyzedb/iianalyze/Draw.py
branches/advisor/src/front/analyzedb/iianalyze/Present.py
branches/advisor/src/front/analyzedb/iianalyze/__init__.py
branches/advisor/src/front/analyzedb/iianalyze/functions.py
Log:
Adding the analyzer client - see #174
Added: branches/advisor/src/front/analyzedb/analyzedb
===================================================================
--- branches/advisor/src/front/analyzedb/analyzedb (rev 0)
+++ branches/advisor/src/front/analyzedb/analyzedb 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+
+import sys
+import pprint
+import string
+import os
+import getopt
+try:
+ """ Adding import path """
+ II_SYSTEM = os.path.join(os.getenv("II_SYSTEM"))
+ sys.path.insert(0, (II_SYSTEM, "ingres", "utility", "iianalyze"))
+except:
+ print "II_SYSTEM is not set"
+ sys.exit(1)
+
+from iianalyze.Config import *
+from iianalyze.Analyze import *
+from iianalyze.Present import *
+from iianalyze.functions import *
+
+config.outputfolder = os.path.join(II_SYSTEM, "files", "analyzer")
+
+config.vnode = "vm3"
+config.outputfolder = "./tmp"
+config.querylimit = 10
+
+def usage():
+ output("""
+Usage:
+ analyzedb [options] dbname
+
+ dbname the name of the database
+
+ options:
+ -h, --help this screen
+
+ -d, --debug run in verbose mode
+
+ -l n, --limit=n limits the number of queries
+ to analyse
+ (default: -1 = all)
+
+ -t n, --timeout=n sets the joinop timeout in seconds
+ (default: 300 seconds)
+
+ -o f, --output=folder the folder where the result will
+ be placed in
+ (default: II_SYSTEM/files/analyser)
+
+ -v v, --vnode=vnode vnode used for the connection
+ (default: (local))
+""")
+
+""" Main function """
+def main():
+
+ output("Ingres Design Analyzer\n")
+
+ if len(sys.argv) == 0:
+ usage()
+ error("Database name is missing")
+
+ config.userdb = sys.argv[len(sys.argv)-1]
+
+ argv = sys.argv[1:len(sys.argv)-1]
+
+ try:
+ opts, args = getopt.gnu_getopt(argv, "hdl:t:o:v:", ["help", "debug", "limit=", "timeout=", "output=", "vnode="])
+ except:
+ usage()
+ error("Invalid arguments given")
+
+ for opt, arg in opts:
+ if opt in ("-h", "--help"):
+ usage()
+ sys.exit(0)
+ elif opt in ("-d", "--debug"):
+ config.verbose = True
+ debug("Running in verbose mode")
+ elif opt in ("-l", "--limit"):
+ try:
+ config.querylimit = int(arg)
+ except:
+ error("Limit needs a number as value")
+ elif opt in ("-t", "--timeout"):
+ try:
+ config.timeout = int(arg) * 1000
+ except:
+ error("Timeout needs a number as value")
+ elif opt in ("-v", "--vnode"):
+ config.vnode = arg
+
+
+ sys.exit(0)
+
+ analyze = Analyze(config)
+ analyze.start()
+
+ results = analyze.results()
+
+ present = Present(results)
+ present.start()
+
+
+main()
+
+
Property changes on: branches/advisor/src/front/analyzedb/analyzedb
___________________________________________________________________
Name: svn:executable
+ *
Added: branches/advisor/src/front/analyzedb/iianalyze/Analyze.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Analyze.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Analyze.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,503 @@
+import time
+from iianalyze.Db import *
+from iianalyze.functions import *
+from iianalyze.Containers import *
+
+""" Analyze class """
+class Analyze():
+
+ __recommendations = []
+ __statements = []
+ __indexes = []
+ __attributes = []
+ __tables = []
+ __statistics = False
+ __overall_cost = [-1, -1]
+ __avg_win = []
+ __userdb = False
+ __imadb= False
+ __workloaddb = False
+ __time = [0, 0]
+ __workloadsize = 0
+
+ def __init__(self, config):
+ self.__config = config
+
+ def start(self):
+
+ """ Start time """
+ self.__time[0] = time.ctime()
+
+ self.__userdb = DB(self.__config, self.__config.userdb)
+ self.__imadb = DB(self.__config, "imadb")
+ self.__workloaddb = DB(self.__config, "workloaddb", True)
+
+ """ Tell the DBMS we're going to test now """
+ self.__userdb.execute("set trace point sc7")
+
+
+ """ Set the joinop timeout to a high value to give OPF enough time to find better plans """
+ self.__userdb.execute("set joinop timeout %d" % config.timeout)
+
+ debug("## Start Analyzing...\n")
+
+ self.__load_indexes()
+ self.__load_attributes()
+ self.__load_tables()
+ self.__load_statements()
+
+ for statement in self.__statements:
+ self.__process_statement(statement)
+
+ """ Get the cost of the new configuration """
+ self.__test_configuration()
+
+ """ Find indexes we may want to drop """
+ debug(" ## Find unused indexes")
+ for index in self.__indexes:
+ if index.frequency == 0:
+ for attr in self.__attributes:
+ if attr.table_id == index.table_id and attr.id == index.attribute_id:
+ self.__recommend(index, "drop_index", (index.name, attr.table_name, attr.name))
+ break
+
+ self.__process_statistics()
+
+ """ End time """
+ self.__time[1] = time.ctime()
+
+
+ def __load_indexes(self):
+
+ """ Get all indexes """
+ results = self.__workloaddb.execute("select \
+ i.index_id, i.name, i.table_id, i.attribute_id, i.frequency, i.structure, i.data_pages, i.overflow_pages \
+ from indexes i where i.database = ?",
+ (self.__config.userdb,))
+
+ for row in results:
+ self.__indexes.append(Index(row))
+
+
+ def __load_attributes(self):
+
+ """ Get all attributes """
+ results = self.__workloaddb.execute("select \
+ a.attribute_id, a.name, a.table_id, a.frequency, a.statistics, t.name \
+ from attributes a, tables t where a.table_id = t.table_id and a.database = t.database and a.database = ?",
+ (self.__config.userdb,) )
+
+ for row in results:
+ self.__attributes.append(Attribute(row, self.__indexes))
+
+
+ def __load_tables(self):
+
+ """ Get all tables """
+ results = self.__workloaddb.execute("select \
+ t.table_id, t.name, t.frequency, t.est_cpu, t.act_cpu, t.est_dio, t.act_dio, \
+ t.est_tup, t.act_tup, t.structure, t.data_pages, t.overflow_pages \
+ from tables t where t.database = ?",
+ (self.__config.userdb,) )
+
+ for row in results:
+ self.__tables.append(Table(row, self.__attributes))
+
+
+ def __load_statements(self):
+
+
+ """ Get statements - most frequent and expensive ones first """
+ results = self.__workloaddb.execute("select \
+ query_key, query_text, frequency, opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time integer4 \
+ from statements where database = ? order by (frequency * (opf_cpu + opf_dio + qef_cpu + qef_dio)) desc",
+ (self.__config.userdb,) )
+
+ self.__workloadsize = len(results)
+
+ count = 1
+ for row in results:
+
+ if self.__config.querylimit != -1 and count > self.__config.querylimit:
+ debug("Limit of queries to process reached - stopping")
+ break
+
+ """ Try to find out if this is a select """
+ if (row[1].lstrip().lower())[0:6] == "select":
+ count += 1
+ stmt = Statement(row)
+ self.__load_references(stmt)
+ self.__statements.append(stmt)
+
+
+ def __load_references(self, statement):
+
+ debug("\n\n## Statement %d: %s\n" % (statement.query_key, statement.query_text))
+
+ debug(" ## Tables:")
+
+ """ Get all tables used in this statement """
+ results = self.__workloaddb.execute("select \
+ t.table_id from tables t, references r where t.table_id = r.object_id and r.object_type = 0 \
+ and r.query_key = ? and r.database = ? and t.database = r.database \
+ order by t.frequency desc",
+ (statement.query_key, self.__config.userdb))
+
+ table_references = []
+ for row in results:
+
+ """ Find the corresponding table object """
+ for table in self.__tables:
+ if table.id == row[0]:
+ break
+
+ debug(" %s" % table.name)
+
+ debug(" ## Attributes:")
+
+ """ Get all attributes for this table that were used in this statement """
+ results1 = self.__workloaddb.execute("select \
+ a.attribute_id, a.table_id from attributes a, references r where a.table_id = ? \
+ and a.table_id = r.table_id and a.attribute_id = r.object_id and r.object_type = 1 \
+ and r.query_key = ? and r.database = ? and a.database = r.database \
+ order by a.frequency desc",
+ (table.id, statement.query_key, self.__config.userdb))
+
+ attr_list = []
+ for row1 in results1:
+ for attribute in self.__attributes:
+ """ Find the corresponding attribute object """
+ if attribute.id == row1[0] and attribute.table_id == row1[1]:
+ debug(" %s" % attribute.name)
+ attr_list.append(attribute)
+ break
+
+ table_references.append([table, attr_list])
+
+ """ Get indexes used in this statement """
+ results = self.__workloaddb.execute("select \
+ i.index_id from indexes i, references r where i.index_id = r.object_id and r.object_type = 2 and r.query_key = ? \
+ and r.database = ? and i.database = r.database and i.frequency > 0 order by i.frequency desc",
+ (statement.query_key, self.__config.userdb))
+
+ debug(" ## Used indexes:")
+
+ index_references = []
+ for row in results:
+ """ Find the corresponding index object """
+ for index in self.__indexes:
+ if index.id == row[0]:
+ index_references.append(index)
+ debug(" %s" % index.name)
+
+ statement.attach_references(table_references, index_references)
+
+
+ def __process_statement(self, statement):
+
+ test_indexes = []
+
+ debug(" ## Analyzing Query: %d" % statement.query_key)
+
+ if len(statement.indexes) == 0:
+ self.__recommend(statement, "no_indexes_used", ())
+
+ """ OPF cost more than 50% of QEF cost """
+ pct = 100 * (statement.opf_cpu + statement.opf_dio + 1) / \
+ (statement.opf_cpu + statement.opf_dio + statement.qef_cpu + statement.qef_dio + 1)
+ if pct > 50:
+ self.__recommend(statement, "high_opf_cost", (pct))
+
+ """ Estimates wrong - no or outdated statistics? """
+ pct = 100 - (100 * (statement.est_cpu + statement.est_dio + 1) / \
+ (statement.opf_cpu + statement.opf_dio + statement.qef_cpu + statement.qef_dio + 1))
+ if pct > 20 or pct < -20:
+ self.__recommend(statement, "wrong_estimates", (pct))
+
+ for i in statement.tables:
+ table = i[0]
+ need_statistics = False
+
+ """ More than 10% overflow pages """
+ if table.overflow_pages > table.data_pages * 10 / 100:
+ self.__recommend(table, "overflow_pages", (table.name))
+
+
+ for attribute in table.attributes:
+
+ """ No statistics """
+ if attribute.statistics == 0:
+ need_statistics = True
+
+ """ More than one index on one attribute """
+ if len(attribute.indexes) > 1:
+ self.__recommend(attribute, "duplicate_index", (table.name, attribute.name))
+
+ if need_statistics:
+ self.__recommend(table, "missing_statistics", (table.name))
+
+
+ """ Indexes on half of all attributes """
+ max_idx_count = len(table.attributes) / 2
+ if max_idx_count < 5:
+ max_idx_count = 5
+ idx_count = 0
+ """ Count the existing indexes on this table """
+ for attribute in table.attributes:
+ if len(attribute.indexes) > 0:
+ idx_count += 1
+
+ if idx_count > max_idx_count:
+ self.__recommend(table, "too_many_indexes", (table.name, idx_count, len(table.attributes)))
+
+ """ Recommend new indexes on the used attributes """
+ for attribute in i[1]:
+ if idx_count < max_idx_count:
+ if len(attribute.indexes) == 0:
+ found = False
+ for attr in test_indexes:
+ if id(attr) == id(attribute):
+ """ We recommended this before """
+ found = True
+ if not found:
+ idx_count += 1
+ test_indexes.append(attribute)
+
+
+ """ We collected index recommendations - let's test them """
+ if len(test_indexes):
+ debug(" Start testing index recommendations")
+
+ i = 0
+ best_co = [-1, []]
+ for attribute in test_indexes:
+ i += 1
+ idx_name = "virt_idx__%d" % i
+ debug(" Creating virtual index %d on %s(%s)" % (i, attribute.table_name, attribute.name) )
+ try:
+ self.__userdb.execute("create virtual index %s on %s(%s) with structure = btree" % (idx_name, attribute.table_name, attribute.name))
+ except:
+ debug(" Error while creating virtual index - removed from list!")
+ test_indexes[i-1] = False
+
+ debug(" Runnging query")
+ rows = self.__userdb.execute(statement.query_text)
+ debug(" Evaluating query cost")
+ result = self.__imadb.execute("select cpu, dio, pages_touched, vindexes from ima_scm_analyze")
+ cpu = result[0][0]
+ dio = result[0][1]
+ newcost = cpu + dio
+ oldcost = statement.est_cpu + statement.est_dio
+ debug(" old %d - new %d" % (oldcost, newcost))
+ """ Better plan found """
+ if newcost < oldcost:
+ debug(" New plan is better")
+ best_co[0] = newcost
+ """ Split strings of used virtual indexes into list """
+ best_co[1] = result[0][3].split(" ")
+
+ self.__userdb.rollback()
+
+ if best_co[0] > 0:
+ for idx in best_co[1]:
+ if len(idx):
+ attribute = test_indexes[(int(idx[10:])-1)]
+ if attribute:
+ self.__recommend(attribute, "new_index", (attribute.table_name, attribute.name))
+
+
+ def __test_configuration(self):
+ debug("## Testing new configuration")
+
+ """ Reset joinop timeout to default """
+ self.__userdb.execute("set joinop timeout")
+
+ costs = []
+ i = 0
+ """ Get all recommended indexes and create them """
+ for rec in self.__recommendations:
+ if rec.msg == "new_index":
+ attr = rec.object
+ i += 1
+ idx_name = "virt_idx__%d" % i
+ debug(" Creating virtual index %d on %s(%s)" % (i, attr.table_name, attr.name) )
+ self.__userdb.execute("create virtual index %s on %s(%s) with structure = btree" % (idx_name, attr.table_name, attr.name))
+
+ """ Re-run all queries to get their new cost with the final index set """
+ debug(" ## Running workload")
+ for statement in self.__statements:
+ debug(" Query: %d" % statement.query_key)
+ try:
+ self.__userdb.execute(statement.query_text)
+ except:
+ debug(" Error running query - skipped")
+ continue
+ result = self.__imadb.execute("select cpu, dio, pages_touched, vindexes from ima_scm_analyze")
+ cpu = result[0][0]
+ dio = result[0][1]
+ newcost = cpu + dio
+ statement.newcost = newcost
+ oldcost = statement.est_cpu + statement.est_dio
+ win = 100 - (100 * (newcost+1) / (oldcost+1))
+ self.__avg_win.append(win)
+ debug(" %d %% old %d new %d" % (win, oldcost, newcost))
+ costs.append([statement.query_key, oldcost, newcost])
+
+ self.__userdb.rollback()
+
+ oldcost = 0
+ newcost = 0
+ for cost in costs:
+ oldcost += cost[1]
+ newcost += cost[2]
+
+ self.__overall_cost[0] = oldcost
+ self.__overall_cost[1] = newcost
+
+
+ def __process_statistics(self):
+
+ self.__statistics = Statistics()
+
+ """ Get statistical data """
+ results = self.__workloaddb.execute("select \
+ time, current_connections, current_sessions, max_sessions, total_rows, selects_processed, \
+ locks_per_tx, max_locks, locks_used, deadlocks, escalated_locks, lock_wait \
+ from statistics order by time")
+
+ last_row = False
+ for row in results:
+ self.__statistics.current_connections.append([row[0], row[1]])
+ self.__statistics.current_sessions.append([row[0], row[2]])
+ self.__statistics.locks_used.append([row[0], row[8]])
+ self.__statistics.deadlocks.append([row[0], row[9]])
+ self.__statistics.escalated_locks.append([row[0], row[10]])
+ self.__statistics.lock_wait.append([row[0], row[11]])
+ last_row = row
+
+ self.__statistics.max_sessions = last_row[3]
+ self.__statistics.total_rows = last_row[4]
+ self.__statistics.selects_processed = last_row[5]
+ self.__statistics.locks_per_tx = last_row[6]
+ self.__statistics.max_locks = last_row[7]
+ self.__statistics.workloadsize = self.__workloadsize
+ self.__statistics.timespan = [time.ctime(results[0][0]), time.ctime(last_row[0])]
+ self.__statistics.counts = [len(self.__tables), len(self.__attributes), len(self.__indexes)]
+
+
+ def __recommend(self, object, message, rpl = False):
+
+ debug(" %s" % message)
+
+ """ Check if this is a duplicate recommendation """
+ for rec in self.__recommendations:
+ if id(rec.object) == id(object) and rec.msg == message:
+ """ Count how often we saw this recommendation """
+ rec.recommend()
+ return
+ self.__recommendations.append(Recommendation(object, message, rpl))
+
+
+ def results(self):
+
+ overall = 100 - (100 * (self.__overall_cost[1]+1) / (self.__overall_cost[0]+1))
+ debug("\n Overall win: %d %%" % overall)
+ debug(" old %d - new %d" % (self.__overall_cost[0], self.__overall_cost[1]))
+ avg = 0
+ for win in self.__avg_win:
+ avg += win
+ if len(self.__avg_win) > 0:
+ avg /= len(self.__avg_win)
+ debug(" Average win per statement: %d %%" % avg)
+
+ return Results(self.__statements, self.__recommendations, overall, avg, self.__statistics, self.__time)
+
+
+
+""" Static Messages class """
+class Messages():
+ # Lookup table for all recommendation texts, keyed by message key.
+ # Each entry is a list of three items:
+ #   [0] the message text; its %-placeholders are filled with the
+ #       replacement values stored in the Recommendation
+ #   [1] an SQL template using %(attribute)s placeholders, or "" when
+ #       there is no statement to suggest
+ #   [2] the names of the object attributes that fill the SQL template
+
+ messages={
+ "no_indexes_used":
+ ["This is not using any indexes. You should take a look at its QEP.",
+ "SET QEP; SET OPTIMIZE ONLY; [your query]", []],
+ "drop_index":
+ ["The index %s on %s(%s) is never used in this workload and should therefore be dropped.",
+ "DROP INDEX %(name)s", ["name"]],
+ "high_opf_cost":
+ ["This statement spends %d %% of its time in the optimzer. You should take a look at its QEP.",
+ "SET QEP; SET OPTIMIZE ONLY; [your query]", []],
+ "wrong_estimates":
+ ["The cost estimates of this statement are %d %% off the actual costs - this may caused by missing or outdated statistics.",
+ "", []],
+ "missing_statistics":
+ ["One or more attributes of %s don't have statistics. Run optimizedb to create statistics.",
+ "", []],
+ "duplicate_index":
+ ["Attribute %s(%s) has more than one index. You should remove duplicate indexes.",
+ "", []],
+ "new_index":
+ ["Creating an index on %s(%s) would be beneficial.",
+ "CREATE INDEX %(name)s_idx ON %(table_name)s(%(name)s) WITH STRUCTURE = BTREE", ["name", "table_name"]],
+ "overflow_pages":
+ ["%s has many overflow pages. You should restructurize the table or modify it to B-Tree.",
+ "MODIFY %(name)s TO BTREE", ["name"]],
+ "too_many_indexes":
+ ["In table %s %d of %d attributes have indexes. This is considered to be too much and may decrease performance.",
+ "", []]
+ }
+
+
+
+""" Recommendation class """
+class Recommendation():
+
+ def __init__(self, object, message, rpl):
+ self.object = object
+ self.msg = message
+ self.__rpl = rpl
+ """ How often did we recommend this """
+ self.frequency = 1
+
+ def recommend(self):
+ self.frequency += 1
+
+ def message(self):
+ if self.__rpl:
+ message = Messages().messages[self.msg][0] % self.__rpl
+ else:
+ message = Messages().messages[self.msg][0]
+ return message
+
+ def sql(self):
+ if Messages().messages[self.msg][1] == "":
+ return False
+
+ values = {}
+ for m in Messages().messages[self.msg][2]:
+ values[m] = getattr(self.object, m)
+
+ return Messages().messages[self.msg][1] % values
+
+
+
+
+""" Result class """
+class Results():
+
+ statements = []
+ recommendations = []
+ overall_win = [-1, -1]
+ avg_win = []
+ avg_win = []
+ statistics = False
+
+ def __init__(self, statements, recommendations, overall, avg, statistics, time ):
+ self.statements = statements
+ self.recommendations = recommendations
+ self.overall_win = overall
+ self.avg_win = avg
+ self.statistics = statistics
+ self.time = time
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/Config.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Config.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Config.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,14 @@
+
+""" Config class """
+class Config():
+ # Settings shared by all modules through the module-level "config" object
+ # created below; the analyzedb script overrides them from the command line.
+
+ # debug() prints its message only when this is True
+ verbose = False
+ # vnode handed to ingresdbi.connect()
+ vnode = "(local)"
+ # folder the PNG charts and index.html are written to
+ outputfolder = "/tmp/"
+ # maximum number of select statements to analyze; -1 disables the limit
+ querylimit = -1
+ # value for "set joinop timeout"; the command line multiplies its seconds
+ # by 1000 - presumably milliseconds, TODO confirm
+ timeout = 300000
+ # name of the database to analyze
+ userdb = ""
+
+# the shared settings instance that the other modules import
+config = Config()
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/Containers.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Containers.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Containers.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,104 @@
+
+""" Statement class """
+class Statement():
+
+ def __init__(self, values):
+
+ self.query_key = values[0]
+ self.query_text = values[1].strip()
+ self.frequency = values[2]
+ self.opf_cpu = values[3]
+ self.opf_dio = values[4]
+ self.qef_cpu = values[5]
+ self.qef_dio = values[6]
+ self.est_cpu = values[7]
+ self.est_dio = values[8]
+ self.pages_touched = values[9]
+ self.time = values[10]
+ self.newcost = -1
+ self.tables = []
+ self.indexes = []
+
+ def attach_references(self, tables, indexes):
+
+ self.tables = tables
+ self.indexes = indexes
+
+
+""" Table class """
+class Table():
+
+ def __init__(self, values, attributes):
+ self.id = values[0]
+ self.name = values[1].strip()
+ self.frequency = values[2]
+ self.est_cpu = values[3]
+ self.act_cpu = values[4]
+ self.est_dio = values[5]
+ self.act_dio = values[6]
+ self.est_tup = values[7]
+ self.act_tup = values[8]
+ self.structure = values[9]
+ self.data_pages = values[10]
+ self.overflow_pages = values[11]
+ self.attributes = []
+
+ for attribute in attributes:
+ if attribute.table_id == self.id:
+ self.attributes.append(attribute)
+
+
+
+""" Attribute class """
+class Attribute():
+
+ def __init__(self, values, indexes):
+ self.id = values[0]
+ self.name = values[1].strip()
+ self.table_id = values[2]
+ self.frequency = values[3]
+ self.statistics = values[4]
+ self.table_name = values[5].strip()
+ self.indexes = []
+
+ for index in indexes:
+ if index.table_id == self.table_id and index.attribute_id == self.id:
+ self.indexes.append(index)
+
+
+
+""" Index class """
+class Index():
+
+ def __init__(self, values):
+ self.id = values[0]
+ self.name = values[1].strip()
+ self.table_id = values[2]
+ self.attribute_id = values[3]
+ self.frequency = values[4]
+ self.structure = values[5]
+ self.data_pages = values[6]
+ self.overflow_pages = values[7]
+
+
+
+""" Statistics class """
+class Statistics():
+
+ current_connections = []
+ current_sessions = []
+ max_session = 0
+ total_rows = 0
+ selects_processed = 0
+ locks_per_tx = 0
+ max_locks = 0
+ locks_used = []
+ deadlocks = []
+ escalated_locks = []
+ lock_wait = []
+
+ workloadsize = 0
+ timespan = [0, 0]
+ counts = [0, 0, 0]
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/Db.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Db.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Db.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,51 @@
+import ingresdbi
+from iianalyze.functions import *
+
+""" Database Class """
+class DB():
+
+ __link = False
+ __cursor = False
+
+ def __init__(self, config, dbname, ac=False):
+ if ac:
+ auto = "Y"
+ else:
+ auto = "N"
+
+ try:
+ self.__link=ingresdbi.connect(database=dbname, vnode=config.vnode, autocommit=auto)
+ self.__cursor=c=self.__link.cursor()
+ debug("Connected to %s" % dbname)
+ except ingresdbi.DataError:
+ error("Could not connect to %s" % dbname)
+
+ def __del__(self):
+ if self.__link:
+ self.__link.close()
+
+ def execute(self, query, params=False):
+ try:
+ if params:
+ self.__cursor.execute(query, params)
+ else:
+ self.__cursor.execute(query)
+ except ingresdbi.DataError, e:
+ if not e[2] == '50000':
+ raise
+ try:
+ rows = self.__cursor.fetchall()
+ except:
+ rows = []
+ return rows
+
+ def commit(self):
+ self.__link.commit()
+
+ def rollback(self):
+ try:
+ self.__link.rollback()
+ except ingresdbi.DataError:
+ pass
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/Draw.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Draw.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Draw.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,83 @@
+import Gnuplot, Gnuplot.funcutils
+from iianalyze.Config import *
+
+""" Plot class """
+class Plot():
+
+ gp = "test"
+ data = []
+
+ def __init__(self, title, xlabel, ylabel):
+ self.data = []
+ self.gp = Gnuplot.Gnuplot(debug=0)
+ self.gp.xlabel(xlabel)
+ self.gp.ylabel(ylabel)
+ self.gp.title(title)
+ self.gp("set yrange [0:]")
+ self.gp("set terminal png small size 800,600")
+ """self.gp("set terminal postscript eps enhanced color size 17cm,12cm")"""
+ self.gp("set key box width 1 below")
+
+ def draw(self, file):
+ self.gp('set output "%s/%s.png"' % (config.outputfolder, file))
+ ar = []
+ for i in range(len(self.data)):
+ ar.append("self.data[%d]" % i)
+ cmd = "self.gp.plot(%s)" % ", ".join(ar)
+ exec cmd
+
+
+
+""" Curve class """
+class Curve(Plot):
+
+ def __init__(self, title, xlabel, ylabel, time = False):
+ Plot.__init__(self, title, xlabel, ylabel)
+ self.gp("set data style linespoints")
+ if time:
+ self.gp('set xdata time')
+ self.gp('set timefmt "%s"')
+ self.gp('set format x "%m-%d\\n%H:%M"')
+
+ def add_curve(self, rows, title):
+ self.data.append(Gnuplot.Data(rows, using="1:2 with lines lw 2 title '%s', 0 with lines lc rgb 'white' " % title))
+
+ def add_line(self, value, title):
+ self.data.append(Gnuplot.Func("%d" % value, with_="lines lw 2 title '%s'" % title))
+
+ def draw(self, file):
+ Plot.draw(self, file)
+
+
+
+""" Histogram class """
+class Histogram(Plot):
+
+ __legend = ("")
+ rows = []
+
+ def __init__(self, title, xlabel, ylabel, legend):
+ Plot.__init__(self, title, xlabel, ylabel)
+ self.__legend = legend
+ self.gp("set style data histograms")
+ self.gp("set style histogram clustered gap 2")
+ self.gp("set style fill solid 1.0 border -2")
+ self.gp("set grid y")
+ self.gp("set format y '%.0f'")
+ self.gp("set format x '%.0f'")
+
+ def draw(self, file):
+ if len(self.rows) == 0:
+ return
+ for row in self.rows:
+ row.append(0)
+ str = ""
+ for i in range(len(self.rows[0]) - 2):
+ if i == len(self.rows[0]) - 3:
+ str += ", '' u %d" % (i+3)
+ else:
+ str += ", '' u %d title '%s'" % (i+3, self.__legend[i+1])
+ self.data.append(Gnuplot.Data(self.rows, using="2:xtic(1) title '%s'%s" % (self.__legend[0], str)))
+ Plot.draw(self, file)
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/Present.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Present.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Present.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,338 @@
+import random
+from iianalyze.functions import *
+from iianalyze.Config import *
+from iianalyze.Draw import *
+
+""" Present class """
+class Present():
+
+ __results = False
+
+ __javascript = []
+
+ __html = '''
+ <html>
+ <head>
+ <meta http-equiv="cache-control" content="no-cache">
+ <meta http-equiv="pragma" content="no-cache">
+ <meta http-equiv="expires" content="Thu, 01 Jan 1970 12:00:00 GMT">
+ <script type="text/javascript">
+ %s
+ </script>
+ </head>
+ <body>
+ %s
+ </body>
+ </html>
+ '''
+
+ __main = '''
+ <h1>Results for %s</h1>
+ %s
+ <hr />
+ %s
+ <hr />
+ %s
+ '''
+
+ __info = '''
+ <h2>Info:</h2>
+ <b>Started:</b> %s<br />
+ <b>Completed:</b> %s<br />
+ <br />
+ <b>VNode:</b> %s<br />
+ <b>Database:</b> %s<br />
+ <b>Queries Analyzed:</b> %d<br />
+ <br />
+ <b>Overall estimated win:</b> %d %%<br />
+ <b>Average win per statement:</b> %d %%<br />
+
+ <h3>Statistics</h3>
+ (Recorded between %s and %s)<br />
+ <br />
+ There are %d attributes in %d tables with %d indexes in this database.<br />
+ The current workload counts %d distinct statements.<br />
+ Since the DBMS is up %d selects have been executed and returned %d rows in total.<br />
+ At most %d users were connected at the same time. On average %d users are connected.<br />
+ There were at most %d sessions at the same time and %d sessions on average.<br />
+ <br /><br />
+ %s %s
+ '''
+
+ __recommendations = '''
+ <h2>Recommendations:</h2>
+ %s
+ <br /><br />
+ %s
+ '''
+
+ __workload = '''
+ <h2>Workload:</h2>
+ %s
+ '''
+
+ def __init__(self, results):
+ self.__results = results
+
+
+ def start(self):
+
+ debug("\n## Starting to prepare presentation")
+
+ self.__draw_graphs()
+ self.__render()
+ self.__write()
+
+
+ def __draw_graphs(self):
+ costs = []
+ for stmt in self.__results.statements:
+ costs.append([stmt.query_key,
+ stmt.opf_cpu+stmt.opf_dio+stmt.qef_cpu+stmt.qef_dio,
+ stmt.est_cpu+stmt.est_dio,
+ stmt.newcost])
+ """"%d %%" % (100 - (100 * (stmt.newcost+1) / (stmt.est_cpu+stmt.est_dio+1)))])"""
+
+ 'costs.sort(lambda x, y: cmp(x[1], y[1]), reverse=True)'
+ 'act_costs.sort(lambda x, y: cmp(x[1], y[1]), reverse=True)'
+ 'new_costs.sort(lambda x, y: cmp(x[1], y[1]), reverse=True)'
+
+ debug(" Drawing costs histogram")
+ histo1 = Histogram("Top 10 - Cost Comparison", "Query Key", "Cost", ("Actual Costs", "Old Cost Estimates", "New Cost Estimates"))
+ histo1.rows = costs[0:10]
+ histo1.draw("costs")
+
+ debug(" Drawing connection curve")
+ curve1 = Curve("DBMS Connections", "Time", "Connections / Sessions", True)
+ curve1.add_curve(self.__results.statistics.current_connections, "Current Connections")
+ curve1.add_curve(self.__results.statistics.current_sessions, "Current Sessions")
+ curve1.add_line(self.__results.statistics.max_sessions, "Max Sessions")
+ curve1.draw("connections")
+
+ debug(" Drawing locks curve")
+ curve2 = Curve("DBMS Locks", "Time", "Locks", True)
+ curve2.add_curve(self.__results.statistics.locks_used, "Locks Used")
+ curve2.add_curve(self.__results.statistics.deadlocks, "Deadlocks")
+ curve2.add_curve(self.__results.statistics.escalated_locks, "Escalated Locks")
+ curve2.add_curve(self.__results.statistics.lock_wait, "Lock Waits")
+ curve2.add_line(self.__results.statistics.locks_per_tx, "Locks Per Transaction")
+ curve2.add_line(self.__results.statistics.max_locks, "Max Locks")
+ curve2.draw("locks")
+
+
+ def __render(self):
+
+ debug(" Render HTML")
+
+ """ General statistics """
+
+ avg_connections = 0
+ max_connections = 0
+ for i in self.__results.statistics.current_connections:
+ avg_connections += i[1]
+ if i[1] > max_connections:
+ max_connections = i[1]
+ if avg_connections > 0:
+ avg_connections /= len(self.__results.statistics.current_connections)
+
+ avg_sessions = 0
+ max_sessions = 0
+ for i in self.__results.statistics.current_sessions:
+ avg_sessions += i[1]
+ if i[1] > max_sessions:
+ max_sessions = i[1]
+ if avg_sessions > 0:
+ avg_sessions /= len(self.__results.statistics.current_sessions)
+
+ self.__info = self.__info % (
+ self.__results.time[0],
+ self.__results.time[1],
+ config.vnode,
+ config.userdb,
+ len(self.__results.statements),
+ self.__results.overall_win,
+ self.__results.avg_win,
+ self.__results.statistics.timespan[0],
+ self.__results.statistics.timespan[1],
+ self.__results.statistics.counts[1],
+ self.__results.statistics.counts[0],
+ self.__results.statistics.counts[2],
+ self.__results.statistics.workloadsize,
+ self.__results.statistics.selects_processed,
+ self.__results.statistics.total_rows,
+ max_connections,
+ avg_connections,
+ max_sessions,
+ avg_sessions,
+ self.__picture("connections"),
+ self.__picture("locks")
+ )
+
+ self.__recommendations = self.__recommendations % (
+ self.__picture("costs"),
+ self.__get_recommendations()
+ )
+
+ self.__main = self.__main % (
+ config.userdb,
+ self.__info,
+ self.__recommendations,
+ self.__get_workload()
+ )
+
+ jscript = "\n".join(self.__javascript)
+
+ self.__html = self.__html % (
+ jscript,
+ self.__main
+ )
+
+
+ def __get_recommendations(self):
+
+ stm = {}
+ tab = {}
+ atr = {}
+ idx = {}
+ """ Recommendations need to be sorted first """
+ for rec in self.__results.recommendations:
+ if rec.object.__class__.__name__ == "Statement":
+ if not rec.object.query_key in stm:
+ stm[rec.object.query_key] = []
+ stm[rec.object.query_key].append(rec)
+ if rec.object.__class__.__name__ == "Table":
+ if not rec.object.name in tab:
+ tab[rec.object.name] = []
+ tab[rec.object.name].append(rec)
+ if rec.object.__class__.__name__ == "Attribute":
+ if not rec.object.name in atr:
+ atr[rec.object.name] = []
+ atr[rec.object.name].append(rec)
+ if rec.object.__class__.__name__ == "Index":
+ if not rec.object.name in idx:
+ idx[rec.object.name] = []
+ idx[rec.object.name].append(rec)
+
+ recom_str = """
+ <i>(Recommended %d time(s))</i><br />
+ %s %s<br /><br />
+ """
+
+ stm_str = "<h3>Statements:</h3>"
+ for s in stm:
+ stm_str += "<b>Query %s:</b><br /> " % self.__querylink(s)
+ stm[s].sort(lambda x, y: cmp(x.frequency, y.frequency), reverse=True)
+ for v in stm[s]:
+ stm_str += recom_str % (
+ v.frequency,
+ v.message(),
+ self.__get_sql(v.sql())
+ )
+
+ tab_str = "<h3>Tables:</h3>"
+ for t in tab:
+ tab_str += "<b>Table %s:</b><br /> " % t
+ tab[t].sort(lambda x, y: cmp(x.frequency, y.frequency), reverse=True)
+ for v in tab[t]:
+ tab_str += recom_str % (
+ v.frequency,
+ v.message(),
+ self.__get_sql(v.sql())
+ )
+
+ atr_str = "<h3>Attributes:</h3>"
+ for a in atr:
+ atr_str += "<b>Attribute %s(%s):</b><br /> " % (atr[a][0].object.table_name, a)
+ atr[a].sort(lambda x, y: cmp(x.frequency, y.frequency), reverse=True)
+ for v in atr[a]:
+ atr_str += recom_str % (
+ v.frequency,
+ v.message(),
+ self.__get_sql(v.sql())
+ )
+
+ idx_str = "<h3>Indexes:</h3>"
+ for i in idx:
+ idx_str += "<b>Index %s:</b><br /> " % (i)
+ idx[i].sort(lambda x, y: cmp(x.frequency, y.frequency), reverse=True)
+ for v in idx[i]:
+ idx_str += recom_str % (
+ v.frequency,
+ v.message(),
+ self.__get_sql(v.sql())
+ )
+
+ return "%s %s %s %s" % (stm_str, tab_str, atr_str, idx_str)
+
+
+ def __get_sql(self, sql):
+
+ if not sql:
+ return ""
+
+ str = "To execute this open a terminal monitor to <b>%s::%s</b> and type:<br /><br /><pre>%s \\\\g</pre>" % (
+ config.vnode, config.userdb, sql)
+
+ return self.__createpopup("(Show this as SQL)", str)
+
+
+ def __get_workload(self):
+
+ list = []
+
+ for statement in self.__results.statements:
+ str = '''
+ <h3>Query Key: <a name="%d">%d</a></h3>
+ <i>%s</i><br /><br />
+ <b>Old Estimated Costs:</b> %d | <b>New Estimated Costs:</b> %d | (%d %%)
+ <br /><br />
+ ''' % (
+ statement.query_key,
+ statement.query_key,
+ statement.query_text,
+ statement.est_cpu + statement.est_dio,
+ statement.newcost,
+ (100 - (100 * (statement.newcost+1) / (statement.est_cpu + statement.est_dio+1)))
+ )
+ list.append(str)
+
+ return self.__workload % "\n".join(list)
+
+
+ def __querylink(self, key):
+
+ return '<a href="#%d">%d</a>' % (key, key)
+
+
+ def __createpopup(self, name, content):
+
+ """ For unique names of popup windows """
+ window = random.randint(1,50000)
+
+ js = '''function open_%s() {
+ w_%s = window.open("", "", "");
+ w_%s.document.writeln('%s<br /><br /><a href="#" onclick="window.close()">Close this window</a>');
+ }
+ ''' % (window, window, window, content)
+
+ self.__javascript.append(js)
+
+ return '<a href="#" onclick="open_%s(); return false">%s</a>' % (window, name)
+
+
+ def __picture(self, name):
+
+ thumb = '<img src="%s.png" border="0" height="200" alt="%s" />' % (name, name)
+ img = '<img src="%s.png" border="0" alt="%s" />' % (name, name)
+
+ return self.__createpopup(thumb, img)
+
+
+ def __write(self):
+ debug(" Write HTML to file")
+ file = "%s/index.html" % config.outputfolder
+ f = open(file,'w')
+ f.writelines(self.__html)
+ f.close()
+
+
Added: branches/advisor/src/front/analyzedb/iianalyze/__init__.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/__init__.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/__init__.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1 @@
+""" Nothing """
Added: branches/advisor/src/front/analyzedb/iianalyze/functions.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/functions.py (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/functions.py 2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,16 @@
+import sys
+from iianalyze.Config import *
+
+""" Global functions """
+def output(message):
+ print message
+
+def debug(message):
+ if config.verbose:
+ output(message)
+
+def error(message):
+    """ Print message prefixed with "Error: " and exit with status 1 """
+    text = "Error: %s" % message
+    output(text)
+    sys.exit(1)
+
+
More information about the svn-commits
mailing list