[svn-commits] r178 - in branches/advisor/src/front: . analyzedb analyzedb/iianalyze

thial01 at ingres.com thial01 at ingres.com
Thu Aug 7 01:58:05 PDT 2008


Author: thial01
Date: 2008-08-07 01:58:05 -0700 (Thu, 07 Aug 2008)
New Revision: 178

Added:
   branches/advisor/src/front/analyzedb/
   branches/advisor/src/front/analyzedb/analyzedb
   branches/advisor/src/front/analyzedb/iianalyze/
   branches/advisor/src/front/analyzedb/iianalyze/Analyze.py
   branches/advisor/src/front/analyzedb/iianalyze/Config.py
   branches/advisor/src/front/analyzedb/iianalyze/Containers.py
   branches/advisor/src/front/analyzedb/iianalyze/Db.py
   branches/advisor/src/front/analyzedb/iianalyze/Draw.py
   branches/advisor/src/front/analyzedb/iianalyze/Present.py
   branches/advisor/src/front/analyzedb/iianalyze/__init__.py
   branches/advisor/src/front/analyzedb/iianalyze/functions.py
Log:
Adding the analyzer client - see #174


Added: branches/advisor/src/front/analyzedb/analyzedb
===================================================================
--- branches/advisor/src/front/analyzedb/analyzedb	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/analyzedb	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,107 @@
+#!/usr/bin/python
+
+import sys
+import pprint
+import string
+import os
+import getopt
+# Locate the Ingres installation and make the iianalyze utility folder importable
+II_SYSTEM = os.getenv("II_SYSTEM")
+if not II_SYSTEM:
+	print "II_SYSTEM is not set"
+	sys.exit(1)
+# sys.path entries have to be plain strings, so join the components into one path
+sys.path.insert(0, os.path.join(II_SYSTEM, "ingres", "utility", "iianalyze"))
+
+from iianalyze.Config import *
+from iianalyze.Analyze import *
+from iianalyze.Present import *
+from iianalyze.functions import *
+
+# Default report location below the Ingres installation
+config.outputfolder = os.path.join(II_SYSTEM, "files", "analyzer")
+
+# NOTE(review): the assignments below look like development overrides - they
+# replace the output folder chosen above and hard-code a vnode and a query
+# limit that differ from the defaults printed by usage(); confirm before release
+config.vnode = "vm3"
+config.outputfolder = "./tmp"
+config.querylimit = 10
+
+def usage():
+	""" Print the command line help through output() """
+	output("""
+Usage:
+  analyzedb [options] dbname
+ 
+	dbname		the name of the database
+
+	options:
+		-h, --help		this screen
+
+		-d, --debug		run in verbose mode
+
+		-l n, --limit=n		limits the number of queries 
+					to analyse 
+					(default: -1 = all)
+
+		-t n, --timeout=n	sets the joinop timeout in seconds 
+					(default: 300 seconds)
+
+		-o f, --output=folder	the folder where the result will
+					be placed in 
+					(default: II_SYSTEM/files/analyzer)
+
+		-v v, --vnode=vnode	vnode used for the connection
+					(default: (local))
+""")
+
+def main():
+	"""Parse the command line, run the analysis and present the results.
+
+	Expected call: analyzedb [options] dbname.  The usage text is shown and
+	the problem is reported through error() when an option is invalid or the
+	database name is missing.
+	"""
+
+	output("Ingres Design Analyzer\n")
+
+	# gnu_getopt accepts options before and after the database name and
+	# returns everything that is not an option in args
+	try:
+		opts, args = getopt.gnu_getopt(sys.argv[1:], "hdl:t:o:v:", ["help", "debug", "limit=", "timeout=", "output=", "vnode="])
+	except getopt.GetoptError:
+		usage()
+		error("Invalid arguments given")
+		# nothing sensible can be done without parsed options
+		sys.exit(1)
+
+	for opt, arg in opts:
+		if opt in ("-h", "--help"):
+			usage()
+			sys.exit(0)
+		elif opt in ("-d", "--debug"):
+			config.verbose = True
+			debug("Running in verbose mode")
+		elif opt in ("-l", "--limit"):
+			try:
+				config.querylimit = int(arg)
+			except ValueError:
+				error("Limit needs a number as value")
+		elif opt in ("-t", "--timeout"):
+			try:
+				# given in seconds, stored multiplied by 1000
+				config.timeout = int(arg) * 1000
+			except ValueError:
+				error("Timeout needs a number as value")
+		elif opt in ("-o", "--output"):
+			config.outputfolder = arg
+		elif opt in ("-v", "--vnode"):
+			config.vnode = arg
+
+	# sys.argv always contains the script name, so a missing database name
+	# shows up as an empty list of non-option arguments
+	if len(args) == 0:
+		usage()
+		error("Database name is missing")
+		sys.exit(1)
+
+	# the database name is the last non-option argument
+	config.userdb = args[-1]
+
+	analyze = Analyze(config)
+	analyze.start()
+
+	results = analyze.results()
+
+	present = Present(results)
+	present.start()
+
+
+main()
+
+


Property changes on: branches/advisor/src/front/analyzedb/analyzedb
___________________________________________________________________
Name: svn:executable
   + *

Added: branches/advisor/src/front/analyzedb/iianalyze/Analyze.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Analyze.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Analyze.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,503 @@
+import time
+from iianalyze.Db import *
+from iianalyze.functions import *
+from iianalyze.Containers import *
+
+""" Analyze class """
+class Analyze():
+	"""Analyzes the recorded workload of one user database.
+
+	start() runs the analysis and results() returns the outcome as a
+	Results object.
+	"""
+
+	def __init__(self, config):
+		""" Keep the configuration and set up empty per-instance state """
+		self.__config = config
+
+		# All state lives on the instance: lists defined on the class would
+		# be shared and mutated by every Analyze object
+		self.__recommendations = []
+		self.__statements = []
+		self.__indexes = []
+		self.__attributes = []
+		self.__tables = []
+		self.__statistics = False
+		# [old cost, new cost], filled by __test_configuration()
+		self.__overall_cost = [-1, -1]
+		# win in percent of every re-run statement
+		self.__avg_win = []
+		# database connections, opened by start()
+		self.__userdb = False
+		self.__imadb = False
+		self.__workloaddb = False
+		# [start, end] of the analysis as returned by time.ctime()
+		self.__time = [0, 0]
+		self.__workloadsize = 0
+
+	def start(self):
+		"""Run the complete analysis.
+
+		Connects to the user database, imadb and workloaddb, loads the
+		recorded indexes, attributes, tables and statements, processes every
+		loaded statement, tests the recommended configuration, recommends
+		dropping indexes with a frequency of 0 and collects the statistics.
+		"""
+
+		# Start time
+		self.__time[0] = time.ctime()
+
+		self.__userdb = DB(self.__config, self.__config.userdb)
+		self.__imadb = DB(self.__config, "imadb")
+		self.__workloaddb = DB(self.__config, "workloaddb", True)
+
+		# Tell the DBMS we're going to test now
+		self.__userdb.execute("set trace point sc7")
+
+		# Set the joinop timeout to a high value to give OPF enough time to
+		# find better plans.  The value comes from the configuration handed to
+		# __init__ rather than from a module-level name this file does not define.
+		self.__userdb.execute("set joinop timeout %d" % self.__config.timeout)
+
+		debug("## Start Analyzing...\n")
+
+		self.__load_indexes()
+		self.__load_attributes()
+		self.__load_tables()
+		self.__load_statements()
+
+		for statement in self.__statements:
+			self.__process_statement(statement)
+
+		# Get the cost of the new configuration
+		self.__test_configuration()
+
+		# Find indexes we may want to drop
+		debug("  ## Find unused indexes")
+		for index in self.__indexes:
+			if index.frequency == 0:
+				# the attribute supplies the table and column name for the message
+				for attr in self.__attributes:
+					if attr.table_id == index.table_id and attr.id == index.attribute_id:
+						self.__recommend(index, "drop_index", (index.name, attr.table_name, attr.name))
+						break
+
+		self.__process_statistics()
+
+		# End time
+		self.__time[1] = time.ctime()
+
+
+	def __load_indexes(self):
+		""" Read all indexes recorded for the user database into Index objects """
+		query = "select \
+			i.index_id, i.name, i.table_id, i.attribute_id, i.frequency, i.structure, i.data_pages, i.overflow_pages \
+			from indexes i where i.database = ?"
+		rows = self.__workloaddb.execute(query, (self.__config.userdb,))
+		self.__indexes.extend(Index(row) for row in rows)
+
+
+	def __load_attributes(self):
+		""" Read all attributes recorded for the user database into Attribute objects """
+		query = "select \
+			a.attribute_id, a.name, a.table_id, a.frequency, a.statistics, t.name \
+			from attributes a, tables t where a.table_id = t.table_id and a.database = t.database and a.database = ?"
+		rows = self.__workloaddb.execute(query, (self.__config.userdb,))
+		# every attribute gets the already loaded indexes to pick its own from
+		self.__attributes.extend(Attribute(row, self.__indexes) for row in rows)
+
+
+	def __load_tables(self):
+		""" Read all tables recorded for the user database into Table objects """
+		query = "select \
+			t.table_id, t.name, t.frequency, t.est_cpu, t.act_cpu, t.est_dio, t.act_dio, \
+			t.est_tup, t.act_tup, t.structure, t.data_pages, t.overflow_pages \
+			from tables t where t.database = ?"
+		rows = self.__workloaddb.execute(query, (self.__config.userdb,))
+		# every table gets the already loaded attributes to pick its own from
+		self.__tables.extend(Table(row, self.__attributes) for row in rows)
+
+
+	def __load_statements(self):
+		""" Load the select statements of the workload, most frequent and expensive ones first """
+		query = "select \
+			query_key, query_text, frequency, opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time integer4 \
+			from statements where database = ? order by (frequency * (opf_cpu + opf_dio + qef_cpu + qef_dio)) desc"
+		rows = self.__workloaddb.execute(query, (self.__config.userdb,))
+
+		# the workload size counts every recorded statement, not only the loaded ones
+		self.__workloadsize = len(rows)
+
+		limit = self.__config.querylimit
+		loaded = 0
+		for row in rows:
+			# a limit of -1 means "no limit"
+			if limit != -1 and loaded >= limit:
+				debug("Limit of queries to process reached - stopping")
+				break
+
+			# only statements whose text starts with "select" are analyzed
+			if not row[1].lstrip().lower().startswith("select"):
+				continue
+
+			loaded += 1
+			stmt = Statement(row)
+			self.__load_references(stmt)
+			self.__statements.append(stmt)
+
+
+	def __load_references(self, statement):
+		"""Attach the referenced tables, attributes and indexes to a statement.
+
+		Looks up the references recorded for the statement's query key and
+		hands them to statement.attach_references() as a list of
+		[table, attributes] pairs and a list of indexes.
+		"""
+
+		debug("\n\n## Statement %d: %s\n" % (statement.query_key, statement.query_text))
+
+		debug("  ## Tables:")
+
+		# Get all tables used in this statement
+		results = self.__workloaddb.execute("select \
+			t.table_id from tables t, references r where t.table_id = r.object_id and r.object_type = 0 \
+			and r.query_key = ? and r.database = ?  and t.database = r.database \
+			order by t.frequency desc",
+			(statement.query_key, self.__config.userdb))
+
+		table_references = []
+		for row in results:
+
+			# Find the corresponding table object
+			for table in self.__tables:
+				if table.id == row[0]:
+					break
+			else:
+				# No loaded table matches this reference - skip it instead of
+				# continuing with whatever table the search visited last
+				continue
+
+			debug("    %s" % table.name)
+
+			debug("      ## Attributes:")
+
+			# Get all attributes for this table that were used in this statement
+			results1 = self.__workloaddb.execute("select \
+				a.attribute_id, a.table_id from attributes a, references r where a.table_id = ? \
+				and a.table_id = r.table_id and a.attribute_id = r.object_id and r.object_type = 1 \
+				and r.query_key = ? and r.database = ? and a.database = r.database \
+				order by a.frequency desc",
+				(table.id, statement.query_key, self.__config.userdb))
+
+			attr_list = []
+			for row1 in results1:
+				# Find the corresponding attribute object
+				for attribute in self.__attributes:
+					if attribute.id == row1[0] and attribute.table_id == row1[1]:
+						debug("          %s" % attribute.name)
+						attr_list.append(attribute)
+						break
+
+			table_references.append([table, attr_list])
+
+		# Get indexes used in this statement
+		results = self.__workloaddb.execute("select \
+			i.index_id from indexes i, references r where i.index_id = r.object_id and r.object_type = 2 and r.query_key = ? \
+			and r.database = ? and i.database = r.database and i.frequency > 0 order by i.frequency desc",
+			(statement.query_key, self.__config.userdb))
+
+		debug("  ## Used indexes:")
+
+		index_references = []
+		for row in results:
+			# Find the corresponding index objects
+			for index in self.__indexes:
+				if index.id == row[0]:
+					index_references.append(index)
+					debug("    %s" % index.name)
+
+		statement.attach_references(table_references, index_references)
+
+
+	def __process_statement(self, statement):
+		"""Apply the analysis rules to one statement and test index candidates.
+
+		Records recommendations for the statement itself (no indexes used,
+		high optimizer share, estimates far off the measured cost), for its
+		tables (overflow pages, missing statistics, too many indexes) and for
+		their attributes (duplicate indexes).  Used attributes without an
+		index are then created as virtual indexes, the statement is re-run and
+		the virtual indexes listed for a cheaper plan are recommended.
+		"""
+
+		test_indexes = []
+
+		debug("  ## Analyzing Query: %d" % statement.query_key)
+
+		if len(statement.indexes) == 0:
+			self.__recommend(statement, "no_indexes_used", ())
+
+		# OPF cost more than 50% of the combined OPF and QEF cost
+		pct = 100 * (statement.opf_cpu + statement.opf_dio + 1) / \
+			(statement.opf_cpu + statement.opf_dio + statement.qef_cpu + statement.qef_dio + 1)
+		if pct > 50:
+			# one-element tuples keep the message formatting independent of the value
+			self.__recommend(statement, "high_opf_cost", (pct,))
+
+		# Estimates wrong - no or outdated statistics?
+		pct = 100 - (100 * (statement.est_cpu + statement.est_dio + 1) / \
+			(statement.opf_cpu + statement.opf_dio + statement.qef_cpu + statement.qef_dio + 1))
+		if pct > 20 or pct < -20:
+			self.__recommend(statement, "wrong_estimates", (pct,))
+
+		for table, used_attributes in statement.tables:
+			need_statistics = False
+
+			# More than 10% overflow pages
+			if table.overflow_pages > table.data_pages * 10 / 100:
+				self.__recommend(table, "overflow_pages", (table.name,))
+
+			for attribute in table.attributes:
+
+				# No statistics
+				if attribute.statistics == 0:
+					need_statistics = True
+
+				# More than one index on one attribute
+				if len(attribute.indexes) > 1:
+					self.__recommend(attribute, "duplicate_index", (table.name, attribute.name))
+
+			if need_statistics:
+				self.__recommend(table, "missing_statistics", (table.name,))
+
+			# Indexes on half of all attributes are acceptable, but never less than 5
+			max_idx_count = max(len(table.attributes) / 2, 5)
+
+			# Count the attributes of this table that already have an index
+			idx_count = len([attribute for attribute in table.attributes if len(attribute.indexes) > 0])
+
+			if idx_count > max_idx_count:
+				self.__recommend(table, "too_many_indexes", (table.name, idx_count, len(table.attributes)))
+
+			# Queue the used attributes without an index as candidates
+			for attribute in used_attributes:
+				if idx_count >= max_idx_count or len(attribute.indexes) > 0:
+					continue
+				# identity check - an attribute queued before is not queued again
+				if not any(attr is attribute for attr in test_indexes):
+					idx_count += 1
+					test_indexes.append(attribute)
+
+		# We collected index recommendations - let's test them
+		if len(test_indexes):
+			debug("      Start testing index recommendations")
+
+			i = 0
+			best_co = [-1, []]
+			for attribute in test_indexes:
+				i += 1
+				idx_name = "virt_idx__%d" % i
+				debug("        Creating virtual index %d on %s(%s)" % (i, attribute.table_name, attribute.name) )
+				try:
+					self.__userdb.execute("create virtual index %s on %s(%s) with structure = btree" % (idx_name, attribute.table_name, attribute.name))
+				except Exception:
+					debug("        Error while creating virtual index - removed from list!")
+					# keep the slot so the index numbers still match the list positions
+					test_indexes[i-1] = False
+
+			debug("        Running query")
+			self.__userdb.execute(statement.query_text)
+			debug("        Evaluating query cost")
+			result = self.__imadb.execute("select cpu, dio, pages_touched, vindexes from ima_scm_analyze")
+			cpu = result[0][0]
+			dio = result[0][1]
+			newcost = cpu + dio
+			oldcost = statement.est_cpu + statement.est_dio
+			debug("          old %d - new %d" % (oldcost, newcost))
+			# Better plan found
+			if newcost < oldcost:
+				debug("        New plan is better")
+				best_co[0] = newcost
+				# Split string of used virtual indexes into list
+				best_co[1] = result[0][3].split(" ")
+
+			self.__userdb.rollback()
+
+			if best_co[0] > 0:
+				for idx in best_co[1]:
+					if len(idx):
+						# the number behind the 10 character prefix "virt_idx__"
+						# is the 1-based position in test_indexes
+						attribute = test_indexes[(int(idx[10:])-1)]
+						if attribute:
+							self.__recommend(attribute, "new_index", (attribute.table_name, attribute.name))
+
+
+	def __test_configuration(self):
+		"""Measure the workload cost with all recommended indexes in place.
+
+		Creates a virtual index for every "new_index" recommendation, re-runs
+		every loaded statement, stores the new cost on the statement and the
+		win in percent in __avg_win, and finally stores the summed old and new
+		cost in __overall_cost.
+		"""
+		debug("## Testing new configuration")
+		
+		""" Reset joinop timeout to default """
+		self.__userdb.execute("set joinop timeout")
+
+		# one [query_key, oldcost, newcost] entry per statement that ran
+		costs = []
+		i = 0
+		""" Get all recommended indexes and create them """
+		# NOTE(review): unlike in __process_statement a failing index creation
+		# is not caught here and aborts the analysis - confirm this is intended
+		for rec in self.__recommendations:
+			if rec.msg == "new_index":
+				attr = rec.object
+				i += 1
+				idx_name = "virt_idx__%d" % i
+				debug("  Creating virtual index %d on %s(%s)" % (i, attr.table_name, attr.name) )
+				self.__userdb.execute("create virtual index %s on %s(%s) with structure = btree" % (idx_name, attr.table_name, attr.name))
+		
+		""" Re-run all queries to get their new cost with the final index set """
+		debug("  ## Running workload")
+		for statement in self.__statements:
+			debug("    Query: %d" % statement.query_key)
+			try:
+				self.__userdb.execute(statement.query_text)
+			except:
+				# a failing statement keeps its newcost and is left out of the totals
+				debug("      Error running query - skipped")
+				continue
+			result = self.__imadb.execute("select cpu, dio, pages_touched, vindexes from ima_scm_analyze")
+			cpu = result[0][0]
+			dio = result[0][1]
+			newcost = cpu + dio
+			statement.newcost = newcost
+			oldcost = statement.est_cpu + statement.est_dio
+			# win in percent; the +1 on both sides avoids a division by zero
+			win = 100 - (100 * (newcost+1) / (oldcost+1))
+			self.__avg_win.append(win)
+			debug("      %d %%   old %d   new %d" % (win, oldcost, newcost))
+			costs.append([statement.query_key, oldcost, newcost])
+
+		# presumably discards the virtual indexes created above - TODO confirm
+		self.__userdb.rollback()
+
+		# sum up the cost of all statements that ran
+		oldcost = 0
+		newcost = 0
+		for cost in costs:
+			oldcost += cost[1]
+			newcost += cost[2]
+
+		self.__overall_cost[0] = oldcost
+		self.__overall_cost[1] = newcost
+
+
+	def __process_statistics(self):
+		"""Collect the recorded DBMS statistics into a Statistics object.
+
+		Every row of the statistics table contributes one [time, value] sample
+		to the time series; the totals are taken from the most recent row.
+		Without any recorded row only the workload size and the object counts
+		are filled in.
+		"""
+
+		self.__statistics = Statistics()
+
+		# these figures do not depend on the recorded samples
+		self.__statistics.workloadsize = self.__workloadsize
+		self.__statistics.counts = [len(self.__tables), len(self.__attributes), len(self.__indexes)]
+
+		# Get statistical data
+		results = self.__workloaddb.execute("select \
+			time, current_connections, current_sessions, max_sessions, total_rows, selects_processed, \
+			locks_per_tx, max_locks, locks_used, deadlocks, escalated_locks, lock_wait \
+			from statistics order by time")
+
+		if len(results) == 0:
+			# nothing recorded - there is no last row to read the totals from
+			debug("  No statistics recorded")
+			return
+
+		for row in results:
+			self.__statistics.current_connections.append([row[0], row[1]])
+			self.__statistics.current_sessions.append([row[0], row[2]])
+			self.__statistics.locks_used.append([row[0], row[8]])
+			self.__statistics.deadlocks.append([row[0], row[9]])
+			self.__statistics.escalated_locks.append([row[0], row[10]])
+			self.__statistics.lock_wait.append([row[0], row[11]])
+
+		# rows are ordered by time, so the last one is the most recent
+		last_row = results[-1]
+		self.__statistics.max_sessions = last_row[3]
+		self.__statistics.total_rows = last_row[4]
+		self.__statistics.selects_processed = last_row[5]
+		self.__statistics.locks_per_tx = last_row[6]
+		self.__statistics.max_locks = last_row[7]
+		self.__statistics.timespan = [time.ctime(results[0][0]), time.ctime(last_row[0])]
+
+
+	def __recommend(self, object, message, rpl = False):
+		""" Record a recommendation; a repeated one only raises the counter of the existing entry """
+
+		debug("        %s" % message)
+
+		# the same recommendation means: same message for the very same object
+		known = [rec for rec in self.__recommendations
+			if rec.object is object and rec.msg == message]
+		if known:
+			known[0].recommend()
+		else:
+			self.__recommendations.append(Recommendation(object, message, rpl))
+
+
+	def results(self):
+		""" Return a Results object with the statements, recommendations, wins, statistics and times """
+
+		old_cost, new_cost = self.__overall_cost
+		overall = 100 - (100 * (new_cost+1) / (old_cost+1))
+		debug("\n  Overall win: %d %%" % overall)
+		debug("    old %d  -  new %d" % (old_cost, new_cost))
+
+		# average of the per statement wins, 0 when no statement was re-run
+		wins = self.__avg_win
+		avg = sum(wins)
+		if len(wins) > 0:
+			avg /= len(wins)
+		debug("  Average win per statement: %d %%" % avg)
+
+		return Results(self.__statements, self.__recommendations, overall, avg, self.__statistics, self.__time)
+
+
+
+""" Static Messages class """
+class Messages():
+
+	# Maps a recommendation key to a list of three entries:
+	#   [0] the message text, a %-template filled with the values handed over
+	#       together with the recommendation
+	#   [1] an SQL template with named %(...)s placeholders, "" when no
+	#       statement is offered
+	#   [2] the attribute names that are read from the recommended object to
+	#       fill the SQL template
+	messages={
+		"no_indexes_used":
+			["This is not using any indexes. You should take a look at its QEP.",
+			"SET QEP; SET OPTIMIZE ONLY; [your query]", []],
+		"drop_index":
+			["The index %s on %s(%s) is never used in this workload and should therefore be dropped.",
+			"DROP INDEX %(name)s", ["name"]],
+		"high_opf_cost":
+			["This statement spends %d %% of its time in the optimzer. You should take a look at its QEP.",
+			"SET QEP; SET OPTIMIZE ONLY; [your query]", []],
+		"wrong_estimates":
+			["The cost estimates of this statement are %d %% off the actual costs - this may caused by missing or outdated statistics.",
+			"", []],
+		"missing_statistics":
+			["One or more attributes of %s don't have statistics. Run optimizedb to create statistics.",
+			"", []],
+		"duplicate_index":
+			["Attribute %s(%s) has more than one index. You should remove duplicate indexes.",
+			"", []],
+		"new_index":
+			["Creating an index on %s(%s) would be beneficial.",
+			"CREATE INDEX %(name)s_idx ON %(table_name)s(%(name)s) WITH STRUCTURE = BTREE", ["name", "table_name"]],
+		"overflow_pages":
+			["%s has many overflow pages. You should restructurize the table or modify it to B-Tree.",
+			"MODIFY %(name)s TO BTREE", ["name"]],
+		"too_many_indexes":
+			["In table %s %d of %d attributes have indexes. This is considered to be too much and may decrease performance.",
+			"", []]
+		}
+
+
+
+""" Recommendation class """
+class Recommendation():
+	""" One recommendation for an object together with the number of times it was made """
+
+	def __init__(self, object, message, rpl):
+		""" object is the recommended item, message a key of Messages.messages, rpl the message values """
+		self.object = object
+		self.msg = message
+		self.__rpl = rpl
+		# How often did we recommend this
+		self.frequency = 1
+
+	def recommend(self):
+		""" Count one more occurrence of this recommendation """
+		self.frequency += 1
+
+	def message(self):
+		""" Return the message text, filled with the stored values when there are any """
+		template = Messages().messages[self.msg][0]
+		if self.__rpl:
+			return template % self.__rpl
+		return template
+
+	def sql(self):
+		""" Return the SQL statement for this recommendation or False when none is offered """
+		entry = Messages().messages[self.msg]
+		if entry[1] == "":
+			return False
+
+		# the placeholders are filled from attributes of the recommended object
+		values = dict((name, getattr(self.object, name)) for name in entry[2])
+		return entry[1] % values
+
+
+
+
+""" Result class """
+class Results():
+	""" Plain container handing the outcome of an analysis to the presentation """
+
+	# Class-level defaults; every instance replaces them in __init__
+	statements = []
+	recommendations = []
+	overall_win = [-1, -1]
+	avg_win = []
+	statistics = False
+	time = [0, 0]
+
+	def __init__(self, statements, recommendations, overall, avg, statistics, time ):
+		"""Store the analysis outcome.
+
+		statements and recommendations are the analyzed statements and the
+		recommendations made for them, overall and avg the overall and the
+		average win, statistics the collected statistics object and time the
+		recorded times of the analysis.
+		"""
+		self.statements = statements
+		self.recommendations = recommendations
+		self.overall_win = overall
+		self.avg_win = avg
+		self.statistics = statistics
+		self.time = time
+
+

Added: branches/advisor/src/front/analyzedb/iianalyze/Config.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Config.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Config.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,14 @@
+
+""" Config class """
+class Config():
+	""" Settings of the analyzer together with their default values """
+
+	# name of the database to analyze
+	userdb = ""
+	# vnode used for the database connections
+	vnode = "(local)"
+	# folder the output is written to
+	outputfolder = "/tmp/"
+	# number of queries to analyze, -1 stands for all
+	querylimit = -1
+	# joinop timeout
+	timeout = 300000
+	# switches the verbose mode on
+	verbose = False
+
+config = Config()
+
+

Added: branches/advisor/src/front/analyzedb/iianalyze/Containers.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Containers.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Containers.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,104 @@
+
+""" Statement class """
+class Statement():
+	""" One statement of the workload with its cost figures and references """
+
+	def __init__(self, values):
+		""" Fill the statement from a row whose columns are read by position 0 to 10 """
+		columns = ("query_key", "query_text", "frequency", "opf_cpu", "opf_dio",
+			"qef_cpu", "qef_dio", "est_cpu", "est_dio", "pages_touched", "time")
+		for position, column in enumerate(columns):
+			setattr(self, column, values[position])
+		self.query_text = self.query_text.strip()
+
+		# -1 until a new cost has been stored
+		self.newcost = -1
+		self.tables = []
+		self.indexes = []
+
+	def attach_references(self, tables, indexes):
+		""" Store the tables and indexes referenced by this statement """
+		self.indexes = indexes
+		self.tables = tables
+
+
+""" Table class """
+class Table():
+	""" One table of the analyzed database together with its attributes """
+
+	def __init__(self, values, attributes):
+		""" Fill the table from a row (columns 0 to 11) and pick its own attributes """
+		columns = ("id", "name", "frequency", "est_cpu", "act_cpu", "est_dio",
+			"act_dio", "est_tup", "act_tup", "structure", "data_pages", "overflow_pages")
+		for position, column in enumerate(columns):
+			setattr(self, column, values[position])
+		self.name = self.name.strip()
+
+		# keep only the attributes that belong to this table
+		self.attributes = [attribute for attribute in attributes
+			if attribute.table_id == self.id]
+
+
+
+""" Attribute class """
+class Attribute():
+	""" One attribute of a table together with the indexes defined on it """
+
+	def __init__(self, values, indexes):
+		""" Fill the attribute from a row (columns 0 to 5) and pick its own indexes """
+		columns = ("id", "name", "table_id", "frequency", "statistics", "table_name")
+		for position, column in enumerate(columns):
+			setattr(self, column, values[position])
+		self.name = self.name.strip()
+		self.table_name = self.table_name.strip()
+
+		# keep only the indexes on this attribute of this table
+		self.indexes = [index for index in indexes
+			if index.table_id == self.table_id and index.attribute_id == self.id]
+
+
+
+""" Index class """
+class Index():
+	""" One index of the analyzed database """
+
+	def __init__(self, values):
+		""" Fill the index from a row whose columns are read by position 0 to 7 """
+		columns = ("id", "name", "table_id", "attribute_id", "frequency",
+			"structure", "data_pages", "overflow_pages")
+		for position, column in enumerate(columns):
+			setattr(self, column, values[position])
+		self.name = self.name.strip()
+
+
+
+""" Statistics class """
+class Statistics():
+	""" Container for the statistics collected during an analysis """
+
+	# Class-level defaults, kept for code that reads them from the class
+	current_connections = []
+	current_sessions = []
+	max_session = 0
+	# the analysis stores the value under this name
+	max_sessions = 0
+	total_rows = 0
+	selects_processed = 0
+	locks_per_tx = 0
+	max_locks = 0
+	locks_used = []
+	deadlocks = []
+	escalated_locks = []
+	lock_wait = []
+
+	workloadsize = 0
+	timespan = [0, 0]
+	counts = [0, 0, 0]
+
+	def __init__(self):
+		""" Give every instance its own lists instead of the shared class-level ones """
+		for name in ("current_connections", "current_sessions", "locks_used",
+				"deadlocks", "escalated_locks", "lock_wait"):
+			setattr(self, name, [])
+		self.timespan = [0, 0]
+		self.counts = [0, 0, 0]
+
+

Added: branches/advisor/src/front/analyzedb/iianalyze/Db.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Db.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Db.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,51 @@
+import ingresdbi
+from iianalyze.functions import *
+
+""" Database Class """
+class DB():
+	""" Wrapper around one ingresdbi connection and its cursor """
+
+	__link = False
+	__cursor = False
+
+	def __init__(self, config, dbname, ac=False):
+		""" Connect to dbname on config.vnode; ac switches autocommit on """
+		autocommit = "Y" if ac else "N"
+
+		try:
+			link = ingresdbi.connect(database=dbname, vnode=config.vnode, autocommit=autocommit)
+			self.__link = link
+			self.__cursor = link.cursor()
+			debug("Connected to %s" % dbname)
+		except ingresdbi.DataError:
+			error("Could not connect to %s" % dbname)
+
+	def __del__(self):
+		""" Close the connection if one was opened """
+		if self.__link:
+			self.__link.close()
+
+	def execute(self, query, params=False):
+		""" Run query, with params when given, and return the fetched rows or an empty list """
+		arguments = (query,)
+		if params:
+			arguments = (query, params)
+
+		try:
+			self.__cursor.execute(*arguments)
+		except ingresdbi.DataError, e:
+			# only errors carrying the code '50000' are ignored
+			if not e[2] == '50000':
+				raise
+
+		try:
+			return self.__cursor.fetchall()
+		except:
+			# presumably the statement produced no result set - TODO confirm
+			return []
+
+	def commit(self):
+		""" Commit the current transaction """
+		self.__link.commit()
+
+	def rollback(self):
+		""" Roll back the current transaction; a DataError raised by it is ignored """
+		try:
+			self.__link.rollback()
+		except ingresdbi.DataError:
+			pass
+
+

Added: branches/advisor/src/front/analyzedb/iianalyze/Draw.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Draw.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Draw.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,83 @@
+import Gnuplot, Gnuplot.funcutils
+from iianalyze.Config import *
+
+""" Plot class """
+class Plot():
+	""" Base class that sets up a Gnuplot session writing PNG files """
+
+	gp = "test"
+	data = []
+
+	def __init__(self, title, xlabel, ylabel):
+		""" Start a Gnuplot session and apply title, axis labels and output format """
+		# the items to plot, collected per instance
+		self.data = []
+		self.gp = Gnuplot.Gnuplot(debug=0)
+		self.gp.xlabel(xlabel)
+		self.gp.ylabel(ylabel)
+		self.gp.title(title)
+		self.gp("set yrange [0:]")
+		self.gp("set terminal png small size 800,600")
+		# alternative terminal, currently not used:
+		# self.gp("set terminal postscript eps enhanced color size 17cm,12cm")
+		self.gp("set key box width 1 below")
+
+	def draw(self, file):
+		""" Plot all collected items into <config.outputfolder>/<file>.png """
+		self.gp('set output "%s/%s.png"' % (config.outputfolder, file))
+		# hand every collected item to plot() as its own positional argument
+		self.gp.plot(*self.data)
+
+
+
+""" Curve class """
+class Curve(Plot):
+	""" Plot made of curves and constant lines """
+
+	def __init__(self, title, xlabel, ylabel, time = False):
+		""" Set up the plot; with time set the x axis is formatted as time """
+		Plot.__init__(self, title, xlabel, ylabel)
+		commands = ["set data style linespoints"]
+		if time:
+			commands.extend((
+				'set xdata time',
+				'set timefmt "%s"',
+				'set format x "%m-%d\\n%H:%M"'))
+		for command in commands:
+			self.gp(command)
+
+	def add_curve(self, rows, title):
+		""" Add the rows as a curve labelled with title """
+		spec = "1:2 with lines lw 2 title '%s', 0 with lines lc rgb 'white' " % title
+		curve = Gnuplot.Data(rows, using=spec)
+		self.data.append(curve)
+
+	def add_line(self, value, title):
+		""" Add a constant line at value labelled with title """
+		style = "lines lw 2 title '%s'" % title
+		line = Gnuplot.Func("%d" % value, with_=style)
+		self.data.append(line)
+
+	def draw(self, file):
+		""" Write the plot through Plot.draw() """
+		Plot.draw(self, file)
+
+
+
+""" Histogram class """
+class Histogram(Plot):
+	"""Clustered histogram.
+
+	Each entry of rows is a list: the first value labels the cluster on the
+	x axis, the following values are the bars of that cluster.  legend holds
+	one title per bar.
+	"""
+
+	__legend = ("")
+	rows = []
+
+	def __init__(self, title, xlabel, ylabel, legend):
+		""" Set up the plot and switch Gnuplot to clustered histograms """
+		Plot.__init__(self, title, xlabel, ylabel)
+		self.__legend = legend
+		# every histogram gets its own row list instead of the shared class-level one
+		self.rows = []
+		self.gp("set style data histograms")
+		self.gp("set style histogram clustered gap 2")
+		self.gp("set style fill solid 1.0 border -2")
+		self.gp("set grid y")
+		self.gp("set format y '%.0f'")
+		self.gp("set format x '%.0f'")
+
+	def draw(self, file):
+		""" Write the histogram through Plot.draw(); nothing is written without rows """
+		if len(self.rows) == 0:
+			return
+
+		# every row gets a trailing 0 column which is plotted without a title
+		for row in self.rows:
+			row.append(0)
+
+		columns = len(self.rows[0])
+		extra = ""
+		for i in range(columns - 2):
+			if i == columns - 3:
+				# the appended 0 column
+				extra += ", '' u %d" % (i+3)
+			else:
+				extra += ", '' u %d title '%s'" % (i+3, self.__legend[i+1])
+
+		self.data.append(Gnuplot.Data(self.rows, using="2:xtic(1) title '%s'%s" % (self.__legend[0], extra)))
+		Plot.draw(self, file)
+	
+

Added: branches/advisor/src/front/analyzedb/iianalyze/Present.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/Present.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/Present.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,338 @@
+import random
+from iianalyze.functions import *
+from iianalyze.Config import *
+from iianalyze.Draw import *
+
+class Present():
+	""" Turns the analysis results into an HTML report.
+
+	start() draws the graphs, renders the page from the templates below and
+	writes it to index.html in config.outputfolder. """
+
+	__results = False
+
+	# Generated JavaScript popup functions. The list is created per instance
+	# in __init__; a list defined here would be shared by all instances.
+	__javascript = None
+
+	# The rendered page; filled in by __render(), written by __write()
+	__page = ""
+
+	__html = '''
+	<html>
+	<head>
+	<meta http-equiv="cache-control" content="no-cache">
+	<meta http-equiv="pragma" content="no-cache">
+	<meta http-equiv="expires" content="Thu, 01 Jan 1970 12:00:00 GMT">
+	<script type="text/javascript">
+	%s
+	</script>
+	</head>
+	<body>
+	%s
+	</body>
+	</html>
+	'''
+
+	__main = '''
+	<h1>Results for %s</h1>
+	%s
+	<hr />
+	%s
+	<hr />
+	%s
+	'''
+
+	__info = '''
+	<h2>Info:</h2>
+	<b>Started:</b> %s<br />
+	<b>Completed:</b> %s<br />
+	<br />
+	<b>VNode:</b> %s<br />
+	<b>Database:</b> %s<br />
+	<b>Queries Analyzed:</b> %d<br />
+	<br />
+	<b>Overall estimated win:</b> %d %%<br />
+	<b>Average win per statement:</b> %d %%<br />
+
+	<h3>Statistics</h3>
+	(Recorded between %s and %s)<br />
+	<br />
+	There are %d attributes in %d tables with %d indexes in this database.<br />
+	The current workload counts %d distinct statements.<br />
+	Since the DBMS is up %d selects have been executed and returned %d rows in total.<br />
+	At most %d users were connected at the same time. On average %d users are connected.<br />
+	There were at most %d sessions at the same time and %d sessions on average.<br />
+	<br /><br />
+	%s &nbsp;&nbsp; %s
+	'''
+
+	__recommendations = '''
+	<h2>Recommendations:</h2>
+	%s
+	<br /><br />
+	%s
+	'''
+
+	__workload = '''
+	<h2>Workload:</h2>
+	%s
+	'''
+
+	def __init__(self, results):
+		""" results -- the analysis result object; statements, statistics,
+		recommendations, time, overall_win and avg_win are read from it. """
+		self.__results = results
+		self.__javascript = []
+
+
+	def start(self):
+		""" Draws the graphs, renders the HTML page and writes it to disk. """
+
+		debug("\n## Starting to prepare presentation")
+
+		self.__draw_graphs()
+		self.__render()
+		self.__write()
+
+
+	def __draw_graphs(self):
+		""" Draws the "costs", "connections" and "locks" graphs. """
+		costs = []
+		for stmt in self.__results.statements:
+			costs.append([stmt.query_key,
+				stmt.opf_cpu+stmt.opf_dio+stmt.qef_cpu+stmt.qef_dio,
+				stmt.est_cpu+stmt.est_dio,
+				stmt.newcost])
+
+		# NOTE(review): the rows are not sorted by cost (the sort calls were
+		# disabled), so "Top 10" shows the first ten statements in the order
+		# they appear in the results - confirm that this is intended.
+
+		debug("  Drawing costs histogram")
+		histo1 = Histogram("Top 10 - Cost Comparison", "Query Key", "Cost", ("Actual Costs", "Old Cost Estimates", "New Cost Estimates"))
+		histo1.rows = costs[0:10]
+		histo1.draw("costs")
+
+		statistics = self.__results.statistics
+
+		debug("  Drawing connection curve")
+		curve1 = Curve("DBMS Connections", "Time", "Connections / Sessions", True)
+		curve1.add_curve(statistics.current_connections, "Current Connections")
+		curve1.add_curve(statistics.current_sessions, "Current Sessions")
+		curve1.add_line(statistics.max_sessions, "Max Sessions")
+		curve1.draw("connections")
+
+		debug("  Drawing locks curve")
+		curve2 = Curve("DBMS Locks", "Time", "Locks", True)
+		curve2.add_curve(statistics.locks_used, "Locks Used")
+		curve2.add_curve(statistics.deadlocks, "Deadlocks")
+		curve2.add_curve(statistics.escalated_locks, "Escalated Locks")
+		curve2.add_curve(statistics.lock_wait, "Lock Waits")
+		curve2.add_line(statistics.locks_per_tx, "Locks Per Transaction")
+		curve2.add_line(statistics.max_locks, "Max Locks")
+		curve2.draw("locks")
+
+
+	def __avg_and_max(self, samples):
+		""" Returns (average, maximum) of the second column of samples.
+
+		Both values are 0 for an empty sequence; the maximum never drops
+		below 0. """
+		total = 0
+		maximum = 0
+		for sample in samples:
+			total += sample[1]
+			if sample[1] > maximum:
+				maximum = sample[1]
+		if total > 0:
+			total /= len(samples)
+		return (total, maximum)
+
+
+	def __render(self):
+		""" Fills the templates and stores the finished page.
+
+		The templates themselves are left untouched, so rendering can be
+		repeated. """
+
+		debug("  Render HTML")
+
+		# Popups are numbered by their position in this list, start afresh
+		self.__javascript = []
+
+		# General statistics
+		statistics = self.__results.statistics
+		avg_connections, max_connections = self.__avg_and_max(statistics.current_connections)
+		avg_sessions, max_sessions = self.__avg_and_max(statistics.current_sessions)
+
+		info = self.__info % (
+			self.__results.time[0],
+			self.__results.time[1],
+			config.vnode,
+			config.userdb,
+			len(self.__results.statements),
+			self.__results.overall_win,
+			self.__results.avg_win,
+			statistics.timespan[0],
+			statistics.timespan[1],
+			statistics.counts[1],
+			statistics.counts[0],
+			statistics.counts[2],
+			statistics.workloadsize,
+			statistics.selects_processed,
+			statistics.total_rows,
+			max_connections,
+			avg_connections,
+			max_sessions,
+			avg_sessions,
+			self.__picture("connections"),
+			self.__picture("locks")
+			)
+
+		recommendations = self.__recommendations % (
+			self.__picture("costs"),
+			self.__get_recommendations()
+			)
+
+		main = self.__main % (
+			config.userdb,
+			info,
+			recommendations,
+			self.__get_workload()
+			)
+
+		# Joined last: every popup created above has registered its script
+		self.__page = self.__html % (
+			"\n".join(self.__javascript),
+			main
+			)
+
+
+	def __format_group(self, recs):
+		""" Returns the HTML for the recommendations of one object, most
+		frequently recommended first. """
+		recom_str = """
+			<i>(Recommended %d time(s))</i><br />
+			%s %s<br /><br />		
+			"""
+		parts = []
+		for rec in sorted(recs, key=lambda r: r.frequency, reverse=True):
+			parts.append(recom_str % (
+				rec.frequency,
+				rec.message(),
+				self.__get_sql(rec.sql())
+				))
+		return "".join(parts)
+
+
+	def __get_recommendations(self):
+		""" Returns the HTML for all recommendations, grouped by the kind of
+		object they refer to (statement, table, attribute, index) and, within
+		a kind, by object. """
+
+		groups = {"Statement": {}, "Table": {}, "Attribute": {}, "Index": {}}
+		# Recommendations need to be sorted first
+		for rec in self.__results.recommendations:
+			kind = rec.object.__class__.__name__
+			if kind not in groups:
+				continue
+			if kind == "Statement":
+				key = rec.object.query_key
+			else:
+				key = rec.object.name
+			groups[kind].setdefault(key, []).append(rec)
+
+		stm = groups["Statement"]
+		stm_str = "<h3>Statements:</h3>"
+		for s in stm:
+			stm_str += "<b>Query %s:</b><br /> " % self.__querylink(s)
+			stm_str += self.__format_group(stm[s])
+
+		tab = groups["Table"]
+		tab_str = "<h3>Tables:</h3>"
+		for t in tab:
+			tab_str += "<b>Table %s:</b><br /> " % t
+			tab_str += self.__format_group(tab[t])
+
+		atr = groups["Attribute"]
+		atr_str = "<h3>Attributes:</h3>"
+		for a in atr:
+			# The table name is taken from the first recommendation recorded
+			atr_str += "<b>Attribute %s(%s):</b><br /> " % (atr[a][0].object.table_name, a)
+			atr_str += self.__format_group(atr[a])
+
+		idx = groups["Index"]
+		idx_str = "<h3>Indexes:</h3>"
+		for i in idx:
+			idx_str += "<b>Index %s:</b><br /> " % (i)
+			idx_str += self.__format_group(idx[i])
+
+		return "%s %s %s %s" % (stm_str, tab_str, atr_str, idx_str)
+
+
+	def __escape_html(self, text):
+		""" Returns text with &, < and > replaced by HTML entities. """
+		text = "%s" % (text,)
+		return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+
+
+	def __escape_js(self, text):
+		""" Returns text escaped for use inside a single quoted JavaScript
+		string literal that is embedded in a <script> element. """
+		text = text.replace("\\", "\\\\")
+		text = text.replace("'", "\\'")
+		text = text.replace("\r\n", "\n").replace("\r", "\n")
+		text = text.replace("\n", "\\n")
+		# Keeps a "</script>" inside the text from ending the script element
+		return text.replace("</", "<\\/")
+
+
+	def __get_sql(self, sql):
+		""" Returns a popup link showing how to run sql in a terminal
+		monitor, or "" when there is no sql. """
+
+		if not sql:
+			return ""
+
+		text = "To execute this open a terminal monitor to <b>%s::%s</b> and type:<br /><br /><pre>%s \\g</pre>" % (
+			config.vnode, config.userdb, self.__escape_html(sql))
+
+		return self.__createpopup("(Show this as SQL)", text)
+
+
+	def __get_workload(self):
+		""" Returns the HTML listing every statement with its old and new
+		estimated costs and the estimated win in percent. """
+
+		entries = []
+
+		for statement in self.__results.statements:
+			old_cost = statement.est_cpu + statement.est_dio
+			entry = '''
+			<h3>Query Key: <a name="%d">%d</a></h3>
+			<i>%s</i><br /><br />
+			<b>Old Estimated Costs:</b> %d | <b>New Estimated Costs:</b> %d | (%d %%)
+			<br /><br />
+			''' % (
+				statement.query_key,
+				statement.query_key,
+				self.__escape_html(statement.query_text),
+				old_cost,
+				statement.newcost,
+				# +1 on both sides avoids a division by zero
+				(100 - (100 * (statement.newcost+1) / (old_cost+1)))
+				)
+			entries.append(entry)
+
+		return self.__workload % "\n".join(entries)
+
+
+	def __querylink(self, key):
+		""" Returns a link to the workload entry of the query with this key. """
+
+		return '<a href="#%d">%d</a>' % (key, key)
+
+
+	def __createpopup(self, name, content):
+		""" Registers a JavaScript function that opens a window showing
+		content and returns a link, labelled name, that calls it.
+
+		name and content are HTML; content is escaped for JavaScript here. """
+
+		# Sequential numbers give unique function names within the page
+		window = len(self.__javascript) + 1
+
+		js = '''function open_%s() {
+	w_%s = window.open("", "", "");
+	w_%s.document.writeln('%s<br /><br /><a href="#" onclick="window.close()">Close this window</a>');
+	}
+		''' % (window, window, window, self.__escape_js(content))
+
+		self.__javascript.append(js)
+
+		return '<a href="#" onclick="open_%s(); return false">%s</a>' % (window, name)
+
+
+	def __picture(self, name):
+		""" Returns a thumbnail of name.png that opens the full size image
+		in a popup. """
+
+		thumb = '<img src="%s.png" border="0" height="200" alt="%s" />' % (name, name)
+		img = '<img src="%s.png" border="0" alt="%s" />' % (name, name)
+
+		return self.__createpopup(thumb, img)
+
+
+	def __write(self):
+		""" Writes the rendered page to index.html in config.outputfolder. """
+		debug("  Write HTML to file")
+		path = "%s/index.html" % config.outputfolder
+		f = open(path, 'w')
+		try:
+			f.write(self.__page)
+		finally:
+			# Close the file even when writing fails
+			f.close()
+
+

Added: branches/advisor/src/front/analyzedb/iianalyze/__init__.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/__init__.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/__init__.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1 @@
+""" Nothing """

Added: branches/advisor/src/front/analyzedb/iianalyze/functions.py
===================================================================
--- branches/advisor/src/front/analyzedb/iianalyze/functions.py	                        (rev 0)
+++ branches/advisor/src/front/analyzedb/iianalyze/functions.py	2008-08-07 08:58:05 UTC (rev 178)
@@ -0,0 +1,16 @@
+import sys
+from iianalyze.Config import *
+
+""" Global functions """
+def output(message):
+	""" Prints message to standard output. """
+	print message
+
+def debug(message):
+	""" Passes message on to output(), but only when config.verbose is set. """
+	if not config.verbose:
+		return
+	output(message)
+
+def error(message):
+	""" Reports message through output(), prefixed with "Error: ", and then
+	terminates the program with exit status 1. """
+	output("Error: %s" % message)
+	# Same effect as sys.exit(1), which raises SystemExit(1)
+	raise SystemExit(1)
+
+




More information about the svn-commits mailing list