[svn-commits] r190 - branches/advisor/src/front/misc/analyzedb

thial01 at ingres.com thial01 at ingres.com
Wed Aug 13 07:52:22 PDT 2008


Author: thial01
Date: 2008-08-13 07:52:22 -0700 (Wed, 13 Aug 2008)
New Revision: 190

Modified:
   branches/advisor/src/front/misc/analyzedb/Analyze.py
   branches/advisor/src/front/misc/analyzedb/Containers.py
   branches/advisor/src/front/misc/analyzedb/Draw.py
   branches/advisor/src/front/misc/analyzedb/Present.py
Log:
Some fixes for the analyzer - see #174


Modified: branches/advisor/src/front/misc/analyzedb/Analyze.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Analyze.py	2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Analyze.py	2008-08-13 14:52:22 UTC (rev 190)
@@ -112,7 +112,8 @@
 		""" Get workload history data """
 		results = self.__workloaddb.execute("select \
 			query_key, opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time, wctime \
-			from workload where database = ?", 
+			from workload where database = ? \
+			order by time", 
 			(self.__config.userdb,) )
 
 		for row in results:
@@ -121,39 +122,37 @@
 
 	def __load_statements(self):
 
-		""" Get statements """
+		result = self.__workloaddb.execute("select count(query_key) from statements");
+		self.__workloadsize = result[0][0]
+		
+		""" 
+		Get statements with big join 
+		This returns a join between statements and workload
+		with the most expensive statements first
+		"""
 		results = self.__workloaddb.execute("select \
-			query_key, query_text, frequency \
-			from statements where database = ?", 
-			(self.__config.userdb,) )
+			s.query_key, query_text, frequency, \
+			opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, s.time, wctime \
+			from (select query_key, max(time) as time from workload where database = ? group by query_key) as t \
+			join workload w on t.query_key = w.query_key and t.time = w.time \
+			join statements s on s.query_key = w.query_key \
+			where s.database = ? and w.database = s.database \
+			order by frequency * (opf_cpu+opf_dio+qef_cpu+qef_dio) desc", 
+			(self.__config.userdb, self.__config.userdb) )
 
-		self.__workloadsize = len(results)
-
+		count = 1
 		for row in results:
+			if self.__config.querylimit != -1 and count > self.__config.querylimit:
+				debug("Limit of queries to process reached - stopping")
+				break
 			""" Try to find out if this is a select """
 			if (row[1].lstrip().lower())[0:6] == "select":
-				row1 = self.__workloaddb.execute("select first 1 \
-				opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time, wctime \
-				from workload where query_key = ? order by time desc",
-				(row[0],))
-				if len(row) > 0:
-					self.__statements.append(Statement(row, row1[0]))
+				stmt = Statement(row)
+				self.__load_references(stmt)
+				self.__statements.append(stmt)
+				count += 1
 		
-		""" Sort statements - most frequent and expensive ones first """
-		self.__statements.sort(lambda x, y: cmp(
-				x.frequency * (x.opf_cpu+x.opf_dio+x.qef_cpu+x.qef_dio), 
-				y.frequency * (y.opf_cpu+y.opf_dio+y.qef_cpu+y.qef_dio)), 
-				reverse=True)
 
-		if self.__config.querylimit != -1:
-			""" Only keep the first n statements """
-			del self.__statements[self.__config.querylimit:]
-		
-		""" Load references for the rest """
-		for stmt in self.__statements:
-			self.__load_references(stmt)
-
-
 	def __load_references(self, statement):	
 
 		debug("\n\n## Statement %d: %s\n" % (statement.query_key, statement.query_text))

Modified: branches/advisor/src/front/misc/analyzedb/Containers.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Containers.py	2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Containers.py	2008-08-13 14:52:22 UTC (rev 190)
@@ -2,20 +2,20 @@
 """ Statement class """
 class Statement():
 
-	def __init__(self, values, values1):
+	def __init__(self, values):
 
 		self.query_key = values[0]
 		self.query_text = values[1].strip()
 		self.frequency = values[2]
-		self.opf_cpu = values1[0]
-		self.opf_dio = values1[1]
-		self.qef_cpu = values1[2]
-		self.qef_dio = values1[3]
-		self.est_cpu = values1[4]
-		self.est_dio = values1[5]
-		self.pages_touched = values1[6]
-		self.time = values1[7]
-		self.wctime = values1[8]
+		self.opf_cpu = values[3]
+		self.opf_dio = values[4]
+		self.qef_cpu = values[5]
+		self.qef_dio = values[6]
+		self.est_cpu = values[7]
+		self.est_dio = values[8]
+		self.pages_touched = values[9]
+		self.time = values[10]
+		self.wctime = values[11]
 		self.newcost = -1
 		self.tables = []
 		self.indexes = []

Modified: branches/advisor/src/front/misc/analyzedb/Draw.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Draw.py	2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Draw.py	2008-08-13 14:52:22 UTC (rev 190)
@@ -15,7 +15,8 @@
 		self.gp.xlabel(xlabel)
 		self.gp.ylabel(ylabel)
 		self.gp.title(title)
-		self.gp("set yrange [0:]")
+		"""self.gp("set yrange [1:]")"""
+		self.gp("set logscale y")
 		self.gp("set key box width 1 below")
 	
 	def draw(self, file):
@@ -43,6 +44,10 @@
 		if len(rows) > 0:
 			self.data.append(Gnuplot.Data(rows, using="1:2 with lines lw 2 title '%s', 0 with lines lc rgb 'white' " % title))
 		
+	def add_bars(self, rows, title):
+		if len(rows) > 0:
+			self.data.append(Gnuplot.Data(rows, using="1:2 with impulses lw 2 title '%s', 0 with lines lc rgb 'white' " % title))
+
 	def add_line(self, value, title):
 		self.data.append(Gnuplot.Func("%d" % value, with_="lines lw 2 title '%s'" % title))
 

Modified: branches/advisor/src/front/misc/analyzedb/Present.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Present.py	2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Present.py	2008-08-13 14:52:22 UTC (rev 190)
@@ -50,14 +50,14 @@
 	<h3>Statistics</h3>
 	(Recorded between %s and %s)<br />
 	<br />
-	There are %d attributes in %d tables with %d indexes in this database.<br />
+	There are %d attributes in %d tables with %d secondary indexes in this database.<br />
 	The current workload counts %d distinct statements and %d selects were executed.<br />
 	Since the DBMS is up %d selects have been executed and returned %d rows in total.<br />
 	At most %d users were connected at the same time. On average %d users are connected.<br />
 	There were at most %d sessions at the same time and %d sessions on average.<br />
 	<br /><br />
 	%s &nbsp;&nbsp; %s<br />
-	%s
+	%s &nbsp;&nbsp; %s
 	'''	
 
 	__recommendations = '''
@@ -96,14 +96,19 @@
 		
 		runtime = []
 		coststime = []
+		usage = []
+		hour = [0, 0]
 		for wkl in self.__results.workload:
 			runtime.append([wkl.time,
 					wkl.wctime])
 			coststime.append([wkl.time,
 					(wkl.opf_cpu+wkl.opf_dio+wkl.qef_cpu+wkl.qef_dio) / 1000])
-			
-		runtime.sort(lambda x, y: cmp(x[0], y[0]))
-		coststime.sort(lambda x, y: cmp(x[0], y[0]))
+			if wkl.time > hour[0]+3600:
+				if hour[0] > 0:
+					usage.append([hour[0], hour[1]])
+				hour[0] = wkl.time
+				hour[1] = 0
+			hour[1] += 1
 
 		debug("  Drawing costs histogram")
 		histo1 = Histogram("Top 10 - Cost Comparison", "Query Key", "Cost", ("Actual Costs", "Old Cost Estimates", "New Cost Estimates"))
@@ -127,12 +132,16 @@
 		curve2.add_line(self.__results.statistics.max_locks, "Max Locks")
 		curve2.draw("locks")
 		
-		debug("  Drawing workload curve")
-		curve3 = Curve("Workload", "Time", "Runtime", True)
+		debug("  Drawing workload cost curve")
+		curve3 = Curve("Workload Cost", "Time", "Runtime", True)
 		curve3.add_curve(runtime, "Time to Execute")
 		curve3.add_curve(coststime, "Cost / 1000")
 		curve3.draw("runtime")
 	
+		debug("  Drawing workload usage curve")
+		curve4 = Curve("Workload Usage", "Time", "No of Queries", True)
+		curve4.add_bars(usage, "Queries per Hour")
+		curve4.draw("usage")
 
 	def __render(self):
 		
@@ -181,7 +190,8 @@
 			avg_sessions,
 			self.__picture("connections"),
 			self.__picture("locks"),
-			self.__picture("runtime")
+			self.__picture("runtime"),
+			self.__picture("usage")
 			)
 
 		self.__recommendations = self.__recommendations % (




More information about the svn-commits mailing list