[svn-commits] r190 - branches/advisor/src/front/misc/analyzedb
thial01 at ingres.com
thial01 at ingres.com
Wed Aug 13 07:52:22 PDT 2008
Author: thial01
Date: 2008-08-13 07:52:22 -0700 (Wed, 13 Aug 2008)
New Revision: 190
Modified:
branches/advisor/src/front/misc/analyzedb/Analyze.py
branches/advisor/src/front/misc/analyzedb/Containers.py
branches/advisor/src/front/misc/analyzedb/Draw.py
branches/advisor/src/front/misc/analyzedb/Present.py
Log:
Some fixes for the analyzer - see #174
Modified: branches/advisor/src/front/misc/analyzedb/Analyze.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Analyze.py 2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Analyze.py 2008-08-13 14:52:22 UTC (rev 190)
@@ -112,7 +112,8 @@
""" Get workload history data """
results = self.__workloaddb.execute("select \
query_key, opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time, wctime \
- from workload where database = ?",
+ from workload where database = ? \
+ order by time",
(self.__config.userdb,) )
for row in results:
@@ -121,39 +122,37 @@
def __load_statements(self):
- """ Get statements """
+ result = self.__workloaddb.execute("select count(query_key) from statements");
+ self.__workloadsize = result[0][0]
+
+ """
+ Get statements with big join
+ This returns a join between statements and workload
+ with the most expensive statements first
+ """
results = self.__workloaddb.execute("select \
- query_key, query_text, frequency \
- from statements where database = ?",
- (self.__config.userdb,) )
+ s.query_key, query_text, frequency, \
+ opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, s.time, wctime \
+ from (select query_key, max(time) as time from workload where database = ? group by query_key) as t \
+ join workload w on t.query_key = w.query_key and t.time = w.time \
+ join statements s on s.query_key = w.query_key \
+ where s.database = ? and w.database = s.database \
+ order by frequency * (opf_cpu+opf_dio+qef_cpu+qef_dio) desc",
+ (self.__config.userdb, self.__config.userdb) )
- self.__workloadsize = len(results)
-
+ count = 1
for row in results:
+ if self.__config.querylimit != -1 and count > self.__config.querylimit:
+ debug("Limit of queries to process reached - stopping")
+ break
""" Try to find out if this is a select """
if (row[1].lstrip().lower())[0:6] == "select":
- row1 = self.__workloaddb.execute("select first 1 \
- opf_cpu, opf_dio, qef_cpu, qef_dio, est_cpu, est_dio, pages_touched, time, wctime \
- from workload where query_key = ? order by time desc",
- (row[0],))
- if len(row) > 0:
- self.__statements.append(Statement(row, row1[0]))
+ stmt = Statement(row)
+ self.__load_references(stmt)
+ self.__statements.append(stmt)
+ count += 1
- """ Sort statements - most frequent and expensive ones first """
- self.__statements.sort(lambda x, y: cmp(
- x.frequency * (x.opf_cpu+x.opf_dio+x.qef_cpu+x.qef_dio),
- y.frequency * (y.opf_cpu+y.opf_dio+y.qef_cpu+y.qef_dio)),
- reverse=True)
- if self.__config.querylimit != -1:
- """ Only keep the first n statements """
- del self.__statements[self.__config.querylimit:]
-
- """ Load references for the rest """
- for stmt in self.__statements:
- self.__load_references(stmt)
-
-
def __load_references(self, statement):
debug("\n\n## Statement %d: %s\n" % (statement.query_key, statement.query_text))
Modified: branches/advisor/src/front/misc/analyzedb/Containers.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Containers.py 2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Containers.py 2008-08-13 14:52:22 UTC (rev 190)
@@ -2,20 +2,20 @@
""" Statement class """
class Statement():
- def __init__(self, values, values1):
+ def __init__(self, values):
self.query_key = values[0]
self.query_text = values[1].strip()
self.frequency = values[2]
- self.opf_cpu = values1[0]
- self.opf_dio = values1[1]
- self.qef_cpu = values1[2]
- self.qef_dio = values1[3]
- self.est_cpu = values1[4]
- self.est_dio = values1[5]
- self.pages_touched = values1[6]
- self.time = values1[7]
- self.wctime = values1[8]
+ self.opf_cpu = values[3]
+ self.opf_dio = values[4]
+ self.qef_cpu = values[5]
+ self.qef_dio = values[6]
+ self.est_cpu = values[7]
+ self.est_dio = values[8]
+ self.pages_touched = values[9]
+ self.time = values[10]
+ self.wctime = values[11]
self.newcost = -1
self.tables = []
self.indexes = []
Modified: branches/advisor/src/front/misc/analyzedb/Draw.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Draw.py 2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Draw.py 2008-08-13 14:52:22 UTC (rev 190)
@@ -15,7 +15,8 @@
self.gp.xlabel(xlabel)
self.gp.ylabel(ylabel)
self.gp.title(title)
- self.gp("set yrange [0:]")
+ """self.gp("set yrange [1:]")"""
+ self.gp("set logscale y")
self.gp("set key box width 1 below")
def draw(self, file):
@@ -43,6 +44,10 @@
if len(rows) > 0:
self.data.append(Gnuplot.Data(rows, using="1:2 with lines lw 2 title '%s', 0 with lines lc rgb 'white' " % title))
+ def add_bars(self, rows, title):
+ if len(rows) > 0:
+ self.data.append(Gnuplot.Data(rows, using="1:2 with impulses lw 2 title '%s', 0 with lines lc rgb 'white' " % title))
+
def add_line(self, value, title):
self.data.append(Gnuplot.Func("%d" % value, with_="lines lw 2 title '%s'" % title))
Modified: branches/advisor/src/front/misc/analyzedb/Present.py
===================================================================
--- branches/advisor/src/front/misc/analyzedb/Present.py 2008-08-12 18:46:33 UTC (rev 189)
+++ branches/advisor/src/front/misc/analyzedb/Present.py 2008-08-13 14:52:22 UTC (rev 190)
@@ -50,14 +50,14 @@
<h3>Statistics</h3>
(Recorded between %s and %s)<br />
<br />
- There are %d attributes in %d tables with %d indexes in this database.<br />
+ There are %d attributes in %d tables with %d secondary indexes in this database.<br />
The current workload counts %d distinct statements and %d selects were executed.<br />
Since the DBMS is up %d selects have been executed and returned %d rows in total.<br />
At most %d users were connected at the same time. On average %d users are connected.<br />
There were at most %d sessions at the same time and %d sessions on average.<br />
<br /><br />
%s %s<br />
- %s
+ %s %s
'''
__recommendations = '''
@@ -96,14 +96,19 @@
runtime = []
coststime = []
+ usage = []
+ hour = [0, 0]
for wkl in self.__results.workload:
runtime.append([wkl.time,
wkl.wctime])
coststime.append([wkl.time,
(wkl.opf_cpu+wkl.opf_dio+wkl.qef_cpu+wkl.qef_dio) / 1000])
-
- runtime.sort(lambda x, y: cmp(x[0], y[0]))
- coststime.sort(lambda x, y: cmp(x[0], y[0]))
+ if wkl.time > hour[0]+3600:
+ if hour[0] > 0:
+ usage.append([hour[0], hour[1]])
+ hour[0] = wkl.time
+ hour[1] = 0
+ hour[1] += 1
debug(" Drawing costs histogram")
histo1 = Histogram("Top 10 - Cost Comparison", "Query Key", "Cost", ("Actual Costs", "Old Cost Estimates", "New Cost Estimates"))
@@ -127,12 +132,16 @@
curve2.add_line(self.__results.statistics.max_locks, "Max Locks")
curve2.draw("locks")
- debug(" Drawing workload curve")
- curve3 = Curve("Workload", "Time", "Runtime", True)
+ debug(" Drawing workload cost curve")
+ curve3 = Curve("Workload Cost", "Time", "Runtime", True)
curve3.add_curve(runtime, "Time to Execute")
curve3.add_curve(coststime, "Cost / 1000")
curve3.draw("runtime")
+ debug(" Drawing workload usage curve")
+ curve4 = Curve("Workload Usage", "Time", "No of Queries", True)
+ curve4.add_bars(usage, "Queries per Hour")
+ curve4.draw("usage")
def __render(self):
@@ -181,7 +190,8 @@
avg_sessions,
self.__picture("connections"),
self.__picture("locks"),
- self.__picture("runtime")
+ self.__picture("runtime"),
+ self.__picture("usage")
)
self.__recommendations = self.__recommendations % (
More information about the svn-commits mailing list