From: Mark Wong
Date: Sat, 10 Feb 2018 01:59:44 +0000 (-0800)
Subject: Refactor pgbench results output
X-Git-Url: http://git.postgresql.org/gitweb/?a=commitdiff_plain;h=4d397f905511a0076841c0546173310c32f1d057;p=pgperffarm.git

Refactor pgbench results output

The idea is that one results document represents a test of one
particular commit and contains the results of all the tests run
against that commit.

I think it makes more sense to restructure the pgbench results like
this:

    {
        "pgbench": {
            "ro": {
                "results at a scale factor": {
                    "number of clients": {
                        "std": 4389.727756305762,
                        "metric": 41390.590287,
                        "median": 43137.716637,
                        "results": [
                            { ... pgbench results of a test ... },
                            ...
                        ]
                    },
                    ...
                },
                ...
            },
            "rw": { ... read-write test results ... },
            ... other test results
        }
    }
---

diff --git a/client/benchmarks/pgbench.py b/client/benchmarks/pgbench.py
index d11d23c..ab4238f 100644
--- a/client/benchmarks/pgbench.py
+++ b/client/benchmarks/pgbench.py
@@ -4,6 +4,8 @@ import os.path
 import re
 import time
 
+from numpy import mean, median, std
+
 from multiprocessing import cpu_count
 from utils.logging import log
 from utils.misc import available_ram, run_cmd
@@ -61,7 +63,11 @@ class PgBench(object):
         """
 
         # initialize results for this dataset scale
-        self._results[scale] = {'init': None, 'warmup': None, 'runs': []}
+        self._results['results'] = {
+            'init': None,
+            'runs': [],
+            'warmup': None,
+        }
 
         log("recreating '%s' database" % (self._dbname,))
         run_cmd(['dropdb', '--if-exists', self._dbname], env=self._env)
@@ -72,7 +78,7 @@ class PgBench(object):
                 env=self._env, cwd=self._outdir)
 
         # remember the init duration
-        self._results[scale]['init'] = r[2]
+        self._results['results']['init'] = r[2]
 
     @staticmethod
     def _parse_results(data):
@@ -151,10 +157,19 @@ class PgBench(object):
 
         return issues
 
-    def _run(self, duration, nclients=1, njobs=1, read_only=False,
+    def _run(self, run, scale, duration, nclients=1, njobs=1, read_only=False,
              aggregate=True, csv_queue=None):
         'run pgbench on the database (either a warmup or actual benchmark run)'
 
+        # Create a separate directory for each pgbench run
+        if read_only:
+            rtag = "ro"
+        else:
+            rtag = "rw"
+        rdir = "%s/pgbench-%s-%d-%d-%s" % (self._outdir, rtag, scale, nclients,
+                                           str(run))
+        os.mkdir(rdir)
+
         args = ['pgbench', '-c', str(nclients), '-j', str(njobs), '-T',
                 str(duration)]
 
@@ -174,7 +189,7 @@ class PgBench(object):
             "duration=%d" % (nclients, njobs, aggregate, read_only, duration))
 
         start = time.time()
-        r = run_cmd(args, env=self._env, cwd=self._outdir)
+        r = run_cmd(args, env=self._env, cwd=rdir)
         end = time.time()
 
         r = PgBench._parse_results(r[1])
@@ -197,35 +212,49 @@ class PgBench(object):
         # derive configuration for the CPU count / RAM size
         configs = PgBench._configure(cpu_count(), available_ram())
 
+        results = {'ro': {}, 'rw': {}}
+        j = 0
         for config in configs:
+            scale = config['scale']
 
-            # init for the dataset scale and warmup
-            self._init(config['scale'])
-
-            warmup = self._run(self._duration, cpu_count(), cpu_count())
-            results = []
-
-            for run in range(self._runs):
-
-                log("pgbench : run=%d" % (run,))
-
-                for clients in config['clients']:
-
-                    # read-only
-                    r = self._run(self._duration, clients, clients, True, True,
-                                  csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-                    # read-write
-                    r = self._run(self._duration, clients, clients, False,
-                                  True, csv_queue)
-                    r.update({'run': run})
-                    results.append(r)
-
-            self._results[config['scale']] = {
-                'warmup': warmup,
-                'runs': results
-            }
+            if scale not in results['ro']:
+                results['ro'][scale] = {}
+            if scale not in results['rw']:
+                results['rw'][scale] = {}
+            # init for the dataset scale and warmup
+            self._init(scale)
+
+            warmup = self._run('w%d' % j, scale, self._duration, cpu_count(),
+                               cpu_count())
+            j += 1
+
+            # read-only & read-write
+            for ro in [True, False]:
+                if ro:
+                    tag = 'ro'
+                else:
+                    tag = 'rw'
+
+                for i in range(self._runs):
+                    log("pgbench : %s run=%d" % (tag, i))
+
+                    for clients in config['clients']:
+                        if clients not in results[tag][scale]:
+                            results[tag][scale][clients] = {}
+                            results[tag][scale][clients]['results'] = []
+
+                        r = self._run(i, scale, self._duration, clients,
+                                      clients, ro, True, csv_queue)
+                        r.update({'run': i})
+                        results[tag][scale][clients]['results'].append(r)
+
+                    tps = []
+                    for result in results[tag][scale][clients]['results']:
+                        tps.append(float(result['tps']))
+                    results[tag][scale][clients]['metric'] = mean(tps)
+                    results[tag][scale][clients]['median'] = median(tps)
+                    results[tag][scale][clients]['std'] = std(tps)
+
+        self._results['pgbench'] = results
 
         return self._results
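
As an aside, not part of the patch: a minimal sketch of how a consumer of the
restructured document might walk it and (re)derive the per-client aggregates
the same way the patch does (mean stored as 'metric', plus median and standard
deviation). The results_doc dict below, its scale/client keys, and its tps
values are placeholders for illustration only, not real benchmark output.

    from numpy import mean, median, std

    # Hypothetical, abbreviated results document following the layout above;
    # the scale factor, client count and tps values are placeholders.
    results_doc = {
        'pgbench': {
            'ro': {
                100: {                              # scale factor
                    16: {                           # number of clients
                        'results': [{'run': 0, 'tps': 41000.0},
                                    {'run': 1, 'tps': 43000.0}],
                    },
                },
            },
            'rw': {},
        },
    }

    # Walk mode -> scale -> clients and recompute the aggregates from the
    # per-run results, mirroring the aggregation done in PgBench.run_tests().
    for mode, scales in results_doc['pgbench'].items():
        for scale, per_clients in scales.items():
            for nclients, entry in per_clients.items():
                tps = [float(r['tps']) for r in entry['results']]
                entry['metric'] = mean(tps)
                entry['median'] = median(tps)
                entry['std'] = std(tps)
                print("%s scale=%s clients=%s metric=%.2f median=%.2f std=%.2f"
                      % (mode, scale, nclients, entry['metric'],
                         entry['median'], entry['std']))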