Skip to content

Commit c72ce69

Browse files
jnihalavamingli
authored andcommitted
gpcheckperf: Fix memory calculation function (#14865)
* gpcheckperf: Fix memory calculation function Currently, there is a bug in the `getMemory()` function in `gpcheckperf` because of the way we check the return code of the `run()` method which is called inside `getMemory()`. The `run()` method returns an integer value of `zero` in case of success and a `non-zero` value if it fails. We are checking this value using the condition `if not ok` which is incorrect because when the `run()` method succeeds (`ok = 0`), the condition would result as `False` causing the `getMemory()` function to assume that the `run()` method failed but in reality, it did not. A simple fix would be to change the condition from `if not ok` to `if ok != 0` to check for any possible failure from the `run()` method. Further, the way `getMemory()` handles errors is also incorrect. It just returns `0` whenever there is an error which can lead to the incorrect file size being used to perform disk performance tests. This is because the gpcheckperf utility internally calls `multidd` command to perform disk performance tests which also accepts a file size parameter with the -S option which when not set (or is equal to 0), uses its own default value of `2 * memory_size`, but instead of dividing it from the number of input directories, it uses this value for each directory i.e. the total file size value would be `2 * memory_size * no_of_input_directories`. Due to this, sometimes the user can meet File System Full error when the number of input directories is big. Hence we need to properly handle the error in the `getMemory()` function and exit from the code instead of just returning `0` Before: ``` $ gpcheckperf -d /tmp/test1 -d /tmp/test2 -d /tmp/test3 -h localhost -rd disk write avg time (sec): 5.46 disk write tot bytes: 12884901888 --> is equal to 2 * memory_size * no_of_input_directories disk write tot bandwidth (MB/s): 2250.55 disk write min bandwidth (MB/s): 2250.55 [localhost] disk write max bandwidth (MB/s): 2250.55 [localhost] ``` After: ``` $ gpcheckperf -d /tmp/test1 -d /tmp/test2 -d /tmp/test3 -h localhost -rd disk write avg time (sec): 1.87 disk write tot bytes: 4295000064 --> is equal to 2 * memory_size disk write tot bandwidth (MB/s): 2190.39 disk write min bandwidth (MB/s): 2190.39 [localhost] disk write max bandwidth (MB/s): 2190.39 [localhost] ``` Also added unit test cases to test the getMemory() function outputs and added a main section to the gpcheckperf code.
1 parent 2528f32 commit c72ce69

3 files changed

Lines changed: 136 additions & 49 deletions

File tree

gpMgmt/bin/gpcheckperf

Lines changed: 60 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -133,26 +133,27 @@ def getPlatform():
133133

134134

135135
def getMemory():
136-
if getPlatform() == 'linux':
137-
ok, out = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
138-
if not ok:
139-
return 0
136+
platform = getPlatform()
137+
138+
if platform == 'linux':
139+
rc, out = run("sh -c 'cat /proc/meminfo | grep MemTotal'")
140+
if rc != 0:
141+
return None
140142
word_list = out.strip().split(' ')
141143
val = int(word_list[len(word_list) - 2])
142144
factor = word_list[len(word_list) - 1]
143145
if factor == 'kB':
144-
return val * 1024
145-
return 0
146+
return val * 1024 if val else None
146147

147-
if getPlatform() == 'darwin':
148-
ok, out = run("/usr/sbin/sysctl hw.physmem")
149-
if not ok:
150-
return 0
148+
if platform == 'darwin':
149+
rc, out = run("/usr/sbin/sysctl hw.physmem")
150+
if rc != 0:
151+
return None
151152
word_list = out.strip().split(' ')
152153
val = int(word_list[1])
153-
return val
154+
return val if val else None
154155

155-
return 0
156+
return None
156157

157158

158159
def parseMemorySize(line):
@@ -261,7 +262,12 @@ def parseCommandLine():
261262
usage('Error: maximum size for -B parameter is 1MB')
262263

263264
if GV.opt['-S'] == 0:
264-
GV.opt['-S'] = 2 * getMemory() / len(GV.opt['-d'])
265+
system_mem_size = getMemory()
266+
if system_mem_size is not None:
267+
GV.opt['-S'] = 2 * system_mem_size / len(GV.opt['-d'])
268+
else:
269+
sys.exit('[Error] could not get system memory size. Instead, you can use the -S option to provide the file size value')
270+
265271
else:
266272
GV.opt['-S'] /= len(GV.opt['-d'])
267273

@@ -947,50 +953,55 @@ def printResult(title, result):
947953
print('')
948954

949955

950-
try:
951-
parseCommandLine()
952-
runSetup()
953-
diskWriteResult = diskReadResult = streamResult = netResult = None
954-
tornDown = False
956+
def main():
955957
try:
956-
if GV.opt['-r'].find('d') >= 0:
957-
multidd = copyExecOver('multidd')
958-
diskWriteResult = runDiskWriteTest(multidd)
959-
diskReadResult = runDiskReadTest(multidd)
958+
parseCommandLine()
959+
runSetup()
960+
diskWriteResult = diskReadResult = streamResult = netResult = None
961+
tornDown = False
962+
try:
963+
if GV.opt['-r'].find('d') >= 0:
964+
print('[Warning] Using %d bytes for disk performance test. This might take some time' % (GV.opt['-S'] * len(GV.opt['-d'])))
965+
multidd = copyExecOver('multidd')
966+
diskWriteResult = runDiskWriteTest(multidd)
967+
diskReadResult = runDiskReadTest(multidd)
960968

961-
if GV.opt['-r'].find('s') >= 0:
962-
streamResult = runStreamTest()
969+
if GV.opt['-r'].find('s') >= 0:
970+
streamResult = runStreamTest()
963971

964-
if GV.opt['--net'] == 'netperf':
965-
netResult = runNetPerfTest()
966-
elif GV.opt['--net'] == 'parallel':
967-
netResult = runNetPerfTestParallel()
968-
elif GV.opt['--net'] == 'matrix':
969-
netResult, seglist = runNetPerfTestMatrix()
972+
if GV.opt['--net'] == 'netperf':
973+
netResult = runNetPerfTest()
974+
elif GV.opt['--net'] == 'parallel':
975+
netResult = runNetPerfTestParallel()
976+
elif GV.opt['--net'] == 'matrix':
977+
netResult, seglist = runNetPerfTestMatrix()
970978

971-
runTeardown()
979+
runTeardown()
980+
981+
finally:
982+
print('')
983+
print('====================')
984+
print('== RESULT %s' % datetime.datetime.now().isoformat())
985+
print('====================')
972986

973-
finally:
974-
print('')
975-
print('====================')
976-
print('== RESULT %s' % datetime.datetime.now().isoformat())
977-
print('====================')
987+
if diskWriteResult:
988+
printResult('disk write', diskWriteResult)
978989

979-
if diskWriteResult:
980-
printResult('disk write', diskWriteResult)
990+
if diskReadResult:
991+
printResult('disk read', diskReadResult)
981992

982-
if diskReadResult:
983-
printResult('disk read', diskReadResult)
993+
if streamResult:
994+
printResult('stream', streamResult)
984995

985-
if streamResult:
986-
printResult('stream', streamResult)
996+
if netResult and GV.opt['--net'] == 'matrix':
997+
printMatrixResult(netResult, seglist)
998+
elif netResult and GV.opt['--net']:
999+
printNetResult(netResult)
9871000

988-
if netResult and GV.opt['--net'] == 'matrix':
989-
printMatrixResult(netResult, seglist)
990-
elif netResult and GV.opt['--net']:
991-
printNetResult(netResult)
1001+
runTeardown()
9921002

993-
runTeardown()
1003+
except KeyboardInterrupt:
1004+
print('[Abort] Keyboard Interrupt ...')
9941005

995-
except KeyboardInterrupt:
996-
print('[Abort] Keyboard Interrupt ...')
1006+
if __name__ == '__main__':
1007+
main()
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import imp
2+
import os
3+
import sys
4+
from mock import patch
5+
from gppylib.test.unit.gp_unittest import GpTestCase,run_tests
6+
7+
class GpCheckPerf(GpTestCase):
8+
def setUp(self):
9+
gpcheckcat_file = os.path.abspath(os.path.dirname(__file__) + "/../../../gpcheckperf")
10+
self.subject = imp.load_source('gpcheckperf', gpcheckcat_file)
11+
12+
def tearDown(self):
13+
super(GpCheckPerf, self).tearDown()
14+
15+
@patch('gpcheckperf.getPlatform', return_value='darwin')
16+
@patch('gpcheckperf.run')
17+
def test_get_memory_on_darwin(self, mock_run, mock_get_platform):
18+
mock_run.return_value = [1, 'hw.physmem: 1234']
19+
actual_result = self.subject.getMemory()
20+
self.assertEquals(actual_result, None)
21+
22+
mock_run.return_value = [0, 'hw.physmem: 0']
23+
actual_result = self.subject.getMemory()
24+
self.assertEquals(actual_result, None)
25+
26+
mock_run.return_value = [0, 'hw.physmem: 1234']
27+
actual_result = self.subject.getMemory()
28+
self.assertEquals(actual_result, 1234)
29+
30+
@patch('gpcheckperf.getPlatform', return_value='linux')
31+
@patch('gpcheckperf.run')
32+
def test_get_memory_on_linux(self, mock_run, mock_get_platform):
33+
mock_run.return_value = [1, 'MemTotal: 10 kB']
34+
actual_result = self.subject.getMemory()
35+
self.assertEquals(actual_result, None)
36+
37+
mock_run.return_value = [0, 'MemTotal: 0 kB']
38+
actual_result = self.subject.getMemory()
39+
self.assertEquals(actual_result, None)
40+
41+
mock_run.return_value = [0, 'MemTotal: 10 kB']
42+
actual_result = self.subject.getMemory()
43+
self.assertEquals(actual_result, 10240)
44+
45+
@patch('gpcheckperf.getPlatform', return_value='abc')
46+
def test_get_memory_on_invalid_platform(self, mock_get_platform):
47+
actual_result = self.subject.getMemory()
48+
self.assertEquals(actual_result, None)
49+
50+
@patch('gpcheckperf.getMemory', return_value=None)
51+
def test_parseCommandLine_when_get_memory_fails(self, mock_get_memory):
52+
sys.argv = ["gpcheckperf", "-h", "locahost", "-r", "d", "-d", "/tmp"]
53+
with self.assertRaises(SystemExit) as e:
54+
self.subject.parseCommandLine()
55+
56+
self.assertEqual(e.exception.code, '[Error] could not get system memory size. Instead, you can use the -S option to provide the file size value')
57+
58+
@patch('gpcheckperf.getMemory', return_value=123)
59+
def test_parseCommandLine_when_get_memory_succeeds(self, mock_get_memory):
60+
sys.argv = ["gpcheckperf", "-h", "locahost", "-r", "d", "-d", "/tmp"]
61+
self.subject.parseCommandLine()
62+
self.assertEqual(self.subject.GV.opt['-S'], 246.0)
63+
64+
65+
if __name__ == '__main__':
66+
run_tests()

gpMgmt/test/behave/mgmt_utils/gpcheckperf.feature

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,13 @@ Feature: Tests for gpcheckperf
5252
And gpcheckperf should print "single host only - abandon netperf test" to stdout
5353
And gpcheckperf should not print "TypeError:" to stdout
5454

55+
Scenario: gpcheckperf runs with -S option and prints a warning message
56+
Given the database is running
57+
When the user runs "gpcheckperf -h localhost -r d -d /tmp -S 1GB"
58+
Then gpcheckperf should return a return code of 0
59+
And gpcheckperf should print "\[Warning] Using 1073741824 bytes for disk performance test. This might take some time" to stdout
60+
61+
Scenario: gpcheckperf errors out when invalid value is passed to the -S option
62+
Given the database is running
63+
When the user runs "gpcheckperf -h localhost -r d -d /tmp -S abc"
64+
Then gpcheckperf should return a return code of 1

0 commit comments

Comments
 (0)