[Test] Verify metadata and data self heal
Test: test_metadata_heal_from_shd
Steps:
1. Create, mount and run IO on volume
2. Set `self-heal-daemon` to `off` and bring files into metadata split brain
3. Set `self-heal-daemon` to `on` and wait for heal completion
4. Validate arequal checksum on backend bricks

Test: test_metadata_heal_from_heal_cmd
Steps:
1. Create, mount and run IO on volume
2. Set `self-heal-daemon` to `off` and bring files into metadata split brain
3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
4. Validate arequal checksum on backend bricks

Test: test_data_heal_from_shd
Steps:
1. Create, mount and run IO on volume
2. Set `self-heal-daemon` to `off` and bring files into data split brain
3. Set `self-heal-daemon` to `on` and wait for heal completion
4. Validate arequal checksum on backend bricks

Change-Id: I24411d964fb6252ae5b621c6569e791b54dcc311
Signed-off-by: Leela Venkaiah G <[email protected]>
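For context, modules under tests/functional are normally executed through the glusto-tests wrapper rather than invoked directly. A minimal sketch, assuming a glusto config file describing the servers, clients and mounts (the 'config.yml' name below is a placeholder for your environment's config):

    # Hypothetical invocation; adjust the config path to your setup
    glusto -c 'config.yml' --pytest='-v -x tests/functional/arbiter/test_verify_metadata_and_data_heal.py'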
Showing 1 changed file with 297 additions and 0 deletions.
tests/functional/arbiter/test_verify_metadata_and_data_heal.py
@@ -0,0 +1,297 @@
# Copyright (C) 2021 Red Hat, Inc. <http://www.redhat.com>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

from glusto.core import Glusto as g

from glustolibs.gluster.brick_libs import (bring_bricks_offline,
                                           bring_bricks_online,
                                           get_online_bricks_list)
from glustolibs.gluster.exceptions import ExecutionError
from glustolibs.gluster.gluster_base_class import GlusterBaseClass, runs_on
from glustolibs.gluster.glusterdir import mkdir
from glustolibs.gluster.heal_libs import (
    is_heal_complete, is_volume_in_split_brain, monitor_heal_completion,
    wait_for_self_heal_daemons_to_be_online)
from glustolibs.gluster.heal_ops import (disable_self_heal_daemon,
                                         enable_self_heal_daemon, trigger_heal)
from glustolibs.gluster.lib_utils import (add_user, collect_bricks_arequal,
                                          del_user, group_add, group_del)
from glustolibs.gluster.volume_libs import get_subvols
from glustolibs.io.utils import list_all_files_and_dirs_mounts


@runs_on([['arbiter', 'replicated'], ['glusterfs']])
class TestMetadataAndDataHeal(GlusterBaseClass):
    '''Description: Verify shd heals files after performing metadata and data
    operations while a brick was down'''
    def _dac_helper(self, host, option):
        '''Helper for creating, deleting users and groups'''

        # Permission/Ownership changes required only for `test_metadata..`
        # tests, using random group and usernames
        if 'metadata' not in self.test_dir:
            return

        if option == 'create':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_add(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User
            if not add_user(host, 'qa_all', group='qa_func'):
                raise ExecutionError('Unable to {} user {} under {} on '
                                     '{}'.format(option, 'qa_all', 'qa_func',
                                                 host))
        elif option == 'delete':
            # Groups
            for group in ('qa_func', 'qa_system'):
                if not group_del(host, group):
                    raise ExecutionError('Unable to {} group {} on '
                                         '{}'.format(option, group, host))

            # User
            if not del_user(host, 'qa_all'):
                raise ExecutionError('Unable to {} user on {}'.format(
                    option, host))
    def setUp(self):
        self.get_super_method(self, 'setUp')()

        # A single mount is enough for all the tests
        self.mounts = self.mounts[0:1]
        self.client = self.mounts[0].client_system

        # Use testcase name as test directory
        self.test_dir = self.id().split('.')[-1]
        self.fqpath = self.mounts[0].mountpoint + '/' + self.test_dir

        if not self.setup_volume_and_mount_volume(mounts=self.mounts):
            raise ExecutionError('Failed to setup and mount '
                                 '{}'.format(self.volname))

        # Create group and user names required for the test
        self._dac_helper(host=self.client, option='create')
    def tearDown(self):
        # Delete group and user names created as part of setup
        self._dac_helper(host=self.client, option='delete')

        if not self.unmount_volume_and_cleanup_volume(mounts=self.mounts):
            raise ExecutionError('Not able to unmount and cleanup '
                                 '{}'.format(self.volname))

        self.get_super_method(self, 'tearDown')()
    def _perform_io_and_disable_self_heal(self):
        '''Refactor of steps common to all tests: Perform IO, disable heal'''
        ret = mkdir(self.client, self.fqpath)
        self.assertTrue(ret,
                        'Directory creation failed on {}'.format(self.client))
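        # Base command for generating random printable data; the size to
        # produce (e.g. 10K) is appended wherever the command is formatted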
        self.io_cmd = 'cat /dev/urandom | tr -dc [:space:][:print:] | head -c '
        # Create 6 dirs, 6 files and 6 files in each subdir with 10K data
        file_io = ('''cd {0}; for i in `seq 1 6`;
                   do mkdir dir.$i; {1} 10K > file.$i;
                   for j in `seq 1 6`;
                   do {1} 10K > dir.$i/file.$j; done;
                   done;'''.format(self.fqpath, self.io_cmd))
        ret, _, err = g.run(self.client, file_io)
        self.assertEqual(ret, 0, 'Unable to create directories and data files')
        self.assertFalse(err, '{0} failed with {1}'.format(file_io, err))

        # Disable self heal daemon
        self.assertTrue(disable_self_heal_daemon(self.mnode, self.volname),
                        'Disabling self-heal-daemon failed')
    def _perform_brick_ops_and_enable_self_heal(self, op_type):
        '''Refactor of steps common to all tests: Brick down and perform
        metadata/data operations'''
        # First brick in the subvol will always be online and used for self
        # heal, so make keys match brick index
        self.op_cmd = {
            # Metadata Operations (owner and permission changes)
            'metadata': {
                2:
                '''cd {0}; for i in `seq 1 3`; do chown -R qa_all:qa_func \
                dir.$i file.$i; chmod -R 555 dir.$i file.$i; done;''',
                3:
                '''cd {0}; for i in `seq 1 3`; do chown -R :qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`; do chown -R qa_all:qa_system \
                dir.$i file.$i; chmod -R 777 dir.$i file.$i; done;''',
            },
            # Data Operations (append data to the files)
            'data': {
                2:
                '''cd {0}; for i in `seq 1 3`;
                do {1} 2K >> file.$i;
                for j in `seq 1 3`;
                do {1} 2K >> dir.$i/file.$j; done;
                done;''',
                3:
                '''cd {0}; for i in `seq 1 3`;
                do {1} 3K >> file.$i;
                for j in `seq 1 3`;
                do {1} 3K >> dir.$i/file.$j; done;
                done;''',
                # 4 - Will be used for final data consistency check
                4:
                '''cd {0}; for i in `seq 1 6`;
                do {1} 4K >> file.$i;
                for j in `seq 1 6`;
                do {1} 4K >> dir.$i/file.$j; done;
                done;''',
            },
        }
        bricks = get_online_bricks_list(self.mnode, self.volname)
        self.assertIsNotNone(bricks,
                             'Not able to get list of bricks in the volume')
        # Make first brick always online and start operations from second brick
        for index, brick in enumerate(bricks[1:], start=2):

            # Bring brick offline
            ret = bring_bricks_offline(self.volname, brick)
            self.assertTrue(ret, 'Unable to bring {} offline'.format(brick))

            # Perform metadata/data operation
            cmd = self.op_cmd[op_type][index].format(self.fqpath, self.io_cmd)
            ret, _, err = g.run(self.client, cmd)
            self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
            self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

            # Bring brick online
            ret = bring_bricks_online(
                self.mnode,
                self.volname,
                brick,
                bring_bricks_online_methods='volume_start_force')
            self.assertTrue(ret, 'Unable to bring {} online'.format(brick))

        # Assert metadata/data operations resulted in pending heals
        self.assertFalse(is_heal_complete(self.mnode, self.volname))

        # Enable and wait self heal daemon to be online
        self.assertTrue(enable_self_heal_daemon(self.mnode, self.volname),
                        'Enabling self heal daemon failed')
        self.assertTrue(
            wait_for_self_heal_daemons_to_be_online(self.mnode, self.volname),
            'Not all self heal daemons are online')
    def _validate_heal_completion_and_arequal(self, op_type):
        '''Refactor of steps common to all tests: Validate heal from heal
        commands, verify arequal, perform IO and verify arequal after IO'''

        # Validate heal completion
        self.assertTrue(monitor_heal_completion(self.mnode, self.volname),
                        'Self heal is not completed within timeout')
        self.assertFalse(
            is_volume_in_split_brain(self.mnode, self.volname),
            'Volume is in split brain even after heal completion')

        subvols = get_subvols(self.mnode, self.volname)['volume_subvols']
        self.assertTrue(subvols, 'Not able to get list of subvols')
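        # Arbiter bricks hold only metadata, no file data, so leave the last
        # brick of each subvol out of the arequal checksum comparison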
        arbiter = self.volume_type.find('arbiter') >= 0
        stop = len(subvols[0]) - 1 if arbiter else len(subvols[0])

        # Validate arequal
        self._validate_arequal_and_perform_lookup(subvols, stop)

        # Perform some additional metadata/data operations
        cmd = self.op_cmd[op_type][4].format(self.fqpath, self.io_cmd)
        ret, _, err = g.run(self.client, cmd)
        self.assertEqual(ret, 0, '{0} failed with {1}'.format(cmd, err))
        self.assertFalse(err, '{0} failed with {1}'.format(cmd, err))

        # Validate arequal after additional operations
        self._validate_arequal_and_perform_lookup(subvols, stop)

    def _validate_arequal_and_perform_lookup(self, subvols, stop):
        '''Refactor of steps common to all tests: Validate arequal from bricks
        backend and perform a lookup of all files from mount'''
        for subvol in subvols:
            ret, arequal = collect_bricks_arequal(subvol[0:stop])
            self.assertTrue(
                ret, 'Unable to get `arequal` checksum on '
                '{}'.format(subvol[0:stop]))
            self.assertEqual(
                len(set(arequal)), 1, 'Mismatch of `arequal` '
                'checksum among {} is identified'.format(subvol[0:stop]))

        # Perform a lookup of all files and directories on mounts
        self.assertTrue(list_all_files_and_dirs_mounts(self.mounts),
                        'Failed to list all files and dirs from mount')
    def test_metadata_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when metadata operations are performed while a brick was down
        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of metadata heal after switching on '
                   '`self heal daemon` is complete')

    def test_metadata_heal_from_heal_cmd(self):
        '''Description: Verify files heal after triggering heal command when
        metadata operations are performed while a brick was down
        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform
           metadata operations
        3. Set `self-heal-daemon` to `on`, invoke `gluster vol <vol> heal`
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'metadata'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)

        # Invoke `glfsheal`
        self.assertTrue(trigger_heal(self.mnode, self.volname),
                        'Unable to trigger index heal on the volume')

        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info(
            'Pass: Verification of metadata heal via `glfsheal` is complete')

    def test_data_heal_from_shd(self):
        '''Description: Verify files heal after switching on `self-heal-daemon`
        when data operations are performed while a brick was down
        Steps:
        1. Create, mount and run IO on volume
        2. Set `self-heal-daemon` to `off`, cyclic brick down and perform data
           operations
        3. Set `self-heal-daemon` to `on` and wait for heal completion
        4. Validate arequal checksum on backend bricks
        '''
        op_type = 'data'
        self._perform_io_and_disable_self_heal()
        self._perform_brick_ops_and_enable_self_heal(op_type=op_type)
        self._validate_heal_completion_and_arequal(op_type=op_type)
        g.log.info('Pass: Verification of data heal after switching on '
                   '`self heal daemon` is complete')