-
Notifications
You must be signed in to change notification settings - Fork 55
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[PA-225] Restart PX on Nodes where pds pods have attached volumes #916
base: master
Are you sure you want to change the base?
Changes from all commits
1c65502
d0c0fef
2ce08a5
bd08aad
9eb1715
d1e5e51
1b620d8
d14b7d4
c0b08fb
2b97aa0
22f28ba
fda39f9
1a05438
9ad7e75
95b3396
9e56094
2c09799
f2426ce
7e1e870
3e236b8
d50a794
eb4bbdd
4a7b979
9607cf3
3611a25
d909ffb
7a3c4d9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -40,6 +40,7 @@ type Parameter struct { | |
AccountName string `json:"AccountName"` | ||
ClusterType string `json:"ClusterType"` | ||
Namespace string `json:"Namespace"` | ||
PxNamespace string `json:"PxNamespace"` | ||
} `json:"InfraToTest"` | ||
} | ||
|
||
|
@@ -1313,3 +1314,94 @@ func ValidateAllDataServiceVolumes(deployment *pds.ModelsDeployment, dataService | |
return resourceTemp, storageOp, config, nil | ||
|
||
} | ||
|
||
func GetPodsFromK8sStatefulSet(deployment *pds.ModelsDeployment, namespace string) ([]corev1.Pod, error) { | ||
var ss *v1.StatefulSet | ||
err = wait.Poll(maxtimeInterval, timeOut, func() (bool, error) { | ||
ss, err = k8sApps.GetStatefulSet(deployment.GetClusterResourceName(), namespace) | ||
if err != nil { | ||
logrus.Warnf("An Error Occured while getting statefulsets %v", err) | ||
return false, nil | ||
} | ||
return true, nil | ||
}) | ||
if err != nil { | ||
logrus.Errorf("An Error Occured while getting statefulsets %v", err) | ||
return nil, err | ||
} | ||
Comment on lines
+1328
to
+1331
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Return verbose error, don't print it, we will print it in the initial place where this func is called |
||
pods, err := k8sApps.GetStatefulSetPods(ss) | ||
if err != nil { | ||
logrus.Errorf("An error occured while getting the pods belonging to this statefulset %v", err) | ||
return nil, err | ||
Comment on lines
+1333
to
+1335
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, just return verbose error, don't print it, we will print it in the initial place where this func is called |
||
} | ||
return pods, nil | ||
} | ||
|
||
func GetK8sNodeObjectUsingPodName(nodeName string) (*corev1.Node, error) { | ||
nodeObject, err := k8sCore.GetNodeByName(nodeName) | ||
if err != nil { | ||
logrus.Errorf("Could not get the node object for node %v because %v", nodeName, err) | ||
return nil, err | ||
} | ||
Comment on lines
+1342
to
+1345
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, just return verbose error, don't print it, we will print it in the initial place where this func is called |
||
return nodeObject, nil | ||
} | ||
|
||
func DrainPxPodOnK8sNode(node *corev1.Node, namespace string) error { | ||
labelSelector := map[string]string{"name": "portworx"} | ||
pod, err := k8sCore.GetPodsByNodeAndLabels(node.Name, namespace, labelSelector) | ||
if err != nil { | ||
logrus.Errorf("Could not fetch pods running on the given node %v", err) | ||
return err | ||
} | ||
Comment on lines
+1352
to
+1355
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, just return verbose error, don't print it, we will print it in the initial place where this func is called |
||
logrus.Infof("Portworx pod to be drained %v from node %v", pod.Items[0].Name, node.Name) | ||
err = k8sCore.DrainPodsFromNode(node.Name, pod.Items, timeOut, maxtimeInterval) | ||
if err != nil { | ||
logrus.Errorf("Could not drain the node %v", err) | ||
return err | ||
} | ||
Comment on lines
+1358
to
+1361
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same, just return verbose error, don't print it, we will print it in the initial place where this func is called |
||
|
||
return nil | ||
} | ||
|
||
func LabelK8sNode(node *corev1.Node, label string) error { | ||
keyval := strings.Split(label, "=") | ||
err := k8sCore.AddLabelOnNode(node.Name, keyval[0], keyval[1]) | ||
return err | ||
} | ||
Comment on lines
+1366
to
+1370
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't understand the point of creating this function if it doesn't do anything different from the function you are using inside. Just use `k8sCore.AddLabelOnNode` directly. |
||
|
||
func RemoveLabelFromK8sNode(node *corev1.Node, label string) error { | ||
err := k8sCore.RemoveLabelOnNode(node.Name, label) | ||
return err | ||
} | ||
Comment on lines
+1372
to
+1375
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here, just use `k8sCore.RemoveLabelOnNode` directly. |
||
|
||
func UnCordonK8sNode(node *corev1.Node) error { | ||
err = wait.Poll(maxtimeInterval, timeOut, func() (bool, error) { | ||
err = k8sCore.UnCordonNode(node.Name, timeOut, maxtimeInterval) | ||
bhsrinivasan-px marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if err != nil { | ||
logrus.Errorf("Failed uncordon node %v due to %v", node.Name, err) | ||
return false, nil | ||
} | ||
return true, nil | ||
}) | ||
return err | ||
} | ||
Comment on lines
+1377
to
+1387
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same here, why not just use the already existing `k8sCore.UnCordonNode`? |
||
|
||
func VerifyPxPodOnNode(nodeName string, namespace string) (bool, error) { | ||
bhsrinivasan-px marked this conversation as resolved.
Show resolved
Hide resolved
|
||
labelSelector := map[string]string{"name": "portworx"} | ||
var pods *corev1.PodList | ||
err = wait.Poll(maxtimeInterval, timeOut, func() (bool, error) { | ||
pods, err = k8sCore.GetPodsByNodeAndLabels(nodeName, namespace, labelSelector) | ||
if err != nil { | ||
logrus.Errorf("Failed to get pods from node %v due to %v", nodeName, err) | ||
return false, nil | ||
Comment on lines
+1395
to
+1396
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If
|
||
} | ||
return true, nil | ||
}) | ||
if err != nil { | ||
logrus.Errorf("Could not fetch pods running on the given node %v", err) | ||
return false, err | ||
} | ||
Comment on lines
+1400
to
+1403
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, just return and add
|
||
pxPodName := pods.Items[0].Name | ||
logrus.Infof("The portworx pod %v from node %v", pxPodName, nodeName) | ||
return true, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ import ( | |
"os" | ||
"strconv" | ||
"testing" | ||
"time" | ||
|
||
. "github.com/onsi/ginkgo" | ||
"github.com/onsi/ginkgo/reporters" | ||
|
@@ -28,6 +29,7 @@ const ( | |
|
||
var ( | ||
namespace string | ||
pxnamespace string | ||
tenantID string | ||
dnsZone string | ||
projectID string | ||
|
@@ -59,7 +61,7 @@ func TestDataService(t *testing.T) { | |
RegisterFailHandler(Fail) | ||
|
||
var specReporters []Reporter | ||
junitReporter := reporters.NewJUnitReporter("/testresults/junit_basic.xml") | ||
junitReporter := reporters.NewJUnitReporter("/tmp/testresults/junit_basic.xml") | ||
specReporters = append(specReporters, junitReporter) | ||
RunSpecsWithDefaultAndCustomReporters(t, "Torpedo : pds", specReporters) | ||
|
||
|
@@ -92,6 +94,7 @@ var _ = BeforeSuite(func() { | |
namespaceID, err = pdslib.GetnameSpaceID(namespace, deploymentTargetID) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(namespaceID).NotTo(BeEmpty()) | ||
pxnamespace = params.InfraToTest.PxNamespace | ||
}) | ||
}) | ||
|
||
|
@@ -235,7 +238,7 @@ var _ = Describe("{ScaleUPDataServices}", func() { | |
isDeploymentsDeleted = true | ||
}) | ||
|
||
Step("Delete the worload generating deployments", func() { | ||
Step("Delete the workload generating deployments", func() { | ||
if ds.Name == "Cassandra" || ds.Name == "PostgreSQL" { | ||
err = pdslib.DeleteK8sDeployments(dep.Name, namespace) | ||
} else { | ||
|
@@ -562,6 +565,192 @@ func UpgradeDataService(dataservice, oldVersion, oldImage, dsVersion, dsBuild st | |
}) | ||
} | ||
|
||
var _ = Describe("{DeployDSRunWorkloadRestartPXOnNodes}", func() { | ||
It("Deploy Dataservices", func() { | ||
logrus.Info("Create dataservices without backup.") | ||
Step("Deploy PDS Data Service", func() { | ||
for _, ds := range params.DataServiceToTest { | ||
isDeploymentsDeleted = false | ||
dataServiceDefaultResourceTemplateID, err = pdslib.GetResourceTemplate(tenantID, ds.Name) | ||
Expect(err).NotTo(HaveOccurred()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use dash for the verifications There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you mean? |
||
|
||
logrus.Infof("dataServiceDefaultResourceTemplateID %v ", dataServiceDefaultResourceTemplateID) | ||
|
||
dataServiceDefaultAppConfigID, err = pdslib.GetAppConfTemplate(tenantID, ds.Name) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(dataServiceDefaultAppConfigID).NotTo(BeEmpty()) | ||
|
||
logrus.Infof(" dataServiceDefaultAppConfigID %v ", dataServiceDefaultAppConfigID) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use log instance for logging There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. what do you mean? |
||
|
||
deployment, _, _, err := pdslib.DeployDataServices(ds.Name, projectID, | ||
deploymentTargetID, | ||
dnsZone, | ||
deploymentName, | ||
namespaceID, | ||
dataServiceDefaultAppConfigID, | ||
int32(ds.Replicas), | ||
serviceType, | ||
dataServiceDefaultResourceTemplateID, | ||
storageTemplateID, | ||
ds.Version, | ||
ds.Image, | ||
namespace, | ||
) | ||
Expect(err).NotTo(HaveOccurred()) | ||
|
||
defer func() { | ||
if !isDeploymentsDeleted { | ||
Step("Delete created deployments") | ||
resp, err := pdslib.DeleteDeployment(deployment.GetId()) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(resp.StatusCode).Should(BeEquivalentTo(http.StatusAccepted)) | ||
} | ||
}() | ||
|
||
Step("Validate Storage Configurations", func() { | ||
logrus.Infof("data service deployed %v ", ds.Name) | ||
resourceTemp, storageOp, config, err := pdslib.ValidateDataServiceVolumes(deployment, ds.Name, dataServiceDefaultResourceTemplateID, storageTemplateID, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
logrus.Infof("filesystem used %v ", config.Spec.StorageOptions.Filesystem) | ||
logrus.Infof("storage replicas used %v ", config.Spec.StorageOptions.Replicas) | ||
logrus.Infof("cpu requests used %v ", config.Spec.Resources.Requests.CPU) | ||
logrus.Infof("memory requests used %v ", config.Spec.Resources.Requests.Memory) | ||
logrus.Infof("storage requests used %v ", config.Spec.Resources.Requests.Storage) | ||
logrus.Infof("No of nodes requested %v ", config.Spec.Nodes) | ||
logrus.Infof("volume group %v ", storageOp.VolumeGroup) | ||
|
||
Expect(resourceTemp.Resources.Requests.CPU).Should(Equal(config.Spec.Resources.Requests.CPU)) | ||
Expect(resourceTemp.Resources.Requests.Memory).Should(Equal(config.Spec.Resources.Requests.Memory)) | ||
Expect(resourceTemp.Resources.Requests.Storage).Should(Equal(config.Spec.Resources.Requests.Storage)) | ||
Expect(resourceTemp.Resources.Limits.CPU).Should(Equal(config.Spec.Resources.Limits.CPU)) | ||
Expect(resourceTemp.Resources.Limits.Memory).Should(Equal(config.Spec.Resources.Limits.Memory)) | ||
repl, err := strconv.Atoi(config.Spec.StorageOptions.Replicas) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(storageOp.Replicas).Should(Equal(int32(repl))) | ||
Expect(storageOp.Filesystem).Should(Equal(config.Spec.StorageOptions.Filesystem)) | ||
Expect(config.Spec.Nodes).Should(Equal(int32(ds.Replicas))) | ||
}) | ||
|
||
Step("Running Workloads before scaling up of dataservices ", func() { | ||
if ds.Name == postgresql { | ||
deploymentName := "pgload" | ||
pod, dep, err = pdslib.CreateDataServiceWorkloads(ds.Name, deployment.GetId(), "100", "1", deploymentName, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
if ds.Name == rabbitmq { | ||
deploymentName := "rmq" | ||
pod, dep, err = pdslib.CreateDataServiceWorkloads(ds.Name, deployment.GetId(), "", "", deploymentName, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
if ds.Name == redis { | ||
deploymentName := "redisbench" | ||
pod, dep, err = pdslib.CreateDataServiceWorkloads(ds.Name, deployment.GetId(), "", "", deploymentName, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
if ds.Name == cassandra { | ||
deploymentName := "cassandra-stress" | ||
pod, dep, err = pdslib.CreateDataServiceWorkloads(ds.Name, deployment.GetId(), "", "", deploymentName, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
}) | ||
|
||
defer func() { | ||
Step("Delete the workload generating deployments", func() { | ||
if ds.Name == "Cassandra" || ds.Name == "PostgreSQL" { | ||
err = pdslib.DeleteK8sDeployments(dep.Name, namespace) | ||
} else { | ||
err = pdslib.DeleteK8sPods(pod.Name, namespace) | ||
} | ||
Expect(err).NotTo(HaveOccurred()) | ||
}) | ||
}() | ||
|
||
var deploymentPods []corev1.Pod | ||
Step("Get a list of pod names that belong to the deployment", func() { | ||
deploymentPods, err = pdslib.GetPodsFromK8sStatefulSet(deployment, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(deploymentPods).NotTo(BeEmpty()) | ||
}) | ||
|
||
var nodeList []*corev1.Node | ||
Step("Get the node that the PV of the pod resides on", func() { | ||
for _, pod := range deploymentPods { | ||
logrus.Infof("The pod spec node name: %v", pod.Spec.NodeName) | ||
dash.Infof("The pod spec node name: %v", pod.Spec.NodeName) | ||
nodeObject, err := pdslib.GetK8sNodeObjectUsingPodName(pod.Spec.NodeName) | ||
Expect(err).NotTo(HaveOccurred()) | ||
nodeList = append(nodeList, nodeObject) | ||
} | ||
}) | ||
|
||
Step("For each node in the nodelist, stop px service on it", func() { | ||
|
||
for _, node := range nodeList { | ||
label := "px/service=stop" | ||
err := pdslib.LabelK8sNode(node, label) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
|
||
logrus.Info("Finished labeling the nodes...") | ||
dash.Info("Finished labeling the nodes...") | ||
time.Sleep(30 * time.Second) | ||
|
||
}) | ||
|
||
Step("Validate that the deployment is healthy", func() { | ||
err := pdslib.ValidateDataServiceDeployment(deployment, namespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
}) | ||
|
||
Step("Cleanup: Start px on the node and uncordon the node", func() { | ||
for _, node := range nodeList { | ||
label := "px/service" | ||
err := pdslib.RemoveLabelFromK8sNode(node, label) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
|
||
logrus.Info("Finished removing labels from the nodes...") | ||
dash.Info("Finished removing labels from the nodes...") | ||
|
||
for _, node := range nodeList { | ||
err := pdslib.DrainPxPodOnK8sNode(node, pxnamespace) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
|
||
logrus.Info("Finished draining px pods from the nodes...") | ||
dash.Info("Finished draining px pods from the nodes...") | ||
|
||
for _, node := range nodeList { | ||
err := pdslib.UnCordonK8sNode(node) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
|
||
logrus.Infof("Finished uncordoning the node...") | ||
dash.Infof("Finished uncordoning the node...") | ||
|
||
logrus.Info("Verify that the px pod has started on node...") | ||
dash.Info("Verify that the px pod has started on node...") | ||
// Read log lines of the px pod on the node to see if the service is running | ||
for _, node := range nodeList { | ||
rc, err := pdslib.VerifyPxPodOnNode(node.Name, pxnamespace) | ||
Expect(rc).To(BeTrue()) | ||
Expect(err).NotTo(HaveOccurred()) | ||
} | ||
|
||
}) | ||
|
||
Step("Delete Deployments", func() { | ||
resp, err := pdslib.DeleteDeployment(deployment.GetId()) | ||
Expect(err).NotTo(HaveOccurred()) | ||
Expect(resp.StatusCode).Should(BeEquivalentTo(http.StatusAccepted)) | ||
isDeploymentsDeleted = true | ||
}) | ||
} | ||
}) | ||
}) | ||
|
||
}) | ||
|
||
func TestMain(m *testing.M) { | ||
// call flag.Parse() here if TestMain uses flags | ||
ParseFlags() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
use new log instance
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what do you mean?