Skip to content

Commit

Permalink
osd: OSD device smart data include additional nvme data
Browse files Browse the repository at this point in the history
Add nvme addition data into the deveh health data. That use nvme tool
and command syntax "nvme <vendor> smart-log-add <dev> -json". The nvme
json output append in the dev smart "nvme_smart_health_information_add_log".

- made run_smartctl static/private
- changed get_metrics to take a const string, not c str

Signed-off-by: Rick Chen <[email protected]>
Signed-off-by: Sage Weil <[email protected]>
  • Loading branch information
hsiang41 authored and liewegas committed Jan 4, 2019
1 parent 9f3dff2 commit 2da8901
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 25 deletions.
188 changes: 168 additions & 20 deletions src/common/blkdev.cc
Original file line number Diff line number Diff line change
Expand Up @@ -522,32 +522,121 @@ std::string get_device_id(const std::string& devname,
return device_id;
}

int block_device_get_metrics(const char *device, int timeout,
json_spirit::mValue *result)
static std::string get_device_vendor(const std::string& devname)
{
std::string s;
if (int r = block_device_run_smartctl(device, timeout, &s); r != 0) {
s = "{\"error\": \"smartctl failed\", \"dev\": \"";
s += device;
s += "\", \"smartctl_error_code\": " + stringify(r);
s += "\", \"smartctl_output\": \"" + s;
s += + "\"}";
struct udev_device *dev;
static struct udev *udev;
const char *data;

udev = udev_new();
if (!udev) {
return {};
}
if (json_spirit::read(s, *result)) {
return 0;
dev = udev_device_new_from_subsystem_sysname(udev, "block", devname.c_str());
if (!dev) {
udev_unref(udev);
return {};
}
s = "{\"error\": \"smartctl returned invalid JSON\", \"dev\": \"";
s += device;
s += "\"}";
if (json_spirit::read(s, *result)) {
return 0;

std::string id_vendor, id_model;
data = udev_device_get_property_value(dev, "ID_VENDOR");
if (data) {
id_vendor = data;
}
data = udev_device_get_property_value(dev, "ID_MODEL");
if (data) {
id_model = data;
}
udev_device_unref(dev);
udev_unref(udev);

std::transform(id_vendor.begin(), id_vendor.end(), id_vendor.begin(),
::tolower);
std::transform(id_model.begin(), id_model.end(), id_model.begin(),
::tolower);

if (id_vendor.size()) {
return id_vendor;
}
if (id_model.size()) {
int pos = id_model.find(" ");
if (pos > 0) {
return id_model.substr(0, pos);
} else {
return id_model;
}
}

std::string vendor, model;
char buf[1024] = {0};
BlkDev blkdev(devname);
if (!blkdev.vendor(buf, sizeof(buf))) {
vendor = buf;
}
return -EINVAL;
if (!blkdev.model(buf, sizeof(buf))) {
model = buf;
}
if (vendor.size()) {
return vendor;
}
if (model.size()) {
int pos = model.find(" ");
if (pos > 0) {
return model.substr(0, pos);
} else {
return model;
}
}

return {};
}

int block_device_run_smartctl(const char *device, int timeout,
std::string *result)
static int block_device_run_vendor_nvme(
const string& devname, const string& vendor, int timeout,
std::string *result)
{
string device = "/dev/" + devname;

SubProcessTimed nvmecli(
"sudo", SubProcess::CLOSE, SubProcess::PIPE, SubProcess::CLOSE,
timeout);
nvmecli.add_cmd_args(
"nvme",
vendor.c_str(),
"smart-log-add",
"--json",
device.c_str(),
NULL);
int ret = nvmecli.spawn();
if (ret != 0) {
*result = std::string("error spawning nvme command: ") + nvmecli.err();
return ret;
}

bufferlist output;
ret = output.read_fd(nvmecli.get_stdout(), 100*1024);
if (ret < 0) {
bufferlist err;
err.read_fd(nvmecli.get_stderr(), 100 * 1024);
*result = std::string("failed to execute nvme: ") + err.to_str();
} else {
ret = 0;
*result = output.to_str();
}

if (nvmecli.join() != 0) {
*result = std::string("nvme returned an error: ") + nvmecli.err();
return -EINVAL;
}

return ret;
}

static int block_device_run_smartctl(const string& devname, int timeout,
std::string *result)
{
string device = "/dev/" + devname;

// when using --json, smartctl will report its errors in JSON format to stdout
SubProcessTimed smartctl(
"sudo", SubProcess::CLOSE, SubProcess::PIPE, SubProcess::CLOSE,
Expand All @@ -557,7 +646,7 @@ int block_device_run_smartctl(const char *device, int timeout,
"-a",
//"-x",
"--json",
device,
device.c_str(),
NULL);

int ret = smartctl.spawn();
Expand All @@ -583,6 +672,53 @@ int block_device_run_smartctl(const char *device, int timeout,
return ret;
}

int block_device_get_metrics(const string& devname, int timeout,
json_spirit::mValue *result)
{
std::string s;

// smartctl
if (int r = block_device_run_smartctl(devname, timeout, &s);
r != 0) {
s = "{\"error\": \"smartctl failed\", \"dev\": \"/dev/";
s += devname;
s += "\", \"smartctl_error_code\": " + stringify(r);
s += "\", \"smartctl_output\": \"" + s;
s += + "\"}";
}
if (!json_spirit::read(s, *result)) {
s = "{\"error\": \"smartctl returned invalid JSON\", \"dev\": \"/dev/";
s += devname;
s += "\"}";
}
if (!json_spirit::read(s, *result)) {
return -EINVAL;
}

json_spirit::mObject& base = result->get_obj();
string vendor = get_device_vendor(devname);
if (vendor.size()) {
base["nvme_vendor"] = vendor;
s.clear();
json_spirit::mValue nvme_json;
if (int r = block_device_run_vendor_nvme(devname, vendor, timeout, &s);
r == 0) {
if (json_spirit::read(s, nvme_json) != 0) {
base["nvme_smart_health_information_add_log"] = nvme_json;
} else {
base["nvme_smart_health_information_add_log_error"] = "bad json output: "
+ s;
}
} else {
base["nvme_smart_health_information_add_log_error_code"] = r;
base["nvme_smart_health_information_add_log_error"] = s;
}
} else {
base["nvme_vendor"] = "unknown";
}

return 0;
}

#elif defined(__APPLE__)
#include <sys/disk.h>
Expand Down Expand Up @@ -865,6 +1001,12 @@ int block_device_run_smartctl(const char *device, int timeout,
return -EOPNOTSUPP;
}

int block_device_run_nvme(const char *device, const char *vendor, int timeout,
std::string *result)
{
return -EOPNOTSUPP;
}

static int block_device_devname(int fd, char *devname, size_t max)
{
struct fiodgname_arg arg;
Expand Down Expand Up @@ -1000,4 +1142,10 @@ int block_device_run_smartctl(const char *device, int timeout,
return -EOPNOTSUPP;
}

int block_device_run_nvme(const char *device, const char *vendor, int timeout,
std::string *result)
{
return -EOPNOTSUPP;
}

#endif
4 changes: 1 addition & 3 deletions src/common/blkdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ extern int get_device_by_path(const char *path, char* partition, char* device, s
extern std::string get_device_id(const std::string& devname,
std::string *err=0);
extern void get_dm_parents(const std::string& dev, std::set<std::string> *ls);
extern int block_device_run_smartctl(const char *device, int timeout,
std::string *result);
extern int block_device_get_metrics(const char *device, int timeout,
extern int block_device_get_metrics(const string& devname, int timeout,
json_spirit::mValue *result);

// do everything to translate a device to the raw physical devices that
Expand Down
2 changes: 1 addition & 1 deletion src/mon/Monitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3660,7 +3660,7 @@ void Monitor::handle_command(MonOpRequestRef op)
continue;
}
json_spirit::mValue smart_json;
if (block_device_get_metrics(("/dev/" + devname).c_str(), smart_timeout,
if (block_device_get_metrics(devname, smart_timeout,
&smart_json)) {
dout(10) << "block_device_get_metrics failed for /dev/" << devname
<< dendl;
Expand Down
2 changes: 1 addition & 1 deletion src/osd/OSD.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6635,7 +6635,7 @@ void OSD::probe_smart(const string& only_devid, ostream& ss)
}

json_spirit::mValue smart_json;
if (block_device_get_metrics(("/dev/" + dev).c_str(), smart_timeout,
if (block_device_get_metrics(dev, smart_timeout,
&smart_json)) {
dout(10) << "block_device_get_metrics failed for /dev/" << dev << dendl;
continue;
Expand Down
1 change: 1 addition & 0 deletions sudoers.d/ceph-osd-smartctl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
## allow ceph-osd (which runs as user ceph) to collect device health metrics

ceph ALL=NOPASSWD: /usr/sbin/smartctl -a --json /dev/*
ceph ALL=NOPASSWD: /usr/sbin/nvme * smart-log-add --json /dev/*

0 comments on commit 2da8901

Please sign in to comment.