diff --git a/ldms/src/ldmsd/ldmsd.h b/ldms/src/ldmsd/ldmsd.h
index d20da0de6..a8be1696f 100644
--- a/ldms/src/ldmsd/ldmsd.h
+++ b/ldms/src/ldmsd/ldmsd.h
@@ -662,6 +662,20 @@ int ldmsd_row_to_json_object(ldmsd_row_t row, char **str, int *len);
  */
 int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len);
 
+/**
+ * \brief ldmsd_avro_name_get
+ *
+ * Avro names may only contain the characters [A-Za-z0-9\\_\\-]. LDMS metric
+ * names, by contrast, may contain characters outside this set. When creating
+ * an Avro schema, these LDMS names must be mapped to a valid Avro name. The
+ * function returns malloc'd memory that should be freed by the caller when
+ * it is no longer needed.
+ *
+ * \param ldms_name The LDMS metric name to be mapped to a valid Avro name
+ * \return char* Pointer to the allocated buffer or NULL if ENOMEM
+ */
+char *ldmsd_avro_name_get(const char *ldms_name);
+
 /**
  * Configure strgp decomposer.
  *
diff --git a/ldms/src/ldmsd/ldmsd_decomp.c b/ldms/src/ldmsd/ldmsd_decomp.c
index 07b3ee922..e64172212 100644
--- a/ldms/src/ldmsd/ldmsd_decomp.c
+++ b/ldms/src/ldmsd/ldmsd_decomp.c
@@ -57,6 +57,7 @@
 #include
 #include
 #include
+#include <ctype.h>
 
 #include
 
@@ -779,8 +780,39 @@ static const char *col_type_str(enum ldms_value_type type)
 	return type_str[type];
 }
 
+/*
+ * Return c if it is alphanumeric, '_' or '-'; otherwise return '_'.
+ *
+ * NOTE(review): the Avro specification limits names to [A-Za-z0-9_] with a
+ * non-digit first character, so '-' and a leading digit pass through here
+ * unchanged -- confirm that schema consumers accept them.
+ */
+static char get_avro_char(char c)
+{
+	/* <ctype.h> functions need a value representable as unsigned char */
+	if (isalnum((unsigned char)c))
+		return c;
+	if (c == '_' || c == '-')
+		return c;
+	return '_';
+}
+
+char *ldmsd_avro_name_get(const char *ldms_name)
+{
+	/* calloc() zero-fills, so the copy below is always NUL-terminated */
+	char *avro_name_buf = calloc(1, strlen(ldms_name) + 1);
+	char *avro_name = avro_name_buf;
+	if (!avro_name_buf)
+		return NULL;
+	while (*ldms_name != '\0') {
+		*avro_name++ = get_avro_char(*ldms_name++);
+	}
+	return avro_name_buf;
+}
+
 int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 {
+	char *avro_name = NULL;
 	struct strbuf_tailq_s h = TAILQ_HEAD_INITIALIZER(h);
 	ldmsd_col_t col;
 	int i, rc;
@@ -796,6 +828,13 @@ int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 
 	for (i = 0; i < row->col_count; i++) {
 		col = &row->cols[i];
+		/* Release the previous column's name before mapping the next */
+		free(avro_name);
+		avro_name = ldmsd_avro_name_get(col->name);
+		if (!avro_name) {
+			rc = ENOMEM;
+			goto err_0;
+		}
 		if (i) { /* comma */
 			rc = strbuf_printf(&h, ",");
 			if (rc)
@@ -808,7 +847,7 @@ int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 					"\"type\":\"long\","
 					"\"logicalType\":\"timestamp-millis\""
 				"}}",
-				col->name);
+				avro_name);
 			if (rc)
 				goto err_0;
 			break;
@@ -825,7 +864,7 @@ int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 		case LDMS_V_D64:
 		case LDMS_V_CHAR_ARRAY:
 			rc = strbuf_printf(&h, "{\"name\":\"%s\",\"type\":\"%s\"}",
-				col->name, col_type_str(col->type));
+				avro_name, col_type_str(col->type));
 			if (rc)
 				goto err_0;
 			break;
@@ -842,7 +881,7 @@ int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 			rc = strbuf_printf(&h,
 				"{\"name\":\"%s\","
 				"\"type\":{ \"type\" : \"array\", \"items\": \"%s\" }}",
-				col->name, col_type_str(col->type));
+				avro_name, col_type_str(col->type));
 			if (rc)
 				goto err_0;
 			break;
@@ -862,9 +901,11 @@ int ldmsd_row_to_json_avro_schema(ldmsd_row_t row, char **str, size_t *len)
 
 	rc = strbuf_str(&h, str, (int *)len);
 	strbuf_purge(&h);
+	free(avro_name);
 	return rc;
 
 err_0:
+	free(avro_name);
 	strbuf_purge(&h);
 	return rc;
 }