From 5f72d4279b9f1ad4e7874c771b62e39e13572315 Mon Sep 17 00:00:00 2001 From: Costa Tsaousis Date: Wed, 11 Dec 2024 18:02:17 +0200 Subject: [PATCH] Streaming improvements No 3 (#19168) * ML uses synchronous queries * do not call malloc_trim() to free memory, since it locks everything * Reschedule dimensions for training from worker threads. * when we collect or read from the database, it is SAMPLES. When we generate points for a chart, it is POINTS * keep the receiver send buffer 10x the default * support autoscaling stream circular buffers * nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible * fix last commit * allow removing receiver and senders inline, if the stream thread is not working on them * fix logs * Revert "nd_poll() prefers sending data vs receiving data - in an attempt to dequeue as soon as possible" This reverts commit 51539a97dad220bc77b93a48b0110eb033e5528d. * do not access receiver or sender after it has been removed * open cache hot2clean * open cache hot2clean does not need flushing * use aral for extent pages up to 65k * track aral malloc and mmap allocations separately; add 8192 as a possible value to PGD * do not evict too frequently if not needed * fix aral metrics * fix aral metrics again * accurate accounting of memory for dictionaries, strings, labels and MRG * log during shutdown the progress of dbengine flushing * move metasync shutdown after dbengine * max iterations per I/O events * max iterations per I/O events - break the loop * max iterations per I/O events - break the loop - again * disable inline evictions for all caches * when writing to sockets, send everything that can be sent * cleanup code to trigger evictions * fix calculation of eviction size * fix calculation of eviction size once more * fix calculation of eviction size once more - again * ml and replication stop while backfilling is running * process opcodes while draining the sockets; log with limit when asking to disconnect a 
node * fix log * ml stops when replication queries are running * report pgd_padding to pulse * aral precise memory accounting * removed all alignas() and fix the 2 issues that resulted in unaligned memory accesses (one in mqtt and another in streaming) * remove the bigger sizes from PGD, but keep multiples of gorilla buffers * exclude judy from sanitizers * use 16 bytes alignment on 32 bit machines * internal check about memory alignment * experiment: do not allow more children to connect while there is backfilling or replication queries running * when the node is initializing, retry in 30 seconds * connector cleanup and isolation of control logic about enabling/disabling various parts * stop also health queries while backfilling is running * tuning * drain the input * improve interactivity when suspending * more interactive stream_control * debug logs to find the connection issue * abstracted everything about stream control * Add ml_host_{start,stop} again. * Do not create/update anomaly-detection charts when ML is not running for a host. 
* rrdhost flag RECEIVER_DISCONNECTED has been reversed to COLLECTOR_ONLINE and has been used for localhost and virtual hosts too, to have a single point of truth about the availability of collected data or not * ml_host_start() and ml_host_stop() are used by streaming receivers; ml_host_start() is used for localhost and virtual hosts * fixed typo * allow up to 3 backfills at a time * add throttling based on user queries * restore cache line paddings * unify streaming logs to make it easier to grep logs * tuning of stream_control * more logs unification * use mallocz_release_as_much_memory_to_the_system() under extreme conditions * do not rely on the response code of evict_pages() * log the gap of the database every time a node is connected * updated ram requirements --------- Co-authored-by: vkalintiris --- CMakeLists.txt | 2 + .../sizing-netdata-agents/ram-requirements.md | 10 +- src/aclk/mqtt_websockets/mqtt_ng.c | 9 +- src/daemon/main.c | 61 +++-- src/daemon/pulse/pulse-aral.c | 44 ++-- src/daemon/pulse/pulse-daemon-memory.c | 46 ++-- src/daemon/pulse/pulse-dbengine.c | 62 +++-- src/daemon/pulse/pulse-gorilla.c | 14 +- src/daemon/pulse/pulse-http-api.c | 9 + src/daemon/pulse/pulse-ml.c | 24 +- src/daemon/pulse/pulse-queries.c | 25 +- src/daemon/pulse/pulse-sqlite3.c | 42 ++- src/daemon/watcher.c | 6 +- src/daemon/watcher.h | 2 +- src/database/engine/cache.c | 157 +++++++----- src/database/engine/cache.h | 195 +++++++++----- src/database/engine/metric.c | 51 ++-- src/database/engine/metric.h | 19 +- src/database/engine/page.c | 28 +- src/database/engine/page.h | 4 +- src/database/engine/pagecache.c | 18 +- src/database/engine/pdc.c | 16 +- src/database/engine/pdc.h | 9 +- src/database/engine/rrdengine.c | 40 ++- src/database/engine/rrdengine.h | 61 ++++- src/database/engine/rrdengineapi.c | 24 +- src/database/engine/rrdengineapi.h | 34 ++- src/database/rrd.h | 27 +- src/database/rrdhost.c | 3 + src/database/rrdlabels.c | 29 ++- src/database/rrdset.c | 2 +- 
src/health/health_event_loop.c | 6 + src/libnetdata/aral/aral.c | 155 ++++++----- src/libnetdata/aral/aral.h | 95 +++++-- src/libnetdata/common.h | 6 + .../dictionary/dictionary-hashtable.h | 15 ++ .../dictionary/dictionary-statistics.h | 10 +- src/libnetdata/dictionary/dictionary.h | 26 ++ src/libnetdata/libjudy/judy-malloc.c | 10 +- src/libnetdata/libjudy/judy-malloc.h | 6 +- .../libjudy/vendored/JudyCommon/JudyPrivate.h | 2 +- .../libjudy/vendored/JudyL/JudyLCascade.c | 1 - .../libjudy/vendored/JudyL/JudyLDecascade.c | 2 - .../libjudy/vendored/JudyL/JudyLDel.c | 1 - .../libjudy/vendored/JudyL/JudyLGet.c | 2 - .../libjudy/vendored/JudyL/JudyLIns.c | 1 - src/libnetdata/libnetdata.c | 3 +- .../simple_hashtable/simple_hashtable.h | 8 +- src/libnetdata/string/string.c | 21 +- src/libnetdata/url/url.c | 2 +- src/libnetdata/url/url.h | 2 +- src/ml/ml.cc | 145 +++-------- src/ml/ml_config.cc | 2 +- src/ml/ml_dimension.h | 2 +- src/ml/ml_enums.cc | 4 - src/ml/ml_enums.h | 6 - src/ml/ml_public.cc | 32 ++- src/ml/ml_queue.h | 8 - src/plugins.d/pluginsd_parser.c | 2 + .../protocol/command-begin-set-end.c | 2 +- src/streaming/protocol/command-nodeid.c | 12 +- src/streaming/protocol/commands.c | 12 +- src/streaming/replication.c | 69 +++-- src/streaming/replication.h | 8 + src/streaming/rrdhost-status.c | 2 +- src/streaming/stream-capabilities.c | 4 +- src/streaming/stream-circular-buffer.c | 29 ++- src/streaming/stream-circular-buffer.h | 16 +- .../stream-compression/compression.h | 5 +- src/streaming/stream-conf.c | 9 +- src/streaming/stream-connector.c | 240 ++++++------------ src/streaming/stream-control.c | 116 +++++++++ src/streaming/stream-control.h | 29 +++ src/streaming/stream-parents.c | 53 ++-- src/streaming/stream-path.c | 13 +- src/streaming/stream-receiver-connection.c | 55 ++-- src/streaming/stream-receiver-internals.h | 3 +- src/streaming/stream-receiver.c | 216 +++++++++------- src/streaming/stream-sender-commit.c | 52 ++-- 
src/streaming/stream-sender-execute.c | 13 +- src/streaming/stream-sender-internals.h | 2 + src/streaming/stream-sender.c | 171 +++++++------ src/streaming/stream-thread.c | 111 +++++--- src/streaming/stream-thread.h | 9 +- src/streaming/stream-traffic-types.h | 8 + src/streaming/stream.h | 1 + src/web/api/formatters/rrd2json.c | 2 + src/web/api/queries/query.c | 7 +- src/web/api/queries/query.h | 2 + src/web/api/queries/weights.c | 5 + 90 files changed, 1792 insertions(+), 1132 deletions(-) create mode 100644 src/streaming/stream-control.c create mode 100644 src/streaming/stream-control.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 66580f3a15b5f6..bd57bbfa84bae8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1549,6 +1549,8 @@ set(STREAMING_PLUGIN_FILES src/streaming/stream-traffic-types.h src/streaming/stream-circular-buffer.c src/streaming/stream-circular-buffer.h + src/streaming/stream-control.c + src/streaming/stream-control.h ) set(WEB_PLUGIN_FILES diff --git a/docs/netdata-agent/sizing-netdata-agents/ram-requirements.md b/docs/netdata-agent/sizing-netdata-agents/ram-requirements.md index fe50dba2a32b38..9bed48d145afd5 100644 --- a/docs/netdata-agent/sizing-netdata-agents/ram-requirements.md +++ b/docs/netdata-agent/sizing-netdata-agents/ram-requirements.md @@ -19,7 +19,7 @@ This number can be lowered by limiting the number of Database Tiers or switching | nodes currently received | nodes collected | 512 KiB | Structures and reception buffers | | nodes currently sent | nodes collected | 512 KiB | Structures and dispatch buffers | -These numbers vary depending on name length, the number of dimensions per instance and per context, the number and length of the labels added, the number of Machine Learning models maintained and similar parameters. For most use cases, they represent the worst case scenario, so you may find out Netdata actually needs less than that. 
+These numbers vary depending on metric name length, the average number of dimensions per instance and per context, the number and length of the labels added, the number of database tiers configured, the number of Machine Learning models maintained per metric and similar parameters. For most use cases, they represent the worst case scenario, so you may find out Netdata actually needs less than that. Each metric currently being collected needs (1 index + 20 collection + 5 ml) = 26 KiB. When it stops being collected, it needs 1 KiB (index). @@ -84,3 +84,11 @@ We frequently see that the following strategy gives the best results: 3. Set the page cache in `netdata.conf` to use 1/3 of the available memory. This will allow Netdata queries to have more caches, while leaving plenty of available memory of logs and the operating system. + +In Netdata 2.1 we added the `netdata.conf` options `[db].dbengine use all ram for caches` and `[db].dbengine out of memory protection`. +Combining these two options is probably the simplest way to get the best results: + +- `[db].dbengine out of memory protection` is by default 10% of total system RAM, but not more than 5GiB. When the amount of free memory is less than this, Netdata automatically starts releasing memory from its caches to avoid running out of memory. On `systemd-journal` centralization points, set this to the amount of memory to be dedicated to the systemd journal. +- `[db].dbengine use all ram for caches` is by default `no`. Set it to `yes` to use all the memory except the memory given above. + +With these settings, Netdata will use all the available memory, but leave the specified amount for the systemd journal. 
diff --git a/src/aclk/mqtt_websockets/mqtt_ng.c b/src/aclk/mqtt_websockets/mqtt_ng.c index 9abe77b5fbe516..07a2be102b9bb0 100644 --- a/src/aclk/mqtt_websockets/mqtt_ng.c +++ b/src/aclk/mqtt_websockets/mqtt_ng.c @@ -745,8 +745,13 @@ static size_t mqtt_ng_connect_size(struct mqtt_auth_properties *auth, #define WRITE_POS(frag) (&(frag->data[frag->len])) // [MQTT-1.5.2] Two Byte Integer -#define PACK_2B_INT(buffer, integer, frag) { *(uint16_t *)WRITE_POS(frag) = htobe16((integer)); \ - DATA_ADVANCE(buffer, sizeof(uint16_t), frag); } +#define PACK_2B_INT(buffer, integer, frag) { \ + uint16_t temp = htobe16((integer)); \ + memcpy(WRITE_POS(frag), &temp, sizeof(uint16_t)); \ + DATA_ADVANCE(buffer, sizeof(uint16_t), frag); \ +} +// #define PACK_2B_INT(buffer, integer, frag) { *(uint16_t *)WRITE_POS(frag) = htobe16((integer)); +// DATA_ADVANCE(buffer, sizeof(uint16_t), frag); } static int _optimized_add(struct header_buffer *buf, void *data, size_t data_len, free_fnc_t data_free_fnc, struct buffer_fragment **frag) { diff --git a/src/daemon/main.c b/src/daemon/main.c index 7b75cc9a11e706..c2c27219364981 100644 --- a/src/daemon/main.c +++ b/src/daemon/main.c @@ -394,11 +394,10 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re { watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); - watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); - watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); } else { @@ -406,15 +405,44 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re #ifdef ENABLE_DBENGINE if(dbengine_enabled) { + nd_log(NDLS_DAEMON, NDLP_INFO, "Preparing DBENGINE shutdown..."); for (size_t 
tier = 0; tier < storage_tiers; tier++) rrdeng_prepare_exit(multidb_ctx[tier]); - for (size_t tier = 0; tier < storage_tiers; tier++) { - if (!multidb_ctx[tier]) - continue; - completion_wait_for(&multidb_ctx[tier]->quiesce.completion); - completion_destroy(&multidb_ctx[tier]->quiesce.completion); - } + struct pgc_statistics pgc_main_stats = pgc_get_statistics(main_cache); + nd_log(NDLS_DAEMON, NDLP_INFO, "Waiting for DBENGINE to commit unsaved data to disk (%zu pages, %zu bytes)...", + pgc_main_stats.queues[PGC_QUEUE_HOT].entries + pgc_main_stats.queues[PGC_QUEUE_DIRTY].entries, + pgc_main_stats.queues[PGC_QUEUE_HOT].size + pgc_main_stats.queues[PGC_QUEUE_DIRTY].size); + + bool finished_tiers[RRD_STORAGE_TIERS] = { 0 }; + size_t waiting_tiers, iterations = 0; + do { + waiting_tiers = 0; + iterations++; + + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier] || finished_tiers[tier]) + continue; + + waiting_tiers++; + if (completion_timedwait_for(&multidb_ctx[tier]->quiesce.completion, 1)) { + completion_destroy(&multidb_ctx[tier]->quiesce.completion); + finished_tiers[tier] = true; + waiting_tiers--; + nd_log(NDLS_DAEMON, NDLP_INFO, "DBENGINE tier %zu finished!", tier); + } + else if(iterations % 10 == 0) { + pgc_main_stats = pgc_get_statistics(main_cache); + nd_log(NDLS_DAEMON, NDLP_INFO, + "Still waiting for DBENGINE tier %zu to finish " + "(cache still has %zu pages, %zu bytes hot, for all tiers)...", + tier, + pgc_main_stats.queues[PGC_QUEUE_HOT].entries + pgc_main_stats.queues[PGC_QUEUE_DIRTY].entries, + pgc_main_stats.queues[PGC_QUEUE_HOT].size + pgc_main_stats.queues[PGC_QUEUE_DIRTY].size); + } + } + } while(waiting_tiers); + nd_log(NDLS_DAEMON, NDLP_INFO, "DBENGINE shutdown completed..."); } #endif watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); @@ -422,9 +450,6 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re rrd_finalize_collection_for_all_hosts(); 
watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); - metadata_sync_shutdown(); - watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); - #ifdef ENABLE_DBENGINE if(dbengine_enabled) { size_t running = 1; @@ -452,18 +477,22 @@ void netdata_cleanup_and_exit(int ret, const char *action, const char *action_re rrdeng_exit(multidb_ctx[tier]); rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); - } else { + } + else { // Skip these steps watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); } #else - // Skip these steps - watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); - watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); - watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); #endif + + metadata_sync_shutdown(); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); } // Don't register a shutdown event if we crashed diff --git a/src/daemon/pulse/pulse-aral.c b/src/daemon/pulse/pulse-aral.c index 25514c1ce6e22f..494516df487533 100644 --- a/src/daemon/pulse/pulse-aral.c +++ b/src/daemon/pulse/pulse-aral.c @@ -6,7 +6,7 @@ struct aral_info { const char *name; RRDSET *st_memory; - RRDDIM *rd_used, *rd_free, *rd_structures; + RRDDIM *rd_malloc_used, *rd_malloc_free, *rd_mmap_used, *rd_mmap_free, *rd_structures, *rd_padding; RRDSET *st_utilization; RRDDIM *rd_utilization; @@ -74,24 +74,26 @@ void pulse_aral_do(bool extended) { if (!stats) 
continue; - size_t allocated_bytes = __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED) + - __atomic_load_n(&stats->mmap.allocated_bytes, __ATOMIC_RELAXED); + size_t malloc_allocated_bytes = __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED); + size_t malloc_used_bytes = __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED); + if(malloc_used_bytes > malloc_allocated_bytes) + malloc_allocated_bytes = malloc_used_bytes; + size_t malloc_free_bytes = malloc_allocated_bytes - malloc_used_bytes; - size_t used_bytes = __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED) + - __atomic_load_n(&stats->mmap.used_bytes, __ATOMIC_RELAXED); - - // slight difference may exist, due to the time needed to get these values - // fix the obvious discrepancies - if(used_bytes > allocated_bytes) - used_bytes = allocated_bytes; + size_t mmap_allocated_bytes = __atomic_load_n(&stats->mmap.allocated_bytes, __ATOMIC_RELAXED); + size_t mmap_used_bytes = __atomic_load_n(&stats->mmap.used_bytes, __ATOMIC_RELAXED); + if(mmap_used_bytes > mmap_allocated_bytes) + mmap_allocated_bytes = mmap_used_bytes; + size_t mmap_free_bytes = mmap_allocated_bytes - mmap_used_bytes; size_t structures_bytes = __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED); - size_t free_bytes = allocated_bytes - used_bytes; + size_t padding_bytes = __atomic_load_n(&stats->malloc.padding_bytes, __ATOMIC_RELAXED) + + __atomic_load_n(&stats->mmap.padding_bytes, __ATOMIC_RELAXED); NETDATA_DOUBLE utilization; - if(used_bytes && allocated_bytes) - utilization = 100.0 * (NETDATA_DOUBLE)used_bytes / (NETDATA_DOUBLE)allocated_bytes; + if((malloc_used_bytes + mmap_used_bytes != 0) && (malloc_allocated_bytes + mmap_allocated_bytes != 0)) + utilization = 100.0 * (NETDATA_DOUBLE)(malloc_used_bytes + mmap_used_bytes) / (NETDATA_DOUBLE)(malloc_allocated_bytes + mmap_allocated_bytes); else utilization = 100.0; @@ -118,14 +120,20 @@ void pulse_aral_do(bool extended) { 
rrdlabels_add(ai->st_memory->rrdlabels, "ARAL", ai->name, RRDLABEL_SRC_AUTO); - ai->rd_free = rrddim_add(ai->st_memory, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - ai->rd_used = rrddim_add(ai->st_memory, "used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - ai->rd_structures = rrddim_add(ai->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_malloc_free = rrddim_add(ai->st_memory, "malloc free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_mmap_free = rrddim_add(ai->st_memory, "mmap free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_malloc_used = rrddim_add(ai->st_memory, "malloc used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_mmap_used = rrddim_add(ai->st_memory, "mmap used", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_structures = rrddim_add(ai->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ai->rd_padding = rrddim_add(ai->st_memory, "padding", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } - rrddim_set_by_pointer(ai->st_memory, ai->rd_used, (collected_number)allocated_bytes); - rrddim_set_by_pointer(ai->st_memory, ai->rd_free, (collected_number)free_bytes); + rrddim_set_by_pointer(ai->st_memory, ai->rd_malloc_used, (collected_number)malloc_used_bytes); + rrddim_set_by_pointer(ai->st_memory, ai->rd_malloc_free, (collected_number)malloc_free_bytes); + rrddim_set_by_pointer(ai->st_memory, ai->rd_mmap_used, (collected_number)mmap_used_bytes); + rrddim_set_by_pointer(ai->st_memory, ai->rd_mmap_free, (collected_number)mmap_free_bytes); rrddim_set_by_pointer(ai->st_memory, ai->rd_structures, (collected_number)structures_bytes); + rrddim_set_by_pointer(ai->st_memory, ai->rd_padding, (collected_number)padding_bytes); rrdset_done(ai->st_memory); } diff --git a/src/daemon/pulse/pulse-daemon-memory.c b/src/daemon/pulse/pulse-daemon-memory.c index 3c8ff3ecbd41b1..dac7d317997c81 100644 --- a/src/daemon/pulse/pulse-daemon-memory.c +++ b/src/daemon/pulse/pulse-daemon-memory.c @@ -87,9 +87,7 @@ void pulse_daemon_memory_do(bool extended) { 
netdata_buffers_statistics.buffers_streaming + netdata_buffers_statistics.cbuffers_streaming + netdata_buffers_statistics.buffers_web + - replication_allocated_buffers() + - aral_by_size_overhead() + - judy_aral_overhead(); + replication_allocated_buffers() + aral_by_size_free_bytes() + judy_aral_free_bytes(); size_t strings = 0; string_statistics(NULL, NULL, NULL, NULL, NULL, &strings, NULL, NULL); @@ -101,8 +99,7 @@ void pulse_daemon_memory_do(bool extended) { rrddim_set_by_pointer(st_memory, rd_collectors, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_collectors)); - rrddim_set_by_pointer(st_memory, - rd_rrdhosts, + rrddim_set_by_pointer(st_memory,rd_rrdhosts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhost) + (collected_number)netdata_buffers_statistics.rrdhost_allocations_size); rrddim_set_by_pointer(st_memory, rd_rrdsets, @@ -124,14 +121,15 @@ void pulse_daemon_memory_do(bool extended) { (collected_number)dictionary_stats_memory_total(dictionary_stats_category_replication) + (collected_number)replication_allocated_memory()); #else uint64_t metadata = - aral_by_size_used_bytes() + - dictionary_stats_category_rrdhost.memory.dict + - dictionary_stats_category_rrdset.memory.dict + - dictionary_stats_category_rrddim.memory.dict + - dictionary_stats_category_rrdcontext.memory.dict + - dictionary_stats_category_rrdhealth.memory.dict + - dictionary_stats_category_functions.memory.dict + - dictionary_stats_category_replication.memory.dict + + aral_by_size_structures_bytes() + aral_by_size_used_bytes() + + dictionary_stats_category_rrdhost.memory.dict + dictionary_stats_category_rrdhost.memory.index + + dictionary_stats_category_rrdset.memory.dict + dictionary_stats_category_rrdset.memory.index + + dictionary_stats_category_rrddim.memory.dict + dictionary_stats_category_rrddim.memory.index + + dictionary_stats_category_rrdcontext.memory.dict + dictionary_stats_category_rrdcontext.memory.index + + 
dictionary_stats_category_rrdhealth.memory.dict + dictionary_stats_category_rrdhealth.memory.index + + dictionary_stats_category_functions.memory.dict + dictionary_stats_category_functions.memory.index + + dictionary_stats_category_replication.memory.dict + dictionary_stats_category_replication.memory.index + + netdata_buffers_statistics.rrdhost_allocations_size + replication_allocated_memory(); rrddim_set_by_pointer(st_memory, rd_metadata, (collected_number)metadata); @@ -157,7 +155,7 @@ void pulse_daemon_memory_do(bool extended) { (collected_number) workers_allocated_memory()); rrddim_set_by_pointer(st_memory, rd_aral, - (collected_number) aral_by_size_structures()); + (collected_number)aral_by_size_structures_bytes()); rrddim_set_by_pointer(st_memory, rd_judy, (collected_number) judy_aral_structures()); @@ -168,6 +166,13 @@ void pulse_daemon_memory_do(bool extended) { rrdset_done(st_memory); } + // ---------------------------------------------------------------------------------------------------------------- + + if(!extended) + return; + + // ---------------------------------------------------------------------------------------------------------------- + { static RRDSET *st_memory_buffers = NULL; static RRDDIM *rd_queries = NULL; @@ -212,8 +217,8 @@ void pulse_daemon_memory_do(bool extended) { rd_cbuffers_streaming = rrddim_add(st_memory_buffers, "streaming cbuf", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_buffers_replication = rrddim_add(st_memory_buffers, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_buffers_web = rrddim_add(st_memory_buffers, "web", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_buffers_aral = rrddim_add(st_memory_buffers, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_buffers_judy = rrddim_add(st_memory_buffers, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_aral = rrddim_add(st_memory_buffers, "aral-by-size free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_judy = rrddim_add(st_memory_buffers, "aral-judy free", NULL, 
1, 1, RRD_ALGORITHM_ABSOLUTE); } rrddim_set_by_pointer(st_memory_buffers, rd_queries, (collected_number)netdata_buffers_statistics.query_targets_size + (collected_number) onewayalloc_allocated_memory()); @@ -228,17 +233,12 @@ void pulse_daemon_memory_do(bool extended) { rrddim_set_by_pointer(st_memory_buffers, rd_cbuffers_streaming, (collected_number)netdata_buffers_statistics.cbuffers_streaming); rrddim_set_by_pointer(st_memory_buffers, rd_buffers_replication, (collected_number)replication_allocated_buffers()); rrddim_set_by_pointer(st_memory_buffers, rd_buffers_web, (collected_number)netdata_buffers_statistics.buffers_web); - rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)aral_by_size_overhead()); - rrddim_set_by_pointer(st_memory_buffers, rd_buffers_judy, (collected_number)judy_aral_overhead()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)aral_by_size_free_bytes()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_judy, (collected_number)judy_aral_free_bytes()); rrdset_done(st_memory_buffers); } // ---------------------------------------------------------------------------------------------------------------- - if(!extended) - return; - - // ---------------------------------------------------------------------------------------------------------------- - } diff --git a/src/daemon/pulse/pulse-dbengine.c b/src/daemon/pulse/pulse-dbengine.c index 39baa453d1c7ab..db06b61106ebc5 100644 --- a/src/daemon/pulse/pulse-dbengine.c +++ b/src/daemon/pulse/pulse-dbengine.c @@ -668,15 +668,26 @@ void pulse_dbengine_do(bool extended) { mrg_stats_old = mrg_stats; mrg_get_statistics(main_mrg, &mrg_stats); - struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes(); - size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors + - buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pgc + buffers.pgd + 
buffers.mrg; + struct rrdeng_buffer_sizes dbmem = rrdeng_pulse_memory_sizes(); + size_t buffers_total_size = dbmem.xt_buf + dbmem.wal; #ifdef PDC_USE_JULYL buffers_total_size += buffers.julyl; #endif - pulse_dbengine_total_memory = pgc_main_stats.size + pgc_open_stats.size + pgc_extent_stats.size + mrg_stats.size + buffers_total_size; + size_t aral_structures_total_size = 0, aral_used_total_size = 0; + size_t aral_padding_total_size = 0; + for(size_t i = 0; i < RRDENG_MEM_MAX ; i++) { + buffers_total_size += aral_free_bytes_from_stats(dbmem.as[i]); + aral_structures_total_size += aral_structures_bytes_from_stats(dbmem.as[i]); + aral_used_total_size += aral_used_bytes_from_stats(dbmem.as[i]); + aral_padding_total_size += aral_padding_bytes_from_stats(dbmem.as[i]); + } + + pulse_dbengine_total_memory = + pgc_main_stats.size + (ssize_t)pgc_open_stats.size + pgc_extent_stats.size + + mrg_stats.size + + buffers_total_size + aral_structures_total_size + aral_padding_total_size + pgd_padding_bytes(); size_t priority = 135000; @@ -687,6 +698,9 @@ void pulse_dbengine_do(bool extended) { static RRDDIM *rd_pgc_memory_extent = NULL; // extent compresses cache memory static RRDDIM *rd_pgc_memory_metrics = NULL; // metric registry memory static RRDDIM *rd_pgc_memory_buffers = NULL; + static RRDDIM *rd_pgc_memory_aral_padding = NULL; + static RRDDIM *rd_pgc_memory_pgd_padding = NULL; + static RRDDIM *rd_pgc_memory_aral_structures = NULL; if (unlikely(!st_pgc_memory)) { st_pgc_memory = rrdset_create_localhost( @@ -708,6 +722,9 @@ void pulse_dbengine_do(bool extended) { rd_pgc_memory_extent = rrddim_add(st_pgc_memory, "extent cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_memory_metrics = rrddim_add(st_pgc_memory, "metrics registry", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_memory_buffers = rrddim_add(st_pgc_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_aral_padding = rrddim_add(st_pgc_memory, "aral padding", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + 
rd_pgc_memory_pgd_padding = rrddim_add(st_pgc_memory, "pgd padding", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_aral_structures = rrddim_add(st_pgc_memory, "aral structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } priority++; @@ -717,6 +734,9 @@ void pulse_dbengine_do(bool extended) { rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_extent, (collected_number)pgc_extent_stats.size); rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_metrics, (collected_number)mrg_stats.size); rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_buffers, (collected_number)buffers_total_size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_aral_padding, (collected_number)aral_padding_total_size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_pgd_padding, (collected_number)pgd_padding_bytes()); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_aral_structures, (collected_number)aral_structures_total_size); rrdset_done(st_pgc_memory); } @@ -756,9 +776,9 @@ void pulse_dbengine_do(bool extended) { localhost->rrd_update_every, RRDSET_TYPE_STACKED); - rd_pgc_buffers_pgc = rrddim_add(st_pgc_buffers, "pgc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_pgc_buffers_pgd = rrddim_add(st_pgc_buffers, "pgd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_pgc_buffers_mrg = rrddim_add(st_pgc_buffers, "mrg", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pgc = rrddim_add(st_pgc_buffers, "pgc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pgd = rrddim_add(st_pgc_buffers, "pgd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_mrg = rrddim_add(st_pgc_buffers, "mrg", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_opcodes = rrddim_add(st_pgc_buffers, "opcodes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_handles = rrddim_add(st_pgc_buffers, "query handles", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_pgc_buffers_descriptors = rrddim_add(st_pgc_buffers, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); @@ -776,20 +796,20 @@ void pulse_dbengine_do(bool 
extended) { } priority++; - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgc, (collected_number)buffers.pgc); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgd, (collected_number)buffers.pgd); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_mrg, (collected_number)buffers.mrg); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_opcodes, (collected_number)buffers.opcodes); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_handles, (collected_number)buffers.handles); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_descriptors, (collected_number)buffers.descriptors); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_wal, (collected_number)buffers.wal); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)buffers.pdc); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl); - rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_deol, (collected_number)buffers.deol); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgc, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_PGC])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgd, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_PGD])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_mrg, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_MRG])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_opcodes, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_OPCODES])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_handles, 
(collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_HANDLES])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_descriptors, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_DESCRIPTORS])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_wal, (collected_number)dbmem.wal); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_WORKERS])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_PDC])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_PD])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_XT_IO])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)dbmem.xt_buf); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_EPDL])); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_deol, (collected_number)aral_free_bytes_from_stats(dbmem.as[RRDENG_MEM_DEOL])); #ifdef PDC_USE_JULYL rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_julyl, (collected_number)buffers.julyl); #endif diff --git a/src/daemon/pulse/pulse-gorilla.c b/src/daemon/pulse/pulse-gorilla.c index 3d771e0c20df8c..70669c40816a82 100644 --- a/src/daemon/pulse/pulse-gorilla.c +++ b/src/daemon/pulse/pulse-gorilla.c @@ -6,11 +6,15 @@ static struct gorilla_statistics { bool enabled; - alignas(64) uint64_t tier0_hot_gorilla_buffers; - - alignas(64) uint64_t gorilla_tier0_disk_actual_bytes; - alignas(64) uint64_t gorilla_tier0_disk_optimal_bytes; - alignas(64) uint64_t gorilla_tier0_disk_original_bytes; + CACHE_LINE_PADDING(); + uint64_t tier0_hot_gorilla_buffers; + + CACHE_LINE_PADDING(); + uint64_t gorilla_tier0_disk_actual_bytes; + CACHE_LINE_PADDING(); + 
uint64_t gorilla_tier0_disk_optimal_bytes; + CACHE_LINE_PADDING(); + uint64_t gorilla_tier0_disk_original_bytes; } gorilla_statistics = { 0 }; void pulse_gorilla_hot_buffer_added() { diff --git a/src/daemon/pulse/pulse-http-api.c b/src/daemon/pulse/pulse-http-api.c index a4f035cb293e8d..2cab1bc1e706da 100644 --- a/src/daemon/pulse/pulse-http-api.c +++ b/src/daemon/pulse/pulse-http-api.c @@ -8,16 +8,25 @@ static struct web_statistics { bool extended; + CACHE_LINE_PADDING(); uint16_t connected_clients; + CACHE_LINE_PADDING(); uint64_t web_client_count; // oops! this is used for giving unique IDs to web_clients! + CACHE_LINE_PADDING(); uint64_t web_requests; + CACHE_LINE_PADDING(); uint64_t web_usec; + CACHE_LINE_PADDING(); uint64_t web_usec_max; + CACHE_LINE_PADDING(); uint64_t bytes_received; + CACHE_LINE_PADDING(); uint64_t bytes_sent; + CACHE_LINE_PADDING(); uint64_t content_size_uncompressed; + CACHE_LINE_PADDING(); uint64_t content_size_compressed; } web_statistics; diff --git a/src/daemon/pulse/pulse-ml.c b/src/daemon/pulse/pulse-ml.c index 7995dcfce47b05..a48c491cbc4847 100644 --- a/src/daemon/pulse/pulse-ml.c +++ b/src/daemon/pulse/pulse-ml.c @@ -4,14 +4,22 @@ #include "pulse-ml.h" static struct ml_statistics { - alignas(64) uint64_t ml_models_consulted; - alignas(64) uint64_t ml_models_received; - alignas(64) uint64_t ml_models_ignored; - alignas(64) uint64_t ml_models_sent; - alignas(64) uint64_t ml_models_deserialization_failures; - alignas(64) uint64_t ml_memory_consumption; - alignas(64) uint64_t ml_memory_new; - alignas(64) uint64_t ml_memory_delete; + CACHE_LINE_PADDING(); + uint64_t ml_models_consulted; + CACHE_LINE_PADDING(); + uint64_t ml_models_received; + CACHE_LINE_PADDING(); + uint64_t ml_models_ignored; + CACHE_LINE_PADDING(); + uint64_t ml_models_sent; + CACHE_LINE_PADDING(); + uint64_t ml_models_deserialization_failures; + CACHE_LINE_PADDING(); + uint64_t ml_memory_consumption; + CACHE_LINE_PADDING(); + uint64_t ml_memory_new; + 
CACHE_LINE_PADDING(); + uint64_t ml_memory_delete; } ml_statistics = {0}; void pulse_ml_models_received() diff --git a/src/daemon/pulse/pulse-queries.c b/src/daemon/pulse/pulse-queries.c index 6d55405032805c..419c31df270d9d 100644 --- a/src/daemon/pulse/pulse-queries.c +++ b/src/daemon/pulse/pulse-queries.c @@ -5,30 +5,49 @@ #include "streaming/replication.h" static struct query_statistics { + CACHE_LINE_PADDING(); uint64_t api_data_queries_made; + CACHE_LINE_PADDING(); uint64_t api_data_db_points_read; + CACHE_LINE_PADDING(); uint64_t api_data_result_points_generated; + CACHE_LINE_PADDING(); uint64_t api_weights_queries_made; + CACHE_LINE_PADDING(); uint64_t api_weights_db_points_read; + CACHE_LINE_PADDING(); uint64_t api_weights_result_points_generated; + CACHE_LINE_PADDING(); uint64_t api_badges_queries_made; + CACHE_LINE_PADDING(); uint64_t api_badges_db_points_read; + CACHE_LINE_PADDING(); uint64_t api_badges_result_points_generated; + CACHE_LINE_PADDING(); uint64_t health_queries_made; + CACHE_LINE_PADDING(); uint64_t health_db_points_read; + CACHE_LINE_PADDING(); uint64_t health_result_points_generated; + CACHE_LINE_PADDING(); uint64_t ml_queries_made; + CACHE_LINE_PADDING(); uint64_t ml_db_points_read; + CACHE_LINE_PADDING(); uint64_t ml_result_points_generated; + CACHE_LINE_PADDING(); uint64_t backfill_queries_made; + CACHE_LINE_PADDING(); uint64_t backfill_db_points_read; + CACHE_LINE_PADDING(); uint64_t exporters_queries_made; + CACHE_LINE_PADDING(); uint64_t exporters_db_points_read; } query_statistics; @@ -182,12 +201,12 @@ void pulse_queries_do(bool extended __maybe_unused) { if (unlikely(!st_points_read)) { st_points_read = rrdset_create_localhost( "netdata" - , "db_points_read" + , "db_samples_read" , NULL , "Time-Series Queries" , NULL , "Netdata Time-Series DB Samples Read" - , "points/s" + , "samples/s" , "netdata" , "pulse" , 131001 @@ -233,7 +252,7 @@ void pulse_queries_do(bool extended __maybe_unused) { , NULL , "Time-Series Queries" , NULL - 
, "Netdata Time-Series Samples Generated" + , "Netdata Time-Series Points Generated" , "points/s" , "netdata" , "pulse" diff --git a/src/daemon/pulse/pulse-sqlite3.c b/src/daemon/pulse/pulse-sqlite3.c index b2ab6445acb9fa..3ba57d8258d137 100644 --- a/src/daemon/pulse/pulse-sqlite3.c +++ b/src/daemon/pulse/pulse-sqlite3.c @@ -6,20 +6,34 @@ static struct sqlite3_statistics { bool enabled; - alignas(64) uint64_t sqlite3_queries_made; - alignas(64) uint64_t sqlite3_queries_ok; - alignas(64) uint64_t sqlite3_queries_failed; - alignas(64) uint64_t sqlite3_queries_failed_busy; - alignas(64) uint64_t sqlite3_queries_failed_locked; - alignas(64) uint64_t sqlite3_rows; - alignas(64) uint64_t sqlite3_metadata_cache_hit; - alignas(64) uint64_t sqlite3_context_cache_hit; - alignas(64) uint64_t sqlite3_metadata_cache_miss; - alignas(64) uint64_t sqlite3_context_cache_miss; - alignas(64) uint64_t sqlite3_metadata_cache_spill; - alignas(64) uint64_t sqlite3_context_cache_spill; - alignas(64) uint64_t sqlite3_metadata_cache_write; - alignas(64) uint64_t sqlite3_context_cache_write; + CACHE_LINE_PADDING(); + uint64_t sqlite3_queries_made; + CACHE_LINE_PADDING(); + uint64_t sqlite3_queries_ok; + CACHE_LINE_PADDING(); + uint64_t sqlite3_queries_failed; + CACHE_LINE_PADDING(); + uint64_t sqlite3_queries_failed_busy; + CACHE_LINE_PADDING(); + uint64_t sqlite3_queries_failed_locked; + CACHE_LINE_PADDING(); + uint64_t sqlite3_rows; + CACHE_LINE_PADDING(); + uint64_t sqlite3_metadata_cache_hit; + CACHE_LINE_PADDING(); + uint64_t sqlite3_context_cache_hit; + CACHE_LINE_PADDING(); + uint64_t sqlite3_metadata_cache_miss; + CACHE_LINE_PADDING(); + uint64_t sqlite3_context_cache_miss; + CACHE_LINE_PADDING(); + uint64_t sqlite3_metadata_cache_spill; + CACHE_LINE_PADDING(); + uint64_t sqlite3_context_cache_spill; + CACHE_LINE_PADDING(); + uint64_t sqlite3_metadata_cache_write; + CACHE_LINE_PADDING(); + uint64_t sqlite3_context_cache_write; } sqlite3_statistics = { }; void 
pulse_sqlite3_query_completed(bool success, bool busy, bool locked) { diff --git a/src/daemon/watcher.c b/src/daemon/watcher.c index 6584073e3ff03c..4f53990bee8302 100644 --- a/src/daemon/watcher.c +++ b/src/daemon/watcher.c @@ -82,10 +82,10 @@ void *watcher_main(void *arg) watcher_wait_for_step(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); watcher_wait_for_step(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); watcher_wait_for_step(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); - watcher_wait_for_step(WATCHER_STEP_ID_STOP_METASYNC_THREADS); watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); watcher_wait_for_step(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_METASYNC_THREADS); watcher_wait_for_step(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); watcher_wait_for_step(WATCHER_STEP_ID_REMOVE_PID_FILE); watcher_wait_for_step(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); @@ -140,14 +140,14 @@ void watcher_thread_start() { "flush dbengine tiers"; watcher_steps[WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS].msg = "stop collection for all hosts"; - watcher_steps[WATCHER_STEP_ID_STOP_METASYNC_THREADS].msg = - "stop metasync threads"; watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH].msg = "wait for dbengine collectors to finish"; watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING].msg = "wait for dbengine main cache to finish flushing"; watcher_steps[WATCHER_STEP_ID_STOP_DBENGINE_TIERS].msg = "stop dbengine tiers"; + watcher_steps[WATCHER_STEP_ID_STOP_METASYNC_THREADS].msg = + "stop metasync threads"; watcher_steps[WATCHER_STEP_ID_CLOSE_SQL_DATABASES].msg = "close SQL databases"; watcher_steps[WATCHER_STEP_ID_REMOVE_PID_FILE].msg = diff --git a/src/daemon/watcher.h b/src/daemon/watcher.h index 9809e45fb87cdc..6c15ca7bfe4917 100644 --- a/src/daemon/watcher.h +++ b/src/daemon/watcher.h @@ -24,10 +24,10 @@ 
typedef enum { WATCHER_STEP_ID_CANCEL_MAIN_THREADS, WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS, WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS, - WATCHER_STEP_ID_STOP_METASYNC_THREADS, WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH, WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING, WATCHER_STEP_ID_STOP_DBENGINE_TIERS, + WATCHER_STEP_ID_STOP_METASYNC_THREADS, WATCHER_STEP_ID_CLOSE_SQL_DATABASES, WATCHER_STEP_ID_REMOVE_PID_FILE, WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES, diff --git a/src/database/engine/cache.c b/src/database/engine/cache.c index 013e0c1f8d1bb4..cf381bd33288e1 100644 --- a/src/database/engine/cache.c +++ b/src/database/engine/cache.c @@ -71,7 +71,7 @@ struct pgc_page { }; struct pgc_queue { - alignas(64) SPINLOCK spinlock; + SPINLOCK spinlock; union { PGC_PAGE *base; Pvoid_t sections_judy; @@ -113,13 +113,12 @@ struct pgc { } config; struct { - SPINLOCK spinlock; // when locked, the evict_thread is currently evicting pages ND_THREAD *thread; // the thread struct completion completion; // signal the thread to wake up } evictor; struct pgc_index { - alignas(64) RW_SPINLOCK rw_spinlock; + RW_SPINLOCK rw_spinlock; Pvoid_t sections_judy; #ifdef PGC_WITH_ARAL ARAL *aral; @@ -127,7 +126,7 @@ struct pgc { } *index; struct { - alignas(64) SPINLOCK spinlock; + SPINLOCK spinlock; size_t per1000; } usage; @@ -137,7 +136,7 @@ struct pgc { struct pgc_statistics stats; // statistics #ifdef NETDATA_PGC_POINTER_CHECK - alignas(64) netdata_mutex_t global_pointer_registry_mutex; + netdata_mutex_t global_pointer_registry_mutex; Pvoid_t global_pointer_registry; #endif }; @@ -343,6 +342,20 @@ static inline void pgc_size_histogram_del(PGC *cache, struct pgc_size_histogram // ---------------------------------------------------------------------------- // evictions control +static inline uint64_t pgc_threshold(size_t threshold, uint64_t wanted, uint64_t current, uint64_t clean) { + if(current < clean) + current = clean; + + if(wanted < current - clean) + wanted 
= current - clean; + + uint64_t ret = wanted * threshold / 1000ULL; + if(ret < current - clean) + ret = current - clean; + + return ret; +} + static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { if(size_to_evict) @@ -351,33 +364,33 @@ static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { else if(!spinlock_trylock(&cache->usage.spinlock)) return __atomic_load_n(&cache->usage.per1000, __ATOMIC_RELAXED); - size_t wanted_cache_size; + uint64_t wanted_cache_size; - const size_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED); - const size_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED); - const size_t clean = __atomic_load_n(&cache->clean.stats->size, __ATOMIC_RELAXED); - const size_t evicting = __atomic_load_n(&cache->stats.evicting_size, __ATOMIC_RELAXED); - const size_t flushing = __atomic_load_n(&cache->stats.flushing_size, __ATOMIC_RELAXED); - const size_t current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); - const size_t all_pages_size = hot + dirty + clean + evicting + flushing; - const size_t index = current_cache_size > all_pages_size ? current_cache_size - all_pages_size : 0; - const size_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED); + const uint64_t dirty = __atomic_load_n(&cache->dirty.stats->size, __ATOMIC_RELAXED); + const uint64_t hot = __atomic_load_n(&cache->hot.stats->size, __ATOMIC_RELAXED); + const uint64_t clean = __atomic_load_n(&cache->clean.stats->size, __ATOMIC_RELAXED); + const uint64_t evicting = __atomic_load_n(&cache->stats.evicting_size, __ATOMIC_RELAXED); + const uint64_t flushing = __atomic_load_n(&cache->stats.flushing_size, __ATOMIC_RELAXED); + const uint64_t current_cache_size = __atomic_load_n(&cache->stats.size, __ATOMIC_RELAXED); + const uint64_t all_pages_size = hot + dirty + clean + evicting + flushing; + const uint64_t index = current_cache_size > all_pages_size ? 
current_cache_size - all_pages_size : 0; + const uint64_t referenced_size = __atomic_load_n(&cache->stats.referenced_size, __ATOMIC_RELAXED); if(cache->config.options & PGC_OPTIONS_AUTOSCALE) { - const size_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED); - const size_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED); + const uint64_t dirty_max = __atomic_load_n(&cache->dirty.stats->max_size, __ATOMIC_RELAXED); + const uint64_t hot_max = __atomic_load_n(&cache->hot.stats->max_size, __ATOMIC_RELAXED); // our promise to users - const size_t max_size1 = MAX(hot_max, hot) * 2; + const uint64_t max_size1 = MAX(hot_max, hot) * 2; // protection against slow flushing - const size_t max_size2 = hot_max + ((dirty_max * 2 < hot_max * 2 / 3) ? hot_max * 2 / 3 : dirty_max * 2) + index; + const uint64_t max_size2 = hot_max + ((dirty_max * 2 < hot_max * 2 / 3) ? hot_max * 2 / 3 : dirty_max * 2) + index; // the final wanted cache size wanted_cache_size = MIN(max_size1, max_size2); if(cache->config.dynamic_target_size_cb) { - const size_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb(); + const uint64_t wanted_cache_size_cb = cache->config.dynamic_target_size_cb(); if(wanted_cache_size_cb > wanted_cache_size) wanted_cache_size = wanted_cache_size_cb; } @@ -395,21 +408,19 @@ static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { wanted_cache_size = referenced_size + dirty; // if we don't have enough clean pages, there is no reason to be aggressive or critical - if(current_cache_size > wanted_cache_size && wanted_cache_size < current_cache_size - clean) + if(wanted_cache_size < current_cache_size - clean) wanted_cache_size = current_cache_size - clean; - bool signal_the_evictor = false; if(cache->config.out_of_memory_protection_bytes) { // out of memory protection OS_SYSTEM_MEMORY sm = os_system_memory(false); if(sm.ram_total_bytes) { // when the total exists, ram_available_bytes is also 
right - const size_t min_available = cache->config.out_of_memory_protection_bytes; + const uint64_t min_available = cache->config.out_of_memory_protection_bytes; if (sm.ram_available_bytes < min_available) { // we must shrink wanted_cache_size = current_cache_size - (min_available - sm.ram_available_bytes); - signal_the_evictor = true; } else if(cache->config.use_all_ram) { // we can grow @@ -418,38 +429,40 @@ static inline size_t cache_usage_per1000(PGC *cache, size_t *size_to_evict) { } } - const size_t per1000 = (size_t)((unsigned long long)current_cache_size * 1000ULL / (unsigned long long)wanted_cache_size); - + const size_t per1000 = (size_t)(current_cache_size * 1000ULL / wanted_cache_size); __atomic_store_n(&cache->usage.per1000, per1000, __ATOMIC_RELAXED); __atomic_store_n(&cache->stats.wanted_cache_size, wanted_cache_size, __ATOMIC_RELAXED); __atomic_store_n(&cache->stats.current_cache_size, current_cache_size, __ATOMIC_RELAXED); - spinlock_unlock(&cache->usage.spinlock); - - if(size_to_evict) { - size_t target = (size_t)((uint64_t)wanted_cache_size * (uint64_t)cache->config.evict_low_threshold_per1000 / 1000ULL); - - if(target < wanted_cache_size - clean) - target = wanted_cache_size - clean; + uint64_t healthy_target = pgc_threshold(cache->config.healthy_size_per1000, wanted_cache_size, current_cache_size, clean); + if(current_cache_size > healthy_target) { + uint64_t low_watermark_target = pgc_threshold(cache->config.evict_low_threshold_per1000, wanted_cache_size, current_cache_size, clean); - if(current_cache_size > target) - *size_to_evict = current_cache_size - target; - else - *size_to_evict = 0; - } + uint64_t size_to_evict_now = current_cache_size - low_watermark_target; + if(size_to_evict_now > clean) + size_to_evict_now = clean; - if(per1000 >= cache->config.severe_pressure_per1000) - __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED); + if(size_to_evict) + *size_to_evict = (size_t)size_to_evict_now; - 
else if(per1000 >= cache->config.aggressive_evict_per1000) - __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED); + bool signal = false; + if(per1000 >= cache->config.severe_pressure_per1000) { + __atomic_add_fetch(&cache->stats.events_cache_under_severe_pressure, 1, __ATOMIC_RELAXED); + signal = true; + } + else if(per1000 >= cache->config.aggressive_evict_per1000) { + __atomic_add_fetch(&cache->stats.events_cache_needs_space_aggressively, 1, __ATOMIC_RELAXED); + signal = true; + } - if (signal_the_evictor && spinlock_trylock(&cache->evictor.spinlock)) { - completion_mark_complete_a_job(&cache->evictor.completion); - spinlock_unlock(&cache->evictor.spinlock); - __atomic_add_fetch(&cache->stats.waste_evict_thread_signals, 1, __ATOMIC_RELAXED); + if(signal) { + completion_mark_complete_a_job(&cache->evictor.completion); + __atomic_add_fetch(&cache->stats.waste_evict_thread_signals, 1, __ATOMIC_RELAXED); + } } + spinlock_unlock(&cache->usage.spinlock); + return per1000; } @@ -558,7 +571,7 @@ struct section_pages { PGC_PAGE *base; }; -static struct aral_statistics aral_statistics_for_pgc = { 0 }; +static struct aral_statistics pgc_aral_statistics = { 0 }; static ARAL *pgc_sections_aral = NULL; @@ -1169,6 +1182,7 @@ static bool evict_pages_with_filter(PGC *cache, size_t max_skip, size_t max_evic else if(unlikely(wait)) { // evict as many as necessary for the cache to go at the predefined threshold per1000 = cache_usage_per1000(cache, &max_size_to_evict); + max_size_to_evict /= 2; // do it in 2 steps if(per1000 >= cache->config.severe_pressure_per1000) { under_sever_pressure = true; max_pages_to_evict = max_pages_to_evict ? 
max_pages_to_evict * 2 : 4096; @@ -1934,30 +1948,35 @@ static void *pgc_evict_thread(void *ptr) { worker_register_job_name(0, "signaled"); worker_register_job_name(1, "scheduled"); - unsigned job_id = 0; + unsigned job_id = 0, severe_pressure_counter = 0; while (true) { worker_is_idle(); unsigned new_job_id = completion_wait_for_a_job_with_timeout( - &cache->evictor.completion, job_id, 100); + &cache->evictor.completion, job_id, 1000); - bool was_signaled = new_job_id > job_id; - worker_is_busy(was_signaled ? 1 : 0); + worker_is_busy(new_job_id > job_id ? 1 : 0); job_id = new_job_id; if (nd_thread_signaled_to_cancel()) return NULL; + evict_pages(cache, 0, 0, true, false); + size_t size_to_evict = 0; - size_t per1000 = cache_usage_per1000(cache, &size_to_evict); - bool was_critical = per1000 >= cache->config.severe_pressure_per1000; + if(cache_usage_per1000(cache, &size_to_evict) > cache->config.severe_pressure_per1000) { + severe_pressure_counter++; - if(size_to_evict > 0) { - evict_pages(cache, 0, 0, true, false); + if(severe_pressure_counter > 100) { + // so, we tried 100 times to reduce memory, + // but it is still severe! 
- if (was_signaled || was_critical) mallocz_release_as_much_memory_to_the_system(); + severe_pressure_counter = 0; + } } + else + severe_pressure_counter = 0; } worker_unregister(); @@ -2040,7 +2059,7 @@ PGC *pgc_create(const char *name, sizeof(PGC_PAGE) + cache->config.additional_bytes_per_page, 0, 0, - &aral_statistics_for_pgc, + &pgc_aral_statistics, NULL, NULL, false, @@ -2075,7 +2094,6 @@ PGC *pgc_create(const char *name, // last create the eviction thread { - spinlock_init(&cache->evictor.spinlock); completion_init(&cache->evictor.completion); cache->evictor.thread = nd_thread_create(name, NETDATA_THREAD_OPTION_JOINABLE, pgc_evict_thread, cache); } @@ -2083,12 +2101,8 @@ PGC *pgc_create(const char *name, return cache; } -size_t pgc_aral_structures(void) { - return aral_structures_from_stats(&aral_statistics_for_pgc); -} - -size_t pgc_aral_overhead(void) { - return aral_overhead_from_stats(&aral_statistics_for_pgc); +struct aral_statistics *pgc_aral_stats(void) { + return &pgc_aral_statistics; } void pgc_flush_all_hot_and_dirty_pages(PGC *cache, Word_t section) { @@ -2455,6 +2469,7 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ if(!page_acquire(cache, page)) { internal_fatal(true, "Migration to journal v2: cannot acquire page for migration to v2"); + page_transition_unlock(cache, page); continue; } @@ -2561,8 +2576,17 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ Word_t start_time = 0; while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) { struct jv2_page_info *pi = *PValue2; + + // balance-parents: transition from hot to clean directly + page_set_clean(cache, pi->page, true, false); page_transition_unlock(cache, pi->page); - pgc_page_hot_to_dirty_and_release(cache, pi->page, true); + page_release(cache, pi->page, true); + + // before balance-parents: + // page_transition_unlock(cache, pi->page); + // 
pgc_page_hot_to_dirty_and_release(cache, pi->page, true); + + // old test - don't enable: // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page); aral_freez(ar_pi, pi); } @@ -2590,7 +2614,8 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED); - flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL, false, false); + // balance-parents: do not flush, there is nothing dirty + // flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL, false, false); } static bool match_page_data(PGC_PAGE *page, void *data) { diff --git a/src/database/engine/cache.h b/src/database/engine/cache.h index 3b541d4459092b..ce9779257c1d43 100644 --- a/src/database/engine/cache.h +++ b/src/database/engine/cache.h @@ -48,116 +48,180 @@ struct pgc_size_histogram { }; struct pgc_queue_statistics { + CACHE_LINE_PADDING(); struct pgc_size_histogram size_histogram; - alignas(64) size_t entries; - alignas(64) size_t size; + CACHE_LINE_PADDING(); + size_t entries; + CACHE_LINE_PADDING(); + size_t size; - alignas(64) size_t max_entries; - alignas(64) size_t max_size; + CACHE_LINE_PADDING(); + size_t max_entries; + CACHE_LINE_PADDING(); + size_t max_size; - alignas(64) size_t added_entries; - alignas(64) size_t added_size; + CACHE_LINE_PADDING(); + size_t added_entries; + CACHE_LINE_PADDING(); + size_t added_size; - alignas(64) size_t removed_entries; - alignas(64) size_t removed_size; + CACHE_LINE_PADDING(); + size_t removed_entries; + CACHE_LINE_PADDING(); + size_t removed_size; + + CACHE_LINE_PADDING(); }; struct pgc_statistics { - alignas(64) size_t wanted_cache_size; - alignas(64) size_t current_cache_size; + CACHE_LINE_PADDING(); + size_t wanted_cache_size; + CACHE_LINE_PADDING(); + size_t current_cache_size; + CACHE_LINE_PADDING(); // ---------------------------------------------------------------------------------------------------------------- // 
volume - alignas(64) size_t entries; // all the entries (includes clean, dirty, hot) - alignas(64) size_t size; // all the entries (includes clean, dirty, hot) + CACHE_LINE_PADDING(); + size_t entries; // all the entries (includes clean, dirty, hot) + CACHE_LINE_PADDING(); + size_t size; // all the entries (includes clean, dirty, hot) - alignas(64) size_t referenced_entries; // all the entries currently referenced - alignas(64) size_t referenced_size; // all the entries currently referenced + CACHE_LINE_PADDING(); + size_t referenced_entries; // all the entries currently referenced + CACHE_LINE_PADDING(); + size_t referenced_size; // all the entries currently referenced - alignas(64) size_t added_entries; - alignas(64) size_t added_size; + CACHE_LINE_PADDING(); + size_t added_entries; + CACHE_LINE_PADDING(); + size_t added_size; - alignas(64) size_t removed_entries; - alignas(64) size_t removed_size; + CACHE_LINE_PADDING(); + size_t removed_entries; + CACHE_LINE_PADDING(); + size_t removed_size; #ifdef PGC_COUNT_POINTS_COLLECTED - alignas(64) size_t points_collected; + CACHE_LINE_PADDING(); + size_t points_collected; #endif // ---------------------------------------------------------------------------------------------------------------- // migrations - alignas(64) size_t evicting_entries; - alignas(64) size_t evicting_size; + CACHE_LINE_PADDING(); + size_t evicting_entries; + CACHE_LINE_PADDING(); + size_t evicting_size; - alignas(64) size_t flushing_entries; - alignas(64) size_t flushing_size; + CACHE_LINE_PADDING(); + size_t flushing_entries; + CACHE_LINE_PADDING(); + size_t flushing_size; - alignas(64) size_t hot2dirty_entries; - alignas(64) size_t hot2dirty_size; + CACHE_LINE_PADDING(); + size_t hot2dirty_entries; + CACHE_LINE_PADDING(); + size_t hot2dirty_size; - alignas(64) size_t hot_empty_pages_evicted_immediately; - alignas(64) size_t hot_empty_pages_evicted_later; + CACHE_LINE_PADDING(); + size_t hot_empty_pages_evicted_immediately; + 
CACHE_LINE_PADDING(); + size_t hot_empty_pages_evicted_later; // ---------------------------------------------------------------------------------------------------------------- // workload - alignas(64) size_t acquires; - alignas(64) size_t releases; - - alignas(64) size_t acquires_for_deletion; - - alignas(64) size_t searches_exact; - alignas(64) size_t searches_exact_hits; - alignas(64) size_t searches_exact_misses; - - alignas(64) size_t searches_closest; - alignas(64) size_t searches_closest_hits; - alignas(64) size_t searches_closest_misses; - - alignas(64) size_t flushes_completed; - alignas(64) size_t flushes_completed_size; - alignas(64) size_t flushes_cancelled_size; + CACHE_LINE_PADDING(); + size_t acquires; + CACHE_LINE_PADDING(); + size_t releases; + + CACHE_LINE_PADDING(); + size_t acquires_for_deletion; + + CACHE_LINE_PADDING(); + size_t searches_exact; + CACHE_LINE_PADDING(); + size_t searches_exact_hits; + CACHE_LINE_PADDING(); + size_t searches_exact_misses; + + CACHE_LINE_PADDING(); + size_t searches_closest; + CACHE_LINE_PADDING(); + size_t searches_closest_hits; + CACHE_LINE_PADDING(); + size_t searches_closest_misses; + + CACHE_LINE_PADDING(); + size_t flushes_completed; + CACHE_LINE_PADDING(); + size_t flushes_completed_size; + CACHE_LINE_PADDING(); + size_t flushes_cancelled_size; // ---------------------------------------------------------------------------------------------------------------- // critical events - alignas(64) size_t events_cache_under_severe_pressure; - alignas(64) size_t events_cache_needs_space_aggressively; - alignas(64) size_t events_flush_critical; + CACHE_LINE_PADDING(); + size_t events_cache_under_severe_pressure; + CACHE_LINE_PADDING(); + size_t events_cache_needs_space_aggressively; + CACHE_LINE_PADDING(); + size_t events_flush_critical; // ---------------------------------------------------------------------------------------------------------------- // worker threads - alignas(64) size_t workers_search; - 
alignas(64) size_t workers_add; - alignas(64) size_t workers_evict; - alignas(64) size_t workers_flush; - alignas(64) size_t workers_jv2_flush; - alignas(64) size_t workers_hot2dirty; + CACHE_LINE_PADDING(); + size_t workers_search; + CACHE_LINE_PADDING(); + size_t workers_add; + CACHE_LINE_PADDING(); + size_t workers_evict; + CACHE_LINE_PADDING(); + size_t workers_flush; + CACHE_LINE_PADDING(); + size_t workers_jv2_flush; + CACHE_LINE_PADDING(); + size_t workers_hot2dirty; // ---------------------------------------------------------------------------------------------------------------- // waste events // waste events - spins - alignas(64) size_t waste_insert_spins; - alignas(64) size_t waste_evict_useless_spins; - alignas(64) size_t waste_release_spins; - alignas(64) size_t waste_acquire_spins; - alignas(64) size_t waste_delete_spins; + CACHE_LINE_PADDING(); + size_t waste_insert_spins; + CACHE_LINE_PADDING(); + size_t waste_evict_useless_spins; + CACHE_LINE_PADDING(); + size_t waste_release_spins; + CACHE_LINE_PADDING(); + size_t waste_acquire_spins; + CACHE_LINE_PADDING(); + size_t waste_delete_spins; // waste events - eviction - alignas(64) size_t waste_evict_relocated; - alignas(64) size_t waste_evict_thread_signals; - alignas(64) size_t waste_evictions_inline_on_add; - alignas(64) size_t waste_evictions_inline_on_release; + CACHE_LINE_PADDING(); + size_t waste_evict_relocated; + CACHE_LINE_PADDING(); + size_t waste_evict_thread_signals; + CACHE_LINE_PADDING(); + size_t waste_evictions_inline_on_add; + CACHE_LINE_PADDING(); + size_t waste_evictions_inline_on_release; // waste events - flushing - alignas(64) size_t waste_flush_on_add; - alignas(64) size_t waste_flush_on_release; - alignas(64) size_t waste_flushes_cancelled; + CACHE_LINE_PADDING(); + size_t waste_flush_on_add; + CACHE_LINE_PADDING(); + size_t waste_flush_on_release; + CACHE_LINE_PADDING(); + size_t waste_flushes_cancelled; // 
---------------------------------------------------------------------------------------------------------------- // per queue statistics @@ -248,8 +312,7 @@ bool pgc_flush_pages(PGC *cache); struct pgc_statistics pgc_get_statistics(PGC *cache); size_t pgc_hot_and_dirty_entries(PGC *cache); -size_t pgc_aral_structures(void); -size_t pgc_aral_overhead(void); +struct aral_statistics *pgc_aral_stats(void); static inline size_t indexing_partition(Word_t ptr, Word_t modulo) __attribute__((const)); static inline size_t indexing_partition(Word_t ptr, Word_t modulo) { diff --git a/src/database/engine/metric.c b/src/database/engine/metric.c index 6caeef76cfafd4..bf7ac657e85a7a 100644 --- a/src/database/engine/metric.c +++ b/src/database/engine/metric.c @@ -90,19 +90,16 @@ static inline void MRG_STATS_DELETE_MISS(MRG *mrg, size_t partition) { #define mrg_index_write_lock(mrg, partition) rw_spinlock_write_lock(&(mrg)->index[partition].rw_spinlock) #define mrg_index_write_unlock(mrg, partition) rw_spinlock_write_unlock(&(mrg)->index[partition].rw_spinlock) -static inline void mrg_stats_size_judyl_change(MRG *mrg, size_t mem_before_judyl, size_t mem_after_judyl, size_t partition) { - if(mem_after_judyl > mem_before_judyl) - __atomic_add_fetch(&mrg->index[partition].stats.size, mem_after_judyl - mem_before_judyl, __ATOMIC_RELAXED); - else if(mem_after_judyl < mem_before_judyl) - __atomic_sub_fetch(&mrg->index[partition].stats.size, mem_before_judyl - mem_after_judyl, __ATOMIC_RELAXED); +static inline void mrg_stats_size_judyl_change(MRG *mrg, int64_t judy_mem, size_t partition) { + __atomic_add_fetch(&mrg->index[partition].stats.size, judy_mem, __ATOMIC_RELAXED); } -static inline void mrg_stats_size_judyhs_added_uuid(MRG *mrg, size_t partition) { - __atomic_add_fetch(&mrg->index[partition].stats.size, JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(nd_uuid_t)), __ATOMIC_RELAXED); +static inline void mrg_stats_size_judyhs_added_uuid(MRG *mrg, size_t partition, int64_t judy_mem) { + 
__atomic_add_fetch(&mrg->index[partition].stats.size, judy_mem, __ATOMIC_RELAXED); } -static inline void mrg_stats_size_judyhs_removed_uuid(MRG *mrg, size_t partition) { - __atomic_sub_fetch(&mrg->index[partition].stats.size, JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(nd_uuid_t)), __ATOMIC_RELAXED); +static inline void mrg_stats_size_judyhs_removed_uuid(MRG *mrg, size_t partition, int64_t judy_mem) { + __atomic_sub_fetch(&mrg->index[partition].stats.size, judy_mem, __ATOMIC_RELAXED); } static inline size_t uuid_partition(MRG *mrg __maybe_unused, nd_uuid_t *uuid) { @@ -163,7 +160,7 @@ static inline bool acquired_metric_has_retention(MRG *mrg, METRIC *metric) { static inline void acquired_for_deletion_metric_delete(MRG *mrg, METRIC *metric) { size_t partition = metric->partition; - size_t mem_before_judyl, mem_after_judyl; + int64_t judy_mem; mrg_index_write_lock(mrg, partition); @@ -174,10 +171,10 @@ static inline void acquired_for_deletion_metric_delete(MRG *mrg, METRIC *metric) return; } - mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); + judy_mem = -(int64_t)JudyLMemUsed(*sections_judy_pptr); int rc = JudyLDel(sections_judy_pptr, metric->section, PJE0); - mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); - mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); + judy_mem += (int64_t)JudyLMemUsed(*sections_judy_pptr); + mrg_stats_size_judyl_change(mrg, judy_mem, partition); if(unlikely(!rc)) { MRG_STATS_DELETE_MISS(mrg, partition); @@ -186,10 +183,15 @@ static inline void acquired_for_deletion_metric_delete(MRG *mrg, METRIC *metric) } if(!*sections_judy_pptr) { + JudyAllocThreadPulseReset(); + rc = JudyHSDel(&mrg->index[partition].uuid_judy, &metric->uuid, sizeof(nd_uuid_t), PJE0); + + int64_t judy_mem = JudyAllocThreadPulseGetAndReset(); + if(unlikely(!rc)) fatal("DBENGINE METRIC: cannot delete UUID from JudyHS"); - mrg_stats_size_judyhs_removed_uuid(mrg, partition); + mrg_stats_size_judyhs_removed_uuid(mrg, partition, judy_mem); } 
MRG_STATS_DELETED_METRIC(mrg, partition); @@ -262,19 +264,22 @@ static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *r while(1) { mrg_index_write_lock(mrg, partition); - size_t mem_before_judyl, mem_after_judyl; + JudyAllocThreadPulseReset(); Pvoid_t *sections_judy_pptr = JudyHSIns(&mrg->index[partition].uuid_judy, entry->uuid, sizeof(nd_uuid_t), PJE0); + + int64_t judy_mem = JudyAllocThreadPulseGetAndReset(); + if (unlikely(!sections_judy_pptr || sections_judy_pptr == PJERR)) fatal("DBENGINE METRIC: corrupted UUIDs JudyHS array"); if (unlikely(!*sections_judy_pptr)) - mrg_stats_size_judyhs_added_uuid(mrg, partition); + mrg_stats_size_judyhs_added_uuid(mrg, partition, judy_mem); - mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); + judy_mem = -(int64_t)JudyLMemUsed(*sections_judy_pptr); PValue = JudyLIns(sections_judy_pptr, entry->section, PJE0); - mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); - mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); + judy_mem += (int64_t)JudyLMemUsed(*sections_judy_pptr); + mrg_stats_size_judyl_change(mrg, judy_mem, partition); if (unlikely(!PValue || PValue == PJERR)) fatal("DBENGINE METRIC: corrupted section JudyL array"); @@ -380,12 +385,8 @@ inline MRG *mrg_create(ssize_t partitions) { return mrg; } -inline size_t mrg_aral_structures(void) { - return aral_structures_from_stats(&mrg_aral_statistics); -} - -inline size_t mrg_aral_overhead(void) { - return aral_overhead_from_stats(&mrg_aral_statistics); +struct aral_statistics *mrg_aral_stats(void) { + return &mrg_aral_statistics; } inline void mrg_destroy(MRG *mrg __maybe_unused) { diff --git a/src/database/engine/metric.h b/src/database/engine/metric.h index 038a90e02f3b8e..b92cc399c05761 100644 --- a/src/database/engine/metric.h +++ b/src/database/engine/metric.h @@ -4,8 +4,6 @@ #include "../rrd.h" -#define MRG_CACHE_LINE_PADDING(x) uint8_t padding##x[64] - typedef struct metric METRIC; typedef struct mrg MRG; @@ 
-21,7 +19,7 @@ struct mrg_statistics { // --- non-atomic --- under a write lock size_t entries; - size_t size; // total memory used, with indexing + ssize_t size; // total memory used, with indexing size_t additions; size_t additions_duplicate; @@ -30,21 +28,22 @@ struct mrg_statistics { size_t delete_having_retention_or_referenced; size_t delete_misses; - MRG_CACHE_LINE_PADDING(0); - // --- atomic --- multiple readers / writers + CACHE_LINE_PADDING(); size_t entries_referenced; - MRG_CACHE_LINE_PADDING(2); + CACHE_LINE_PADDING(); size_t current_references; - MRG_CACHE_LINE_PADDING(3); + CACHE_LINE_PADDING(); size_t search_hits; + CACHE_LINE_PADDING(); size_t search_misses; - MRG_CACHE_LINE_PADDING(4); + CACHE_LINE_PADDING(); size_t writers; + CACHE_LINE_PADDING(); size_t writers_conflicts; }; @@ -83,9 +82,7 @@ bool mrg_metric_set_writer(MRG *mrg, METRIC *metric); bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric); void mrg_get_statistics(MRG *mrg, struct mrg_statistics *s); -size_t mrg_aral_structures(void); -size_t mrg_aral_overhead(void); - +struct aral_statistics *mrg_aral_stats(void); void mrg_update_metric_retention_and_granularity_by_uuid( MRG *mrg, Word_t section, nd_uuid_t *uuid, diff --git a/src/database/engine/page.c b/src/database/engine/page.c index e642bf588d212a..a2b6d750e4acdd 100644 --- a/src/database/engine/page.c +++ b/src/database/engine/page.c @@ -62,6 +62,7 @@ struct pgd { #define PGD_ARAL_PARTITIONS_MAX 256 struct { + int64_t padding_used; size_t partitions; size_t sizeof_pgd; @@ -77,7 +78,7 @@ struct { #error "You need to update the slots reserved for storage tiers" #endif -static struct aral_statistics aral_statistics_for_pgd = { 0 }; +static struct aral_statistics pgd_aral_statistics = { 0 }; static size_t aral_sizes_delta; static size_t aral_sizes_count; @@ -89,8 +90,11 @@ static size_t aral_sizes[] = { [RRD_STORAGE_TIERS - 2] = 0, [RRD_STORAGE_TIERS - 1] = 0, - // gorilla buffer size + // gorilla buffer sizes 
RRDENG_GORILLA_32BIT_BUFFER_SIZE, + RRDENG_GORILLA_32BIT_BUFFER_SIZE * 2, + RRDENG_GORILLA_32BIT_BUFFER_SIZE * 3, + RRDENG_GORILLA_32BIT_BUFFER_SIZE * 4, // our structures sizeof(gorilla_writer_t), @@ -101,12 +105,13 @@ static ARAL **arals = NULL; #define arals_slot(slot, partition) ((partition) * aral_sizes_count + (slot)) static ARAL *pgd_get_aral_by_size_and_partition(size_t size, size_t partition); -size_t pgd_aral_structures(void) { - return aral_structures(pgd_alloc_globals.aral_pgd[0]); +size_t pgd_padding_bytes(void) { + int64_t x = __atomic_load_n(&pgd_alloc_globals.padding_used, __ATOMIC_RELAXED); + return (x > 0) ? x : 0; } -size_t pgd_aral_overhead(void) { - return aral_overhead(pgd_alloc_globals.aral_pgd[0]); +struct aral_statistics *pgd_aral_stats(void) { + return &pgd_aral_statistics; } int aral_size_sort_compare(const void *a, const void *b) { @@ -175,7 +180,7 @@ void pgd_init_arals(void) { aral_sizes[slot], 0, 0, - &aral_statistics_for_pgd, + &pgd_aral_statistics, NULL, NULL, false, false); } } @@ -254,6 +259,9 @@ static inline PGD *pgd_alloc(bool for_collector) { static inline void *pgd_data_alloc(size_t size, size_t partition, bool for_collector) { ARAL *ar = pgd_get_aral_by_size_and_partition(size, partition); if(ar) { + int64_t padding = (int64_t)aral_requested_element_size(ar) - (int64_t)size; + __atomic_add_fetch(&pgd_alloc_globals.padding_used, padding, __ATOMIC_RELAXED); + if(for_collector) return aral_mallocz_marked(ar); else @@ -265,8 +273,12 @@ static inline void *pgd_data_alloc(size_t size, size_t partition, bool for_colle static void pgd_data_free(void *page, size_t size, size_t partition) { ARAL *ar = pgd_get_aral_by_size_and_partition(size, partition); - if(ar) + if(ar) { + int64_t padding = (int64_t)aral_requested_element_size(ar) - (int64_t)size; + __atomic_sub_fetch(&pgd_alloc_globals.padding_used, padding, __ATOMIC_RELAXED); + aral_freez(ar, page); + } else freez(page); 
timing_dbengine_evict_step(TIMING_STEP_DBENGINE_EVICT_FREE_MAIN_PGD_TIER1_ARAL); diff --git a/src/database/engine/page.h b/src/database/engine/page.h index 2b051b4e68b14e..e00fdd984ab74b 100644 --- a/src/database/engine/page.h +++ b/src/database/engine/page.h @@ -38,8 +38,8 @@ uint32_t pgd_memory_footprint(PGD *pg); uint32_t pgd_capacity(PGD *pg); uint32_t pgd_disk_footprint(PGD *pg); -size_t pgd_aral_structures(void); -size_t pgd_aral_overhead(void); +struct aral_statistics *pgd_aral_stats(void); +size_t pgd_padding_bytes(void); void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size); diff --git a/src/database/engine/pagecache.c b/src/database/engine/pagecache.c index d440244dbee317..e03ee1f6e83327 100644 --- a/src/database/engine/pagecache.c +++ b/src/database/engine/pagecache.c @@ -1033,7 +1033,7 @@ void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s size_t dynamic_open_cache_size(void) { size_t main_wanted_cache_size = pgc_get_wanted_cache_size(main_cache); - size_t target_size = main_wanted_cache_size / 100 * 10; // 10% + size_t target_size = main_wanted_cache_size / 100 * 5; if(target_size < 2 * 1024 * 1024) target_size = 2 * 1024 * 1024; @@ -1048,7 +1048,7 @@ size_t dynamic_open_cache_size(void) { size_t dynamic_extent_cache_size(void) { size_t main_wanted_cache_size = pgc_get_wanted_cache_size(main_cache); - size_t target_size = main_wanted_cache_size / 100 * 10; // 10% + size_t target_size = main_wanted_cache_size / 100 * 30; if(target_size < 5 * 1024 * 1024) target_size = 5 * 1024 * 1024; @@ -1070,12 +1070,12 @@ void pgc_and_mrg_initialize(void) main_mrg = mrg_create(0); size_t target_cache_size = (size_t)default_rrdeng_page_cache_mb * 1024ULL * 1024ULL; - size_t main_cache_size = (target_cache_size / 100) * 95; + size_t main_cache_size = (target_cache_size / 100) * 70; size_t open_cache_size = 0; - size_t extent_cache_size = (target_cache_size / 100) * 5; + size_t extent_cache_size = (target_cache_size / 100) * 30; 
- if(extent_cache_size < 3 * 1024 * 1024) { - extent_cache_size = 3 * 1024 * 1024; + if(extent_cache_size < 5 * 1024 * 1024) { + extent_cache_size = 5 * 1024 * 1024; main_cache_size = target_cache_size - extent_cache_size; } @@ -1092,7 +1092,7 @@ void pgc_and_mrg_initialize(void) pgc_max_evictors(), 1000, 1, - PGC_OPTIONS_AUTOSCALE, + PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_EVICT_PAGES_NO_INLINE, 0, 0 ); @@ -1109,7 +1109,7 @@ void pgc_and_mrg_initialize(void) pgc_max_evictors(), 1000, 1, - PGC_OPTIONS_AUTOSCALE, // flushing inline: all dirty pages are just converted to clean + PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_FLUSH_PAGES_NO_INLINE | PGC_OPTIONS_EVICT_PAGES_NO_INLINE, 0, sizeof(struct extent_io_data) ); @@ -1126,7 +1126,7 @@ void pgc_and_mrg_initialize(void) pgc_max_evictors(), 1000, 1, - PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_FLUSH_PAGES_NO_INLINE, // no flushing needed + PGC_OPTIONS_AUTOSCALE | PGC_OPTIONS_FLUSH_PAGES_NO_INLINE | PGC_OPTIONS_EVICT_PAGES_NO_INLINE, // no flushing needed 0, 0 ); diff --git a/src/database/engine/pdc.c b/src/database/engine/pdc.c index c5d46c66477290..3b0ddbfea8067f 100644 --- a/src/database/engine/pdc.c +++ b/src/database/engine/pdc.c @@ -71,8 +71,8 @@ static void pdc_release(PDC *pdc) { aral_freez(pdc_globals.pdc.ar, pdc); } -size_t pdc_cache_size(void) { - return aral_overhead(pdc_globals.pdc.ar) + aral_structures(pdc_globals.pdc.ar); +struct aral_statistics *pdc_aral_stats(void) { + return aral_get_statistics(pdc_globals.pdc.ar); } // ---------------------------------------------------------------------------- @@ -100,8 +100,8 @@ static void page_details_release(struct page_details *pd) { aral_freez(pdc_globals.pd.ar, pd); } -size_t pd_cache_size(void) { - return aral_overhead(pdc_globals.pd.ar) + aral_structures(pdc_globals.pd.ar); +struct aral_statistics *pd_aral_stats(void) { + return aral_get_statistics(pdc_globals.pd.ar); } // ---------------------------------------------------------------------------- @@ -129,8 +129,8 @@ static 
void epdl_release(EPDL *epdl) { aral_freez(pdc_globals.epdl.ar, epdl); } -size_t epdl_cache_size(void) { - return aral_overhead(pdc_globals.epdl.ar) + aral_structures(pdc_globals.epdl.ar); +struct aral_statistics *epdl_aral_stats(void) { + return aral_get_statistics(pdc_globals.epdl.ar); } // ---------------------------------------------------------------------------- @@ -159,8 +159,8 @@ static void deol_release(DEOL *deol) { aral_freez(pdc_globals.deol.ar, deol); } -size_t deol_cache_size(void) { - return aral_overhead(pdc_globals.deol.ar) + aral_structures(pdc_globals.deol.ar); +struct aral_statistics *deol_aral_stats(void) { + return aral_get_statistics(pdc_globals.deol.ar); } // ---------------------------------------------------------------------------- diff --git a/src/database/engine/pdc.h b/src/database/engine/pdc.h index 9bae39ade83148..c61ea242c9f1b0 100644 --- a/src/database/engine/pdc.h +++ b/src/database/engine/pdc.h @@ -34,10 +34,11 @@ typedef void (*execute_extent_page_details_list_t)(struct rrdengine_instance *ct void pdc_to_epdl_router(struct rrdengine_instance *ctx, struct page_details_control *pdc, execute_extent_page_details_list_t exec_first_extent_list, execute_extent_page_details_list_t exec_rest_extent_list); void epdl_find_extent_and_populate_pages(struct rrdengine_instance *ctx, EPDL *epdl, bool worker); -size_t pdc_cache_size(void); -size_t pd_cache_size(void); -size_t epdl_cache_size(void); -size_t deol_cache_size(void); +struct aral_statistics *pdc_aral_stats(void); +struct aral_statistics *pd_aral_stats(void); +struct aral_statistics *epdl_aral_stats(void); +struct aral_statistics *deol_aral_stats(void); + size_t extent_buffer_cache_size(void); void pdc_init(void); diff --git a/src/database/engine/rrdengine.c b/src/database/engine/rrdengine.c index 34f59dcb11bf7e..d16772c4f314f9 100644 --- a/src/database/engine/rrdengine.c +++ b/src/database/engine/rrdengine.c @@ -5,11 +5,7 @@ #include "pdc.h" #include "dbengine-compression.h" 
-rrdeng_stats_t global_io_errors = 0; -rrdeng_stats_t global_fs_errors = 0; -rrdeng_stats_t rrdeng_reserved_file_descriptors = 0; -rrdeng_stats_t global_pg_cache_over_half_dirty_events = 0; -rrdeng_stats_t global_flushing_pressure_page_deletions = 0; +struct rrdeng_global_stats global_stats = { 0 }; unsigned rrdeng_pages_per_extent = DEFAULT_PAGES_PER_EXTENT; @@ -1587,25 +1583,27 @@ static void after_journal_v2_indexing(struct rrdengine_instance *ctx __maybe_unu rrdeng_enq_cmd(ctx, RRDENG_OPCODE_DATABASE_ROTATE, NULL, NULL, STORAGE_PRIORITY_INTERNAL_DBENGINE, NULL, NULL); } -struct rrdeng_buffer_sizes rrdeng_get_buffer_sizes(void) { +struct rrdeng_buffer_sizes rrdeng_pulse_memory_sizes(void) { return (struct rrdeng_buffer_sizes) { - .pgc = pgc_aral_overhead() + pgc_aral_structures(), - .pgd = pgd_aral_overhead() + pgd_aral_structures(), - .mrg = mrg_aral_overhead() + mrg_aral_structures(), - .opcodes = aral_overhead(rrdeng_main.cmd_queue.ar) + aral_structures(rrdeng_main.cmd_queue.ar), - .handles = aral_overhead(rrdeng_main.handles.ar) + aral_structures(rrdeng_main.handles.ar), - .descriptors = aral_overhead(rrdeng_main.descriptors.ar) + aral_structures(rrdeng_main.descriptors.ar), - .wal = __atomic_load_n(&wal_globals.atomics.allocated, __ATOMIC_RELAXED) * (sizeof(WAL) + RRDENG_BLOCK_SIZE), - .workers = aral_overhead(rrdeng_main.work_cmd.ar), - .pdc = pdc_cache_size(), - .xt_io = aral_overhead(rrdeng_main.xt_io_descr.ar) + aral_structures(rrdeng_main.xt_io_descr.ar), - .xt_buf = extent_buffer_cache_size(), - .epdl = epdl_cache_size(), - .deol = deol_cache_size(), - .pd = pd_cache_size(), + .as = { + [RRDENG_MEM_PGC] = pgc_aral_stats(), + [RRDENG_MEM_PGD] = pgd_aral_stats(), + [RRDENG_MEM_MRG] = mrg_aral_stats(), + [RRDENG_MEM_PDC] = pdc_aral_stats(), + [RRDENG_MEM_EPDL] = epdl_aral_stats(), + [RRDENG_MEM_DEOL] = deol_aral_stats(), + [RRDENG_MEM_PD] = pd_aral_stats(), + [RRDENG_MEM_OPCODES] = aral_get_statistics(rrdeng_main.cmd_queue.ar), + [RRDENG_MEM_HANDLES] = 
aral_get_statistics(rrdeng_main.handles.ar), + [RRDENG_MEM_DESCRIPTORS] = aral_get_statistics(rrdeng_main.descriptors.ar), + [RRDENG_MEM_WORKERS] = aral_get_statistics(rrdeng_main.work_cmd.ar), + [RRDENG_MEM_XT_IO] = aral_get_statistics(rrdeng_main.xt_io_descr.ar), + }, + .wal = __atomic_load_n(&wal_globals.atomics.allocated, __ATOMIC_RELAXED) * (sizeof(WAL) + RRDENG_BLOCK_SIZE), + .xt_buf = extent_buffer_cache_size(), #ifdef PDC_USE_JULYL - .julyl = julyl_cache_size(), + .julyl = julyl_cache_size(), #endif }; } diff --git a/src/database/engine/rrdengine.h b/src/database/engine/rrdengine.h index 190585e4d665a5..be897bd31b686a 100644 --- a/src/database/engine/rrdengine.h +++ b/src/database/engine/rrdengine.h @@ -327,34 +327,60 @@ void wal_release(WAL *wal); * They only describe operations since DB engine instance load time. */ struct rrdengine_statistics { + CACHE_LINE_PADDING(); rrdeng_stats_t before_decompress_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t after_decompress_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t before_compress_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t after_compress_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t io_write_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t io_write_requests; + CACHE_LINE_PADDING(); rrdeng_stats_t io_read_bytes; + CACHE_LINE_PADDING(); rrdeng_stats_t io_read_requests; + CACHE_LINE_PADDING(); rrdeng_stats_t datafile_creations; + CACHE_LINE_PADDING(); rrdeng_stats_t datafile_deletions; + CACHE_LINE_PADDING(); rrdeng_stats_t journalfile_creations; + CACHE_LINE_PADDING(); rrdeng_stats_t journalfile_deletions; + CACHE_LINE_PADDING(); rrdeng_stats_t io_errors; + CACHE_LINE_PADDING(); rrdeng_stats_t fs_errors; }; -/* I/O errors global counter */ -extern rrdeng_stats_t global_io_errors; -/* File-System errors global counter */ -extern rrdeng_stats_t global_fs_errors; -/* number of File-Descriptors that have been reserved by dbengine */ -extern rrdeng_stats_t rrdeng_reserved_file_descriptors; -/* inability to flush global 
counters */ -extern rrdeng_stats_t global_pg_cache_over_half_dirty_events; -extern rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of deleted pages */ +struct rrdeng_global_stats { + CACHE_LINE_PADDING(); + /* I/O errors global counter */ + rrdeng_stats_t global_io_errors; + + CACHE_LINE_PADDING(); + /* File-System errors global counter */ + rrdeng_stats_t global_fs_errors; + + CACHE_LINE_PADDING(); + /* number of File-Descriptors that have been reserved by dbengine */ + rrdeng_stats_t rrdeng_reserved_file_descriptors; + + CACHE_LINE_PADDING(); + /* inability to flush global counters */ + rrdeng_stats_t global_pg_cache_over_half_dirty_events; + CACHE_LINE_PADDING(); + rrdeng_stats_t global_flushing_pressure_page_deletions; /* number of deleted pages */ +}; + +extern struct rrdeng_global_stats global_stats; typedef struct tier_config_prototype { int tier; // the tier of this ctx @@ -387,22 +413,35 @@ struct rrdengine_instance { } njfv2idx; struct { + CACHE_LINE_PADDING(); unsigned last_fileno; // newest index of datafile and journalfile + CACHE_LINE_PADDING(); unsigned last_flush_fileno; // newest index of datafile received data + CACHE_LINE_PADDING(); size_t collectors_running; + CACHE_LINE_PADDING(); size_t collectors_running_duplicate; + CACHE_LINE_PADDING(); size_t inflight_queries; // the number of queries currently running + CACHE_LINE_PADDING(); uint64_t current_disk_space; // the current disk space size used + CACHE_LINE_PADDING(); uint64_t transaction_id; // the transaction id of the next extent flushing + CACHE_LINE_PADDING(); bool migration_to_v2_running; + CACHE_LINE_PADDING(); bool now_deleting_files; + CACHE_LINE_PADDING(); unsigned extents_currently_being_flushed; // non-zero until we commit data to disk (both datafile and journal file) + CACHE_LINE_PADDING(); time_t first_time_s; + CACHE_LINE_PADDING(); uint64_t metrics; + CACHE_LINE_PADDING(); uint64_t samples; } atomic; @@ -440,12 +479,12 @@ static inline void 
ctx_io_write_op_bytes(struct rrdengine_instance *ctx, size_t static inline void ctx_io_error(struct rrdengine_instance *ctx) { __atomic_add_fetch(&ctx->stats.io_errors, 1, __ATOMIC_RELAXED); - rrd_stat_atomic_add(&global_io_errors, 1); + rrd_stat_atomic_add(&global_stats.global_io_errors, 1); } static inline void ctx_fs_error(struct rrdengine_instance *ctx) { __atomic_add_fetch(&ctx->stats.fs_errors, 1, __ATOMIC_RELAXED); - rrd_stat_atomic_add(&global_fs_errors, 1); + rrd_stat_atomic_add(&global_stats.global_fs_errors, 1); } #define ctx_last_fileno_get(ctx) __atomic_load_n(&(ctx)->atomic.last_fileno, __ATOMIC_RELAXED) diff --git a/src/database/engine/rrdengineapi.c b/src/database/engine/rrdengineapi.c index 69976934f6791a..1301d2e411d350 100755 --- a/src/database/engine/rrdengineapi.c +++ b/src/database/engine/rrdengineapi.c @@ -1046,13 +1046,13 @@ void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long array[27] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.page_cache_descriptors, __ATOMIC_RELAXED); array[28] = (uint64_t)__atomic_load_n(&ctx->stats.io_errors, __ATOMIC_RELAXED); array[29] = (uint64_t)__atomic_load_n(&ctx->stats.fs_errors, __ATOMIC_RELAXED); - array[30] = (uint64_t)__atomic_load_n(&global_io_errors, __ATOMIC_RELAXED); // used - array[31] = (uint64_t)__atomic_load_n(&global_fs_errors, __ATOMIC_RELAXED); // used - array[32] = (uint64_t)__atomic_load_n(&rrdeng_reserved_file_descriptors, __ATOMIC_RELAXED); // used + array[30] = (uint64_t)__atomic_load_n(&global_stats.global_io_errors, __ATOMIC_RELAXED); // used + array[31] = (uint64_t)__atomic_load_n(&global_stats.global_fs_errors, __ATOMIC_RELAXED); // used + array[32] = (uint64_t)__atomic_load_n(&global_stats.rrdeng_reserved_file_descriptors, __ATOMIC_RELAXED); // used array[33] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.pg_cache_over_half_dirty_events, __ATOMIC_RELAXED); - array[34] = (uint64_t)__atomic_load_n(&global_pg_cache_over_half_dirty_events, __ATOMIC_RELAXED); // 
used + array[34] = (uint64_t)__atomic_load_n(&global_stats.global_pg_cache_over_half_dirty_events, __ATOMIC_RELAXED); // used array[35] = 0; // (uint64_t)__atomic_load_n(&ctx->stats.flushing_pressure_page_deletions, __ATOMIC_RELAXED); - array[36] = (uint64_t)__atomic_load_n(&global_flushing_pressure_page_deletions, __ATOMIC_RELAXED); // used + array[36] = (uint64_t)__atomic_load_n(&global_stats.global_flushing_pressure_page_deletions, __ATOMIC_RELAXED); // used array[37] = 0; //(uint64_t)pg_cache->active_descriptors; fatal_assert(RRDENG_NR_STATS == 38); @@ -1144,15 +1144,15 @@ int rrdeng_init( max_open_files = rlimit_nofile.rlim_cur / 4; /* reserve RRDENG_FD_BUDGET_PER_INSTANCE file descriptors for this instance */ - rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE); - if (rrdeng_reserved_file_descriptors > max_open_files) { + rrd_stat_atomic_add(&global_stats.rrdeng_reserved_file_descriptors, RRDENG_FD_BUDGET_PER_INSTANCE); + if (global_stats.rrdeng_reserved_file_descriptors > max_open_files) { netdata_log_error( "Exceeded the budget of available file descriptors (%u/%u), cannot create new dbengine instance.", - (unsigned)rrdeng_reserved_file_descriptors, + (unsigned)global_stats.rrdeng_reserved_file_descriptors, (unsigned)max_open_files); - rrd_stat_atomic_add(&global_fs_errors, 1); - rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + rrd_stat_atomic_add(&global_stats.global_fs_errors, 1); + rrd_stat_atomic_add(&global_stats.rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); return UV_EMFILE; } @@ -1196,7 +1196,7 @@ int rrdeng_init( *ctxp = NULL; } - rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + rrd_stat_atomic_add(&global_stats.rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); return UV_EIO; } @@ -1243,7 +1243,7 @@ int rrdeng_exit(struct rrdengine_instance *ctx) { if (unittest_running) //(ctx->config.unittest) 
freez(ctx); - rrd_stat_atomic_add(&rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); + rrd_stat_atomic_add(&global_stats.rrdeng_reserved_file_descriptors, -RRDENG_FD_BUDGET_PER_INSTANCE); return 0; } diff --git a/src/database/engine/rrdengineapi.h b/src/database/engine/rrdengineapi.h index 1c1e47971e785f..c0a25e5866d88c 100644 --- a/src/database/engine/rrdengineapi.h +++ b/src/database/engine/rrdengineapi.h @@ -208,27 +208,35 @@ struct rrdeng_cache_efficiency_stats { size_t metrics_retention_started; }; +typedef enum rrdeng_mem { + RRDENG_MEM_PGC = 0, + RRDENG_MEM_PGD, + RRDENG_MEM_MRG, + RRDENG_MEM_OPCODES, + RRDENG_MEM_HANDLES, + RRDENG_MEM_DESCRIPTORS, + RRDENG_MEM_WORKERS, + RRDENG_MEM_PDC, + RRDENG_MEM_XT_IO, + RRDENG_MEM_EPDL, + RRDENG_MEM_DEOL, + RRDENG_MEM_PD, + + // terminator + RRDENG_MEM_MAX, +} RRDENG_MEM; + struct rrdeng_buffer_sizes { - size_t workers; - size_t pdc; + struct aral_statistics *as[RRDENG_MEM_MAX]; + size_t wal; - size_t descriptors; - size_t xt_io; size_t xt_buf; - size_t handles; - size_t opcodes; - size_t epdl; - size_t deol; - size_t pd; - size_t pgc; - size_t pgd; - size_t mrg; #ifdef PDC_USE_JULYL size_t julyl; #endif }; -struct rrdeng_buffer_sizes rrdeng_get_buffer_sizes(void); +struct rrdeng_buffer_sizes rrdeng_pulse_memory_sizes(void); struct rrdeng_cache_efficiency_stats rrdeng_get_cache_efficiency_stats(void); RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx); diff --git a/src/database/rrd.h b/src/database/rrd.h index 5f0232d0f79eef..65ff69ee593438 100644 --- a/src/database/rrd.h +++ b/src/database/rrd.h @@ -278,7 +278,7 @@ struct rrddim_tier { STORAGE_COLLECT_HANDLE *sch; // the data collection handle }; -void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s); +void backfill_tier_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s); // ---------------------------------------------------------------------------- // RRD DIMENSION - this is a metric @@ -921,8 
+921,23 @@ typedef enum __attribute__ ((__packed__)) rrdhost_flags { // Careful not to overlap with rrdhost_options to avoid bugs if // rrdhost_flags_xxx is used instead of rrdhost_option_xxx or vice-versa // Orphan, Archived and Obsolete flags + + /* + * 3 BASE FLAGS FOR HOSTS: + * + * - COLLECTOR_ONLINE = the collector is currently collecting data for this node + * this is true FOR ALL KINDS OF NODES (including localhost, virtual hosts, children) + * + * - ORPHAN = the node had a collector online recently, but does not have it now + * + * - ARCHIVED = the node does not have data collection structures attached to it + * + */ + + RRDHOST_FLAG_COLLECTOR_ONLINE = (1 << 7), // the collector of this host is online RRDHOST_FLAG_ORPHAN = (1 << 8), // this host is orphan (not receiving data) RRDHOST_FLAG_ARCHIVED = (1 << 9), // The host is archived, no collected charts yet + RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS = (1 << 10), // the host has pending chart obsoletions RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS = (1 << 11), // the host has pending dimension obsoletions @@ -951,7 +966,6 @@ typedef enum __attribute__ ((__packed__)) rrdhost_flags { RRDHOST_FLAG_PENDING_CONTEXT_LOAD = (1 << 26), // Context needs to be loaded RRDHOST_FLAG_METADATA_CLAIMID = (1 << 27), // metadata needs to be stored in the database - RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED = (1 << 28), // set when the receiver part is disconnected RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 29), // set when the host has updated global functions } RRDHOST_FLAGS; @@ -990,7 +1004,7 @@ typedef enum __attribute__ ((__packed__)) { #define rrdhost_can_stream_metadata_to_parent(host) \ (rrdhost_has_stream_sender_enabled(host) && \ rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_SENDER_READY_4_METRICS) && \ - !rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED) \ + rrdhost_flag_check(host, RRDHOST_FLAG_COLLECTOR_ONLINE) \ ) // ---------------------------------------------------------------------------- @@ 
-1358,7 +1372,12 @@ extern RRDHOST *localhost; #define rrdhost_sender_replicating_charts_minus_one(host) (__atomic_sub_fetch(&((host)->stream.snd.status.replication.charts), 1, __ATOMIC_RELAXED)) #define rrdhost_sender_replicating_charts_zero(host) (__atomic_store_n(&((host)->stream.snd.status.replication.charts), 0, __ATOMIC_RELAXED)) -#define rrdhost_is_online(host) ((host) == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST) || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN | RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED)) +#define rrdhost_is_online(host) ( \ + (host) == localhost || \ + rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST) || \ + (rrdhost_flag_check(host, RRDHOST_FLAG_COLLECTOR_ONLINE) && !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)) \ + ) + bool rrdhost_matches_window(RRDHOST *host, time_t after, time_t before, time_t now); extern DICTIONARY *rrdhost_root_index; diff --git a/src/database/rrdhost.c b/src/database/rrdhost.c index 8a7fe157929885..09b64c7c543ce9 100644 --- a/src/database/rrdhost.c +++ b/src/database/rrdhost.c @@ -841,6 +841,9 @@ int rrd_init(const char *hostname, struct rrdhost_system_info *system_info, bool if (unlikely(!localhost)) return 1; + rrdhost_flag_set(localhost, RRDHOST_FLAG_COLLECTOR_ONLINE); + + ml_host_start(localhost); dyncfg_host_init(localhost); if(!unittest) diff --git a/src/database/rrdlabels.c b/src/database/rrdlabels.c index f16513520ae1a2..5a5757994643bf 100644 --- a/src/database/rrdlabels.c +++ b/src/database/rrdlabels.c @@ -65,9 +65,9 @@ typedef struct rrdlabels { } \ while (0) -static inline void STATS_PLUS_MEMORY(struct dictionary_stats *stats, size_t key_size, size_t item_size, size_t value_size) { - if(key_size) - __atomic_fetch_add(&stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); +static inline void STATS_PLUS_MEMORY(struct dictionary_stats *stats, int64_t judy_mem, size_t item_size, size_t value_size) { + if(judy_mem) + 
__atomic_fetch_add(&stats->memory.index, judy_mem, __ATOMIC_RELAXED); if(item_size) __atomic_fetch_add(&stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); @@ -76,9 +76,9 @@ static inline void STATS_PLUS_MEMORY(struct dictionary_stats *stats, size_t key_ __atomic_fetch_add(&stats->memory.values, (long)value_size, __ATOMIC_RELAXED); } -static inline void STATS_MINUS_MEMORY(struct dictionary_stats *stats, size_t key_size, size_t item_size, size_t value_size) { - if(key_size) - __atomic_fetch_sub(&stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); +static inline void STATS_MINUS_MEMORY(struct dictionary_stats *stats, int64_t judy_mem, size_t item_size, size_t value_size) { + if(judy_mem) + __atomic_fetch_add(&stats->memory.index, judy_mem, __ATOMIC_RELAXED); if(item_size) __atomic_fetch_sub(&stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); @@ -131,7 +131,12 @@ static RRDLABEL *add_label_name_value(const char *name, const char *value) spinlock_lock(&global_labels.spinlock); + JudyAllocThreadPulseReset(); + Pvoid_t *PValue = JudyHSIns(&global_labels.JudyHS, (void *)&label_index, sizeof(label_index), PJE0); + + int64_t judy_mem = JudyAllocThreadPulseGetAndReset(); + if(unlikely(!PValue || PValue == PJERR)) fatal("RRDLABELS: corrupted judyHS array"); @@ -139,11 +144,12 @@ static RRDLABEL *add_label_name_value(const char *name, const char *value) rrdlabel = *PValue; string_freez(label_index.key); string_freez(label_index.value); + STATS_PLUS_MEMORY(&dictionary_stats_category_rrdlabels, judy_mem, 0, 0); } else { rrdlabel = callocz(1, sizeof(*rrdlabel)); rrdlabel->label.index = label_index; *PValue = rrdlabel; - STATS_PLUS_MEMORY(&dictionary_stats_category_rrdlabels, sizeof(LABEL_REGISTRY_IDX), sizeof(RRDLABEL_IDX), 0); + STATS_PLUS_MEMORY(&dictionary_stats_category_rrdlabels, judy_mem, sizeof(RRDLABEL_IDX), 0); } __atomic_add_fetch(&rrdlabel->refcount, 1, __ATOMIC_RELAXED); @@ -160,11 +166,16 @@ static void delete_label(RRDLABEL 
*label) RRDLABEL_IDX *rrdlabel = *PValue; size_t refcount = __atomic_sub_fetch(&rrdlabel->refcount, 1, __ATOMIC_RELAXED); if (refcount == 0) { + JudyAllocThreadPulseReset(); + int ret = JudyHSDel(&global_labels.JudyHS, (void *)label, sizeof(*label), PJE0); + + int64_t judy_mem = JudyAllocThreadPulseGetAndReset(); + if (unlikely(ret == JERR)) - STATS_MINUS_MEMORY(&dictionary_stats_category_rrdlabels, 0, sizeof(*rrdlabel), 0); + STATS_MINUS_MEMORY(&dictionary_stats_category_rrdlabels, judy_mem, sizeof(*rrdlabel), 0); else - STATS_MINUS_MEMORY(&dictionary_stats_category_rrdlabels, sizeof(LABEL_REGISTRY_IDX), sizeof(*rrdlabel), 0); + STATS_MINUS_MEMORY(&dictionary_stats_category_rrdlabels, judy_mem, sizeof(*rrdlabel), 0); string_freez(label->index.key); string_freez(label->index.value); freez(rrdlabel); diff --git a/src/database/rrdset.c b/src/database/rrdset.c index 448e836650c10a..7e4d89b1c6c4a7 100644 --- a/src/database/rrdset.c +++ b/src/database/rrdset.c @@ -1281,7 +1281,7 @@ void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, if(!rrddim_option_check(rd, RRDDIM_OPTION_BACKFILLED_HIGH_TIERS)) { // we have not collected this tier before // let's fill any gap that may exist - rrdr_fill_tier_gap_from_smaller_tiers(rd, tier, now_s); + backfill_tier_from_smaller_tiers(rd, tier, now_s); } store_metric_at_tier(rd, tier, t, sp, point_end_time_ut); diff --git a/src/health/health_event_loop.c b/src/health/health_event_loop.c index 144e4929be1508..df323faffe6188 100644 --- a/src/health/health_event_loop.c +++ b/src/health/health_event_loop.c @@ -213,6 +213,12 @@ static void health_event_loop(void) { unsigned int loop = 0; while(service_running(SERVICE_HEALTH)) { + if(!stream_control_health_should_be_running()) { + worker_is_idle(); + stream_control_throttle(); + continue; + } + loop++; netdata_log_debug(D_HEALTH, "Health monitoring iteration no %u started", loop); diff --git a/src/libnetdata/aral/aral.c b/src/libnetdata/aral/aral.c index 
3d7c76fb1ac8c1..98a953040e2a46 100644 --- a/src/libnetdata/aral/aral.c +++ b/src/libnetdata/aral/aral.c @@ -11,6 +11,12 @@ #define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS #endif +#if ENV32BIT +#define SYSTEM_REQUIRED_ALIGNMENT (sizeof(uintptr_t) * 2) +#else +#define SYSTEM_REQUIRED_ALIGNMENT (alignof(uintptr_t)) +#endif + // max mapped file size #define ARAL_MAX_PAGE_SIZE_MMAP (1ULL * 1024 * 1024 * 1024) @@ -61,13 +67,17 @@ typedef enum { struct aral_ops { struct { - alignas(64) size_t allocators; // the number of threads currently trying to allocate memory - alignas(64) size_t deallocators; // the number of threads currently trying to deallocate memory - alignas(64) bool last_allocated_or_deallocated; // stability detector, true when was last allocated + CACHE_LINE_PADDING(); + size_t allocators; // the number of threads currently trying to allocate memory + CACHE_LINE_PADDING(); + size_t deallocators; // the number of threads currently trying to deallocate memory + CACHE_LINE_PADDING(); + bool last_allocated_or_deallocated; // stability detector, true when was last allocated } atomic; struct { - alignas(64) SPINLOCK spinlock; + CACHE_LINE_PADDING(); + SPINLOCK spinlock; size_t allocating_elements; // currently allocating elements size_t allocation_size; // current / next allocation size } adders; @@ -97,7 +107,7 @@ struct aral { } config; struct { - alignas(64) SPINLOCK spinlock; + SPINLOCK spinlock; size_t file_number; // for mmap ARAL_PAGE *pages_free; // pages with free items @@ -125,12 +135,12 @@ const char *aral_name(ARAL *ar) { return ar->config.name; } -size_t aral_structures_from_stats(struct aral_statistics *stats) { +size_t aral_structures_bytes_from_stats(struct aral_statistics *stats) { if(!stats) return 0; return __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED); } -size_t aral_overhead_from_stats(struct aral_statistics *stats) { +size_t aral_free_bytes_from_stats(struct aral_statistics *stats) { if(!stats) return 0; size_t 
allocated = __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED) + @@ -139,23 +149,39 @@ size_t aral_overhead_from_stats(struct aral_statistics *stats) { size_t used = __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED) + __atomic_load_n(&stats->mmap.used_bytes, __ATOMIC_RELAXED); - if(allocated > used) return allocated - used; - return allocated; + return (allocated > used) ? allocated - used : 0; } size_t aral_used_bytes_from_stats(struct aral_statistics *stats) { size_t used = __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED) + __atomic_load_n(&stats->mmap.used_bytes, __ATOMIC_RELAXED); - return used; } -size_t aral_overhead(ARAL *ar) { - return aral_overhead_from_stats(ar->stats); +size_t aral_padding_bytes_from_stats(struct aral_statistics *stats) { + size_t padding = __atomic_load_n(&stats->malloc.padding_bytes, __ATOMIC_RELAXED) + + __atomic_load_n(&stats->mmap.padding_bytes, __ATOMIC_RELAXED); + return padding; +} + +size_t aral_used_bytes(ARAL *ar) { + return aral_used_bytes_from_stats(ar->stats); } -size_t aral_structures(ARAL *ar) { - return aral_structures_from_stats(ar->stats); +size_t aral_free_bytes(ARAL *ar) { + return aral_free_bytes_from_stats(ar->stats); +} + +size_t aral_structures_bytes(ARAL *ar) { + return aral_structures_bytes_from_stats(ar->stats); +} + +size_t aral_padding_bytes(ARAL *ar) { + return aral_padding_bytes_from_stats(ar->stats); +} + +size_t aral_free_structures_padding_from_stats(struct aral_statistics *stats) { + return aral_free_bytes_from_stats(stats) + aral_structures_bytes_from_stats(stats) + aral_padding_bytes_from_stats(stats); } struct aral_statistics *aral_get_statistics(ARAL *ar) { @@ -343,6 +369,8 @@ static ARAL_PAGE *aral_get_page_pointer_after_element___do_NOT_have_aral_lock(AR } #endif + internal_fatal((uintptr_t)page % SYSTEM_REQUIRED_ALIGNMENT != 0, "Pointer is not aligned properly"); + return page; } @@ -387,11 +415,6 @@ static size_t aral_get_system_page_size(void) { return 
page_size; } -// we don't need alignof(max_align_t) for normal C structures -// alignof(uintptr_r) is sufficient for our use cases -// #define SYSTEM_REQUIRED_ALIGNMENT (alignof(max_align_t)) -#define SYSTEM_REQUIRED_ALIGNMENT (alignof(uintptr_t)) - static size_t aral_element_slot_size(size_t requested_element_size, bool usable) { // we need to add a page pointer after the element // so, first align the element size to the pointer size @@ -453,8 +476,11 @@ static size_t aral_next_allocation_size___adders_lock_needed(ARAL *ar, bool mark // -------------------------------------------------------------------------------------------------------------------- static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { - size_t data_size, structures_size; + struct aral_page_type_stats *stats; ARAL_PAGE *page; + + size_t total_size = size; + if(ar->config.mmap.enabled) { page = callocz(1, sizeof(ARAL_PAGE)); ar->aral_lock.file_number++; @@ -469,10 +495,8 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'", ar->config.name, size, page->filename); - __atomic_add_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); - __atomic_add_fetch(&ar->stats->mmap.allocated_bytes, size, __ATOMIC_RELAXED); - data_size = size; - structures_size = sizeof(ARAL_PAGE); + total_size = size + sizeof(ARAL_PAGE); + stats = &ar->stats->mmap; } #ifdef NETDATA_TRACE_ALLOCATIONS else { @@ -485,23 +509,18 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ #else else { size_t ARAL_PAGE_size = memory_alignment(sizeof(ARAL_PAGE), SYSTEM_REQUIRED_ALIGNMENT); - size_t max_elements = aral_elements_in_page_size(ar, size); - data_size = max_elements * ar->config.element_size; - structures_size = size - data_size; if (size >= ARAL_MMAP_PAGES_ABOVE) { bool mapped; uint8_t *ptr = netdata_mmap(NULL, size, 
MAP_PRIVATE, 1, false, NULL); if (ptr) { mapped = true; - __atomic_add_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); - __atomic_add_fetch(&ar->stats->mmap.allocated_bytes, data_size, __ATOMIC_RELAXED); + stats = &ar->stats->mmap; } else { ptr = mallocz(size); mapped = false; - __atomic_add_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED); - __atomic_add_fetch(&ar->stats->malloc.allocated_bytes, data_size, __ATOMIC_RELAXED); + stats = &ar->stats->malloc; } page = (ARAL_PAGE *)ptr; memset(page, 0, ARAL_PAGE_size); @@ -515,8 +534,7 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ page->data = &ptr[ARAL_PAGE_size]; page->mapped = false; - __atomic_add_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED); - __atomic_add_fetch(&ar->stats->malloc.allocated_bytes, data_size, __ATOMIC_RELAXED); + stats = &ar->stats->malloc; } } #endif @@ -526,13 +544,21 @@ static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ page->max_elements = aral_elements_in_page_size(ar, page->size); page->aral_lock.free_elements = page->max_elements; + size_t structures_size = sizeof(ARAL_PAGE) + page->max_elements * sizeof(void *); + size_t data_size = page->max_elements * ar->config.requested_element_size; + size_t padding_size = total_size - data_size - structures_size; + + __atomic_add_fetch(&stats->allocations, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&stats->allocated_bytes, data_size, __ATOMIC_RELAXED); + __atomic_add_fetch(&stats->padding_bytes, padding_size, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED); __atomic_add_fetch(&ar->stats->structures.allocated_bytes, structures_size, __ATOMIC_RELAXED); // link the free space to its page ARAL_FREE *fr = (ARAL_FREE *)page->data; - fr->size = data_size; + fr->size = page->max_elements * ar->config.element_size; fr->next = NULL; page->free.list = fr; @@ -545,15 +571,15 @@ void 
aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_ size_t idx = mark_to_idx(page->started_marked); __atomic_store_n(&ar->ops[idx].atomic.last_allocated_or_deallocated, true, __ATOMIC_RELAXED); - size_t data_size, structures_size; + struct aral_page_type_stats *stats; + size_t max_elements = page->max_elements; + size_t size = page->size; + size_t total_size = size; // free it if (ar->config.mmap.enabled) { - data_size = page->size; - structures_size = sizeof(ARAL_PAGE); - - __atomic_sub_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); - __atomic_sub_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED); + stats = &ar->stats->mmap; + total_size = size + sizeof(ARAL_PAGE); netdata_munmap(page->data, page->size); @@ -571,24 +597,25 @@ void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_ freez_int(page->data TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); freez(page); #else - data_size = page->max_elements * ar->config.element_size; - structures_size = page->size - data_size; - if(page->mapped) { - __atomic_sub_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); - __atomic_sub_fetch(&ar->stats->mmap.allocated_bytes, data_size, __ATOMIC_RELAXED); - + stats = &ar->stats->mmap; netdata_munmap(page, page->size); } else { - __atomic_sub_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED); - __atomic_sub_fetch(&ar->stats->malloc.allocated_bytes, data_size, __ATOMIC_RELAXED); - + stats = &ar->stats->malloc; freez(page); } #endif } + size_t structures_size = sizeof(ARAL_PAGE) + max_elements * sizeof(void *); + size_t data_size = max_elements * ar->config.requested_element_size; + size_t padding_size = total_size - data_size - structures_size; + + __atomic_sub_fetch(&stats->allocations, 1, __ATOMIC_RELAXED); + __atomic_sub_fetch(&stats->allocated_bytes, data_size, __ATOMIC_RELAXED); + __atomic_sub_fetch(&stats->padding_bytes, padding_size, __ATOMIC_RELAXED); + 
__atomic_sub_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED); __atomic_sub_fetch(&ar->stats->structures.allocated_bytes, structures_size, __ATOMIC_RELAXED); } @@ -766,10 +793,12 @@ void *aral_mallocz_internal(ARAL *ar, bool marked TRACE_ALLOCATIONS_FUNCTION_DEF // put the page pointer after the element aral_set_page_pointer_after_element___do_NOT_have_aral_lock(ar, page, found_fr, marked); - if(unlikely(ar->config.mmap.enabled)) - __atomic_add_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + if(unlikely(ar->config.mmap.enabled || page->mapped)) + __atomic_add_fetch(&ar->stats->mmap.used_bytes, ar->config.requested_element_size, __ATOMIC_RELAXED); else - __atomic_add_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->malloc.used_bytes, ar->config.requested_element_size, __ATOMIC_RELAXED); + + internal_fatal((uintptr_t)found_fr % SYSTEM_REQUIRED_ALIGNMENT != 0, "Pointer is not aligned properly"); return (void *)found_fr; } @@ -827,11 +856,6 @@ void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITI if(unlikely(!ptr)) return; - if(unlikely(ar->config.mmap.enabled)) - __atomic_sub_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); - else - __atomic_sub_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); - // get the page pointer bool marked; ARAL_PAGE *page = aral_get_page_pointer_after_element___do_NOT_have_aral_lock(ar, ptr, &marked); @@ -839,6 +863,11 @@ void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITI size_t idx = mark_to_idx(marked); __atomic_add_fetch(&ar->ops[idx].atomic.deallocators, 1, __ATOMIC_RELAXED); + if(unlikely(ar->config.mmap.enabled || page->mapped)) + __atomic_sub_fetch(&ar->stats->mmap.used_bytes, ar->config.requested_element_size, __ATOMIC_RELAXED); + else + __atomic_sub_fetch(&ar->stats->malloc.used_bytes, 
ar->config.requested_element_size, __ATOMIC_RELAXED); + // make this element available ARAL_FREE *fr = (ARAL_FREE *)ptr; fr->size = ar->config.element_size; @@ -1093,18 +1122,22 @@ struct aral_statistics *aral_by_size_statistics(void) { return &aral_by_size_globals.shared_statistics; } -size_t aral_by_size_structures(void) { - return aral_structures_from_stats(&aral_by_size_globals.shared_statistics); +size_t aral_by_size_structures_bytes(void) { + return aral_structures_bytes_from_stats(&aral_by_size_globals.shared_statistics); } -size_t aral_by_size_overhead(void) { - return aral_overhead_from_stats(&aral_by_size_globals.shared_statistics); +size_t aral_by_size_free_bytes(void) { + return aral_free_bytes_from_stats(&aral_by_size_globals.shared_statistics); } size_t aral_by_size_used_bytes(void) { return aral_used_bytes_from_stats(&aral_by_size_globals.shared_statistics); } +size_t aral_by_size_padding_bytes(void) { + return aral_padding_bytes_from_stats(&aral_by_size_globals.shared_statistics); +} + ARAL *aral_by_size_acquire(size_t size) { spinlock_lock(&aral_by_size_globals.spinlock); diff --git a/src/libnetdata/aral/aral.h b/src/libnetdata/aral/aral.h index 86de671fd6b5b1..2467e72ef4c68d 100644 --- a/src/libnetdata/aral/aral.h +++ b/src/libnetdata/aral/aral.h @@ -8,53 +8,96 @@ typedef struct aral ARAL; +struct aral_page_type_stats { + CACHE_LINE_PADDING(); + size_t allocations; + CACHE_LINE_PADDING(); + size_t allocated_bytes; + CACHE_LINE_PADDING(); + size_t used_bytes; + CACHE_LINE_PADDING(); + size_t padding_bytes; +}; + struct aral_statistics { struct { - alignas(64) size_t allocations; - alignas(64) size_t allocated_bytes; + CACHE_LINE_PADDING(); + size_t allocations; + CACHE_LINE_PADDING(); + size_t allocated_bytes; } structures; - struct { - alignas(64) size_t allocations; - alignas(64) size_t allocated_bytes; - alignas(64) size_t used_bytes; - } malloc; - - struct { - alignas(64) size_t allocations; - alignas(64) size_t allocated_bytes; - alignas(64) 
size_t used_bytes; - } mmap; + struct aral_page_type_stats malloc; + struct aral_page_type_stats mmap; }; +// -------------------------------------------------------------------------------------------------------------------- + +const char *aral_name(ARAL *ar); + ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size, struct aral_statistics *stats, const char *filename, const char **cache_dir, bool mmap, bool lockless); +// -------------------------------------------------------------------------------------------------------------------- + // return the size of the element, as requested size_t aral_requested_element_size(ARAL *ar); // return the exact memory footprint of the elements size_t aral_actual_element_size(ARAL *ar); -const char *aral_name(ARAL *ar); -size_t aral_overhead(ARAL *ar); -size_t aral_structures(ARAL *ar); +// -------------------------------------------------------------------------------------------------------------------- + +size_t aral_optimal_malloc_page_size(void); + +// -------------------------------------------------------------------------------------------------------------------- + +/* + * + * The total memory used by ARAL is: + * + * total = structures + used + free + padding + * + * or + * + * total = structures + allocated + padding + * + * always: + * + * allocated = used + free + * + * Hints: + * - allocated, used and free are about the requested element size. + * - structures includes the extension of the elements for the metadata aral needs. 
+ * - padding is lost due to alignment requirements + * + */ + +size_t aral_structures_bytes(ARAL *ar); +size_t aral_free_bytes(ARAL *ar); +size_t aral_used_bytes(ARAL *ar); +size_t aral_padding_bytes(ARAL *ar); + struct aral_statistics *aral_get_statistics(ARAL *ar); -size_t aral_structures_from_stats(struct aral_statistics *stats); -size_t aral_overhead_from_stats(struct aral_statistics *stats); + +size_t aral_structures_bytes_from_stats(struct aral_statistics *stats); +size_t aral_free_bytes_from_stats(struct aral_statistics *stats); +size_t aral_used_bytes_from_stats(struct aral_statistics *stats); +size_t aral_padding_bytes_from_stats(struct aral_statistics *stats); + +// -------------------------------------------------------------------------------------------------------------------- ARAL *aral_by_size_acquire(size_t size); void aral_by_size_release(ARAL *ar); -size_t aral_by_size_structures(void); -size_t aral_by_size_overhead(void); -struct aral_statistics *aral_by_size_statistics(void); +size_t aral_by_size_structures_bytes(void); +size_t aral_by_size_free_bytes(void); size_t aral_by_size_used_bytes(void); -size_t aral_used_bytes_from_stats(struct aral_statistics *stats); +size_t aral_by_size_padding_bytes(void); -size_t aral_optimal_malloc_page_size(void); +struct aral_statistics *aral_by_size_statistics(void); -int aral_unittest(size_t elements); +// -------------------------------------------------------------------------------------------------------------------- #ifdef NETDATA_TRACE_ALLOCATIONS @@ -87,6 +130,10 @@ void aral_destroy_internal(ARAL *ar); void aral_unmark_allocation(ARAL *ar, void *ptr); +// -------------------------------------------------------------------------------------------------------------------- + +int aral_unittest(size_t elements); + #endif // NETDATA_TRACE_ALLOCATIONS #endif // ARAL_H diff --git a/src/libnetdata/common.h b/src/libnetdata/common.h index aafe51176c32ce..b46eb9b9f06b29 100644 --- a/src/libnetdata/common.h +++ 
b/src/libnetdata/common.h @@ -394,6 +394,12 @@ typedef uint32_t uid_t; // -------------------------------------------------------------------------------------------------------------------- +#define CONCAT_INDIRECT(a, b) a##b +#define CONCAT(a, b) CONCAT_INDIRECT(a, b) +#define CACHE_LINE_PADDING() uint8_t CONCAT(padding, __COUNTER__)[64 - sizeof(size_t)]; + +// -------------------------------------------------------------------------------------------------------------------- + #if defined(OS_WINDOWS) #include #include diff --git a/src/libnetdata/dictionary/dictionary-hashtable.h b/src/libnetdata/dictionary/dictionary-hashtable.h index 14c81cfcc7ac24..8df2925a2a73ca 100644 --- a/src/libnetdata/dictionary/dictionary-hashtable.h +++ b/src/libnetdata/dictionary/dictionary-hashtable.h @@ -112,8 +112,13 @@ static inline size_t hashtable_destroy_judy(DICTIONARY *dict) { pointer_destroy_index(dict); + JudyAllocThreadPulseReset(); + JError_t J_Error; Word_t ret = JudyHSFreeArray(&dict->index.JudyHSArray, &J_Error); + + __atomic_add_fetch(&dict->stats->memory.index, JudyAllocThreadPulseGetAndReset(), __ATOMIC_RELAXED); + if(unlikely(ret == (Word_t) JERR)) { netdata_log_error("DICTIONARY: Cannot destroy JudyHS, JU_ERRNO_* == %u, ID == %d", JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); @@ -126,8 +131,13 @@ static inline size_t hashtable_destroy_judy(DICTIONARY *dict) { } static inline void *hashtable_insert_judy(DICTIONARY *dict, const char *name, size_t name_len) { + JudyAllocThreadPulseReset(); + JError_t J_Error; Pvoid_t *Rc = JudyHSIns(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + + __atomic_add_fetch(&dict->stats->memory.index, JudyAllocThreadPulseGetAndReset(), __ATOMIC_RELAXED); + if (unlikely(Rc == PJERR)) { netdata_log_error("DICTIONARY: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d", name, JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); @@ -159,8 +169,13 @@ static inline int hashtable_delete_judy(DICTIONARY *dict, const char *name, 
size (void)item; if(unlikely(!dict->index.JudyHSArray)) return 0; + JudyAllocThreadPulseReset(); + JError_t J_Error; int ret = JudyHSDel(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + + __atomic_add_fetch(&dict->stats->memory.index, JudyAllocThreadPulseGetAndReset(), __ATOMIC_RELAXED); + if(unlikely(ret == JERR)) { netdata_log_error("DICTIONARY: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d", name, diff --git a/src/libnetdata/dictionary/dictionary-statistics.h b/src/libnetdata/dictionary/dictionary-statistics.h index 20eb8159936a17..5095fc9b4cbd74 100644 --- a/src/libnetdata/dictionary/dictionary-statistics.h +++ b/src/libnetdata/dictionary/dictionary-statistics.h @@ -9,10 +9,7 @@ // memory statistics #ifdef DICT_WITH_STATS -static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { - if(key_size) - __atomic_fetch_add(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); - +static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_size __maybe_unused, size_t item_size, size_t value_size) { if(item_size) __atomic_fetch_add(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); @@ -20,10 +17,7 @@ static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_siz __atomic_fetch_add(&dict->stats->memory.values, (long)value_size, __ATOMIC_RELAXED); } -static inline void DICTIONARY_STATS_MINUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { - if(key_size) - __atomic_fetch_sub(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); - +static inline void DICTIONARY_STATS_MINUS_MEMORY(DICTIONARY *dict, size_t key_size __maybe_unused, size_t item_size, size_t value_size) { if(item_size) __atomic_fetch_sub(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); diff --git a/src/libnetdata/dictionary/dictionary.h 
b/src/libnetdata/dictionary/dictionary.h index 51acaa2e841bcc..1d0080c981f740 100644 --- a/src/libnetdata/dictionary/dictionary.h +++ b/src/libnetdata/dictionary/dictionary.h @@ -66,48 +66,74 @@ struct dictionary_stats { const char *name; // the name of the category struct { + CACHE_LINE_PADDING(); size_t active; // the number of active dictionaries + CACHE_LINE_PADDING(); size_t deleted; // the number of dictionaries queued for destruction } dictionaries; struct { + CACHE_LINE_PADDING(); long entries; // active items in the dictionary + CACHE_LINE_PADDING(); long pending_deletion; // pending deletion items in the dictionary + CACHE_LINE_PADDING(); long referenced; // referenced items in the dictionary } items; struct { + CACHE_LINE_PADDING(); size_t creations; // dictionary creations + CACHE_LINE_PADDING(); size_t destructions; // dictionary destructions + CACHE_LINE_PADDING(); size_t flushes; // dictionary flushes + CACHE_LINE_PADDING(); size_t traversals; // dictionary foreach + CACHE_LINE_PADDING(); size_t walkthroughs; // dictionary walkthrough + CACHE_LINE_PADDING(); size_t garbage_collections; // dictionary garbage collections + CACHE_LINE_PADDING(); size_t searches; // item searches + CACHE_LINE_PADDING(); size_t inserts; // item inserts + CACHE_LINE_PADDING(); size_t resets; // item resets + CACHE_LINE_PADDING(); size_t deletes; // item deletes } ops; struct { + CACHE_LINE_PADDING(); size_t inserts; // number of times the insert callback is called + CACHE_LINE_PADDING(); size_t conflicts; // number of times the conflict callback is called + CACHE_LINE_PADDING(); size_t reacts; // number of times the react callback is called + CACHE_LINE_PADDING(); size_t deletes; // number of times the delete callback is called } callbacks; // memory struct { + CACHE_LINE_PADDING(); ssize_t index; // bytes of keys indexed (indication of the index size) + CACHE_LINE_PADDING(); ssize_t values; // bytes of caller structures + CACHE_LINE_PADDING(); ssize_t dict; // bytes of 
the structures dictionary needs } memory; // spin locks struct { + CACHE_LINE_PADDING(); size_t use_spins; // number of times a reference to item had to spin to acquire it or ignore it + CACHE_LINE_PADDING(); size_t search_spins; // number of times a successful search result had to be thrown away + CACHE_LINE_PADDING(); size_t insert_spins; // number of times an insertion to the hash table had to be repeated + CACHE_LINE_PADDING(); size_t delete_spins; // number of times a deletion had to spin to get a decision } spin_locks; }; diff --git a/src/libnetdata/libjudy/judy-malloc.c b/src/libnetdata/libjudy/judy-malloc.c index 477da633b378f2..7ce6f0d4b7fa2c 100644 --- a/src/libnetdata/libjudy/judy-malloc.c +++ b/src/libnetdata/libjudy/judy-malloc.c @@ -34,12 +34,12 @@ __attribute__((constructor)) void aral_judy_init(void) { } } -size_t judy_aral_overhead(void) { - return aral_overhead_from_stats(&judy_sizes_aral_statistics); +size_t judy_aral_free_bytes(void) { + return aral_free_bytes_from_stats(&judy_sizes_aral_statistics); } size_t judy_aral_structures(void) { - return aral_structures_from_stats(&judy_sizes_aral_statistics); + return aral_structures_bytes_from_stats(&judy_sizes_aral_statistics); } static ARAL *judy_size_aral(Word_t Words) { @@ -51,11 +51,11 @@ static ARAL *judy_size_aral(Word_t Words) { static __thread int64_t judy_allocated = 0; -void JudyAllocThreadTelemetryReset(void) { +void JudyAllocThreadPulseReset(void) { judy_allocated = 0; } -int64_t JudyAllocThreadTelemetryGetAndReset(void) { +int64_t JudyAllocThreadPulseGetAndReset(void) { int64_t rc = judy_allocated; judy_allocated = 0; return rc; diff --git a/src/libnetdata/libjudy/judy-malloc.h b/src/libnetdata/libjudy/judy-malloc.h index 23e0a55fd7f15e..23e10ca100ced4 100644 --- a/src/libnetdata/libjudy/judy-malloc.h +++ b/src/libnetdata/libjudy/judy-malloc.h @@ -5,10 +5,10 @@ #include "../libnetdata.h" -size_t judy_aral_overhead(void); +size_t judy_aral_free_bytes(void); size_t 
judy_aral_structures(void); -void JudyAllocThreadTelemetryReset(void); -int64_t JudyAllocThreadTelemetryGetAndReset(void); +void JudyAllocThreadPulseReset(void); +int64_t JudyAllocThreadPulseGetAndReset(void); #endif //NETDATA_JUDY_MALLOC_H diff --git a/src/libnetdata/libjudy/vendored/JudyCommon/JudyPrivate.h b/src/libnetdata/libjudy/vendored/JudyCommon/JudyPrivate.h index 350631f01c93d9..f002f6e22c2cc1 100644 --- a/src/libnetdata/libjudy/vendored/JudyCommon/JudyPrivate.h +++ b/src/libnetdata/libjudy/vendored/JudyCommon/JudyPrivate.h @@ -213,7 +213,7 @@ Leaf |< 3 > | 3 | 2 | 3 | 1 | 2 | 3 | 3 typedef int bool_t; #endif -#define FUNCTION // null; easy to find functions. +#define FUNCTION __attribute__((no_sanitize("shift"))) // null; easy to find functions. #ifndef TRUE #define TRUE 1 diff --git a/src/libnetdata/libjudy/vendored/JudyL/JudyLCascade.c b/src/libnetdata/libjudy/vendored/JudyL/JudyLCascade.c index c1a26f41353763..6b52ddf5fbce0b 100644 --- a/src/libnetdata/libjudy/vendored/JudyL/JudyLCascade.c +++ b/src/libnetdata/libjudy/vendored/JudyL/JudyLCascade.c @@ -311,7 +311,6 @@ static int j__udyStageJBBtoJBB( // // NOTE: Caller must release the Leaf2 that was passed in. -__attribute__((no_sanitize("shift"))) FUNCTION static Pjlb_t j__udyJLL2toJLB1( uint16_t * Pjll, // array of 16-bit indexes. #ifdef JUDYL diff --git a/src/libnetdata/libjudy/vendored/JudyL/JudyLDecascade.c b/src/libnetdata/libjudy/vendored/JudyL/JudyLDecascade.c index c2bf81ea1328c6..39a89eff128997 100644 --- a/src/libnetdata/libjudy/vendored/JudyL/JudyLDecascade.c +++ b/src/libnetdata/libjudy/vendored/JudyL/JudyLDecascade.c @@ -345,7 +345,6 @@ FUNCTION int j__udyBranchUToBranchB( // allocation and free, in order to allow the caller to continue with a LeafB1 // if allocation fails. -__attribute__((no_sanitize("shift"))) FUNCTION int j__udyLeafB1ToLeaf1( Pjp_t Pjp, // points to LeafB1 to shrink. Pvoid_t Pjpm) // for global accounting. 
@@ -432,7 +431,6 @@ FUNCTION int j__udyLeafB1ToLeaf1( // TBD: In this and all following functions, the caller should already be able // to compute the Pop1 return value, so why return it? -__attribute__((no_sanitize("shift"))) FUNCTION Word_t j__udyLeaf1ToLeaf2( uint16_t * PLeaf2, // destination uint16_t * Index portion of leaf. #ifdef JUDYL diff --git a/src/libnetdata/libjudy/vendored/JudyL/JudyLDel.c b/src/libnetdata/libjudy/vendored/JudyL/JudyLDel.c index 7c3d9108bb1f44..ced4b5fb30905b 100644 --- a/src/libnetdata/libjudy/vendored/JudyL/JudyLDel.c +++ b/src/libnetdata/libjudy/vendored/JudyL/JudyLDel.c @@ -147,7 +147,6 @@ extern Word_t j__udyLLeaf7ToLeafW(Pjlw_t, Pjv_t, Pjp_t, Word_t, Pvoid_t); DBGCODE(uint8_t parentJPtype;) // parent branch JP type. -__attribute__((no_sanitize("shift"))) FUNCTION static int j__udyDelWalk( Pjp_t Pjp, // current JP under which to delete. Word_t Index, // to delete. diff --git a/src/libnetdata/libjudy/vendored/JudyL/JudyLGet.c b/src/libnetdata/libjudy/vendored/JudyL/JudyLGet.c index e6853939be8b58..0bb9971cc89a2b 100644 --- a/src/libnetdata/libjudy/vendored/JudyL/JudyLGet.c +++ b/src/libnetdata/libjudy/vendored/JudyL/JudyLGet.c @@ -44,8 +44,6 @@ // See the manual entry for details. Note support for "shortcut" entries to // trees known to start with a JPM. -__attribute__((no_sanitize("shift"))) - #ifdef JUDY1 #ifdef JUDYGETINLINE diff --git a/src/libnetdata/libjudy/vendored/JudyL/JudyLIns.c b/src/libnetdata/libjudy/vendored/JudyL/JudyLIns.c index 256a1ef3bac738..f96df4101d939e 100644 --- a/src/libnetdata/libjudy/vendored/JudyL/JudyLIns.c +++ b/src/libnetdata/libjudy/vendored/JudyL/JudyLIns.c @@ -152,7 +152,6 @@ extern int j__udyLInsertBranch(Pjp_t Pjp, Word_t Index, Word_t Btype, Pjpm_t); // Return -1 for error (details in JPM), 0 for Index already inserted, 1 for // new Index inserted. -__attribute__((no_sanitize("shift"))) FUNCTION static int j__udyInsWalk( Pjp_t Pjp, // current JP to descend. Word_t Index, // to insert. 
diff --git a/src/libnetdata/libnetdata.c b/src/libnetdata/libnetdata.c index 62ac5ea08322dc..323fae84b95179 100644 --- a/src/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -454,7 +454,8 @@ void mallocz_release_as_much_memory_to_the_system(void) { spinlock_lock(&spinlock); #ifdef HAVE_C_MALLOPT - size_t trim_threshold = aral_optimal_malloc_page_size(); + // the default is 128KiB + size_t trim_threshold = 65ULL * 1024; mallopt(M_TRIM_THRESHOLD, (int)trim_threshold); #endif diff --git a/src/libnetdata/simple_hashtable/simple_hashtable.h b/src/libnetdata/simple_hashtable/simple_hashtable.h index fe88d23f887f29..9aea7e013bfe25 100644 --- a/src/libnetdata/simple_hashtable/simple_hashtable.h +++ b/src/libnetdata/simple_hashtable/simple_hashtable.h @@ -69,9 +69,13 @@ static inline bool SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION(SIMPLE_HASHTABLE_KEY_T #endif // First layer of macro for token concatenation -#define CONCAT_INTERNAL(a, b) a ## b +#ifndef CONCAT_INDIRECT +#define CONCAT_INDIRECT(a, b) a ## b +#endif // Second layer of macro, which ensures proper expansion -#define CONCAT(a, b) CONCAT_INTERNAL(a, b) +#ifndef CONCAT +#define CONCAT(a, b) CONCAT_INDIRECT(a, b) +#endif // define names for all structures and structures #define simple_hashtable_init_named CONCAT(simple_hashtable_init, SIMPLE_HASHTABLE_NAME) diff --git a/src/libnetdata/string/string.c b/src/libnetdata/string/string.c index 107c7eea5bae32..b778dfe6d878d1 100644 --- a/src/libnetdata/string/string.c +++ b/src/libnetdata/string/string.c @@ -32,7 +32,7 @@ static struct string_partition { size_t deletes; // the number of successful deleted from the index long int entries; // the number of entries in the index - long int memory; // the memory used, without the JudyHS index + long int memory; // the memory used, with JudyHS (accurate) #ifdef NETDATA_INTERNAL_CHECKS // internal statistics @@ -196,10 +196,18 @@ static inline STRING *string_index_insert(const char *str, size_t length) { 
rw_spinlock_write_lock(&string_base[partition].spinlock); + int64_t mem = 0; + STRING **ptr; { JError_t J_Error; + + JudyAllocThreadPulseReset(); + Pvoid_t *Rc = JudyHSIns(&string_base[partition].JudyHSArray, (void *)str, length - 1, &J_Error); + + mem = JudyAllocThreadPulseGetAndReset(); + if (unlikely(Rc == PJERR)) { fatal( "STRING: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d", @@ -220,7 +228,7 @@ static inline STRING *string_index_insert(const char *str, size_t length) { *ptr = string; string_base[partition].inserts++; string_base[partition].entries++; - string_base[partition].memory += (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(length)); + string_base[partition].memory += (long)(mem_size + mem); } else { // the item is already in the index @@ -256,10 +264,17 @@ static inline void string_index_delete(STRING *string) { #endif bool deleted = false; + int64_t mem = 0; if (likely(string_base[partition].JudyHSArray)) { JError_t J_Error; + + JudyAllocThreadPulseReset(); + int ret = JudyHSDel(&string_base[partition].JudyHSArray, (void *)string->str, string->length - 1, &J_Error); + + mem = JudyAllocThreadPulseGetAndReset(); + if (unlikely(ret == JERR)) { netdata_log_error( "STRING: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d", @@ -276,7 +291,7 @@ static inline void string_index_delete(STRING *string) { size_t mem_size = sizeof(STRING) + string->length; string_base[partition].deletes++; string_base[partition].entries--; - string_base[partition].memory -= (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(string->length)); + string_base[partition].memory -= (long)(mem_size + mem); freez(string); } diff --git a/src/libnetdata/url/url.c b/src/libnetdata/url/url.c index 720a703d4ef705..15846fb129cbfb 100644 --- a/src/libnetdata/url/url.c +++ b/src/libnetdata/url/url.c @@ -19,7 +19,7 @@ char to_hex(char code) { /* Returns an url-encoded version of str */ /* IMPORTANT: be sure to free() the returned string after use */ 
-char *url_encode(char *str) { +char *url_encode(const char *str) { char *buf, *pbuf; pbuf = buf = mallocz(strlen(str) * 3 + 1); diff --git a/src/libnetdata/url/url.h b/src/libnetdata/url/url.h index f7a67dd5cc37ea..67e57ed3df189d 100644 --- a/src/libnetdata/url/url.h +++ b/src/libnetdata/url/url.h @@ -17,7 +17,7 @@ char to_hex(char code); /* Returns a url-encoded version of str */ /* IMPORTANT: be sure to free() the returned string after use */ -char *url_encode(char *str); +char *url_encode(const char *str); /* Returns a url-decoded version of str */ /* IMPORTANT: be sure to free() the returned string after use */ diff --git a/src/ml/ml.cc b/src/ml/ml.cc index b90da76886aa4f..8e37253c668816 100644 --- a/src/ml/ml.cc +++ b/src/ml/ml.cc @@ -6,6 +6,7 @@ #include "ad_charts.h" #include "database/sqlite/vendored/sqlite3.h" +#include "streaming/stream-control.h" #define WORKER_TRAIN_QUEUE_POP 0 #define WORKER_TRAIN_ACQUIRE_DIMENSION 1 @@ -20,13 +21,6 @@ sqlite3 *ml_db = NULL; static netdata_mutex_t db_mutex = NETDATA_MUTEX_INITIALIZER; typedef struct { - // Time when the request for this response was made - time_t request_time; - - // First/last entry of the dimension in DB when generating the request - time_t first_entry_on_request; - time_t last_entry_on_request; - // First/last entry of the dimension in DB when generating the response time_t first_entry_on_response; time_t last_entry_on_response; @@ -47,14 +41,10 @@ typedef struct { } ml_training_response_t; static std::pair -ml_dimension_calculated_numbers(ml_worker_t *worker, ml_dimension_t *dim, const ml_request_create_new_model_t &req) +ml_dimension_calculated_numbers(ml_worker_t *worker, ml_dimension_t *dim) { ml_training_response_t training_response = {}; - training_response.request_time = req.request_time; - training_response.first_entry_on_request = req.first_entry_on_request; - training_response.last_entry_on_request = req.last_entry_on_request; - training_response.first_entry_on_response = 
rrddim_first_entry_s_of_tier(dim->rd, 0); training_response.last_entry_on_response = rrddim_last_entry_s_of_tier(dim->rd, 0); @@ -83,7 +73,7 @@ ml_dimension_calculated_numbers(ml_worker_t *worker, ml_dimension_t *dim, const storage_engine_query_init(dim->rd->tiers[0].seb, dim->rd->tiers[0].smh, &handle, training_response.query_after_t, training_response.query_before_t, - STORAGE_PRIORITY_BEST_EFFORT); + STORAGE_PRIORITY_SYNCHRONOUS); size_t idx = 0; memset(worker->training_cns, 0, sizeof(calculated_number_t) * max_n * (Cfg.lag_n + 1)); @@ -637,10 +627,18 @@ static void ml_dimension_update_models(ml_worker_t *worker, ml_dimension_t *dim) } static enum ml_worker_result -ml_dimension_train_model(ml_worker_t *worker, ml_dimension_t *dim, const ml_request_create_new_model_t &req) +ml_dimension_train_model(ml_worker_t *worker, ml_dimension_t *dim) { worker_is_busy(WORKER_TRAIN_QUERY); - auto P = ml_dimension_calculated_numbers(worker, dim, req); + + spinlock_lock(&dim->slock); + if (dim->mt == METRIC_TYPE_CONSTANT) { + spinlock_unlock(&dim->slock); + return ML_WORKER_RESULT_OK; + } + spinlock_unlock(&dim->slock); + + auto P = ml_dimension_calculated_numbers(worker, dim); ml_worker_result worker_result = P.first; ml_training_response_t training_response = P.second; @@ -648,21 +646,8 @@ ml_dimension_train_model(ml_worker_t *worker, ml_dimension_t *dim, const ml_requ spinlock_lock(&dim->slock); dim->mt = METRIC_TYPE_CONSTANT; - - switch (dim->ts) { - case TRAINING_STATUS_PENDING_WITH_MODEL: - dim->ts = TRAINING_STATUS_TRAINED; - break; - case TRAINING_STATUS_PENDING_WITHOUT_MODEL: - dim->ts = TRAINING_STATUS_UNTRAINED; - break; - default: - break; - } - dim->suppression_anomaly_counter = 0; dim->suppression_window_counter = 0; - dim->last_training_time = training_response.last_entry_on_response; spinlock_unlock(&dim->slock); @@ -694,59 +679,8 @@ ml_dimension_train_model(ml_worker_t *worker, ml_dimension_t *dim, const ml_requ return worker_result; } -static void 
-ml_dimension_schedule_for_training(ml_dimension_t *dim, time_t curr_time) -{ - switch (dim->mt) { - case METRIC_TYPE_CONSTANT: - return; - default: - break; - } - - bool schedule_for_training = false; - - switch (dim->ts) { - case TRAINING_STATUS_PENDING_WITH_MODEL: - case TRAINING_STATUS_PENDING_WITHOUT_MODEL: - schedule_for_training = false; - break; - case TRAINING_STATUS_UNTRAINED: - schedule_for_training = true; - dim->ts = TRAINING_STATUS_PENDING_WITHOUT_MODEL; - break; - case TRAINING_STATUS_SILENCED: - case TRAINING_STATUS_TRAINED: - if ((dim->last_training_time + (Cfg.train_every * dim->rd->rrdset->update_every)) < curr_time) { - schedule_for_training = true; - dim->ts = TRAINING_STATUS_PENDING_WITH_MODEL; - } - break; - } - - if (schedule_for_training) { - ml_request_create_new_model_t req; - - req.DLI = DimensionLookupInfo( - &dim->rd->rrdset->rrdhost->machine_guid[0], - dim->rd->rrdset->id, - dim->rd->id - ); - req.request_time = curr_time; - req.first_entry_on_request = rrddim_first_entry_s(dim->rd); - req.last_entry_on_request = rrddim_last_entry_s(dim->rd); - - ml_host_t *host = (ml_host_t *) dim->rd->rrdset->rrdhost->ml_host; - - ml_queue_item_t item; - item.type = ML_QUEUE_ITEM_TYPE_CREATE_NEW_MODEL; - item.create_new_model = req; - ml_queue_push(host->queue, item); - } -} - bool -ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t value, bool exists) +ml_dimension_predict(ml_dimension_t *dim, calculated_number_t value, bool exists) { // Nothing to do if ML is disabled for this dimension if (dim->mls != MACHINE_LEARNING_STATUS_ENABLED) @@ -791,7 +725,7 @@ ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t ml_features_preprocess(&features); /* - * Lock to predict and possibly schedule the dimension for training + * Lock to predict */ if (spinlock_trylock(&dim->slock) == 0) return false; @@ -800,19 +734,10 @@ ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t if 
(!same_value) dim->mt = METRIC_TYPE_VARIABLE; - // Decide if the dimension needs to be scheduled for training - ml_dimension_schedule_for_training(dim, curr_time); - - // Nothing to do if we don't have a model - switch (dim->ts) { - case TRAINING_STATUS_UNTRAINED: - case TRAINING_STATUS_PENDING_WITHOUT_MODEL: { - case TRAINING_STATUS_SILENCED: - spinlock_unlock(&dim->slock); - return false; - } - default: - break; + // Ignore silenced dimensions + if (dim->ts == TRAINING_STATUS_SILENCED) { + spinlock_unlock(&dim->slock); + return false; } dim->suppression_window_counter++; @@ -888,18 +813,9 @@ ml_chart_update_dimension(ml_chart_t *chart, ml_dimension_t *dim, bool is_anomal case TRAINING_STATUS_UNTRAINED: chart->mls.num_training_status_untrained++; return; - case TRAINING_STATUS_PENDING_WITHOUT_MODEL: - chart->mls.num_training_status_pending_without_model++; - return; case TRAINING_STATUS_TRAINED: chart->mls.num_training_status_trained++; - chart->mls.num_anomalous_dimensions += is_anomalous; - chart->mls.num_normal_dimensions += !is_anomalous; - return; - case TRAINING_STATUS_PENDING_WITH_MODEL: - chart->mls.num_training_status_pending_with_model++; - chart->mls.num_anomalous_dimensions += is_anomalous; chart->mls.num_normal_dimensions += !is_anomalous; return; @@ -997,6 +913,12 @@ ml_host_detect_once(ml_host_t *host) mls_copy = host->mls; netdata_mutex_unlock(&host->mutex); + + worker_is_busy(WORKER_JOB_DETECTION_DIM_CHART); + ml_update_dimensions_chart(host, mls_copy); + + worker_is_busy(WORKER_JOB_DETECTION_HOST_CHART); + ml_update_host_and_detection_rate_charts(host, host->host_anomaly_rate * 10000.0); } else { host->host_anomaly_rate = 0.0; @@ -1009,12 +931,6 @@ ml_host_detect_once(ml_host_t *host) }; } } - - worker_is_busy(WORKER_JOB_DETECTION_DIM_CHART); - ml_update_dimensions_chart(host, mls_copy); - - worker_is_busy(WORKER_JOB_DETECTION_HOST_CHART); - ml_update_host_and_detection_rate_charts(host, host->host_anomaly_rate * 10000.0); } void * @@ -1129,7 
+1045,7 @@ static enum ml_worker_result ml_worker_create_new_model(ml_worker_t *worker, ml_ } ml_dimension_t *Dim = reinterpret_cast(AcqDim.dimension()); - return ml_dimension_train_model(worker, Dim, req); + return ml_dimension_train_model(worker, Dim); } static enum ml_worker_result ml_worker_add_existing_model(ml_worker_t *worker, ml_request_add_existing_model_t req) { @@ -1173,6 +1089,12 @@ void *ml_train_main(void *arg) { worker_register_job_name(WORKER_TRAIN_FLUSH_MODELS, "flush models"); while (!Cfg.training_stop) { + if(!stream_control_ml_should_be_running()) { + worker_is_idle(); + stream_control_throttle(); + continue; + } + worker_is_busy(WORKER_TRAIN_QUEUE_POP); ml_queue_stats_t loop_stats{}; @@ -1195,6 +1117,9 @@ void *ml_train_main(void *arg) { switch (item.type) { case ML_QUEUE_ITEM_TYPE_CREATE_NEW_MODEL: { worker_res = ml_worker_create_new_model(worker, item.create_new_model); + if (worker_res != ML_WORKER_RESULT_NULL_ACQUIRED_DIMENSION) { + ml_queue_push(worker->queue, item); + } break; } case ML_QUEUE_ITEM_TYPE_ADD_EXISTING_MODEL: { diff --git a/src/ml/ml_config.cc b/src/ml/ml_config.cc index 4890e1fa813544..25f6f890fe1a5d 100644 --- a/src/ml/ml_config.cc +++ b/src/ml/ml_config.cc @@ -46,7 +46,7 @@ void ml_config_load(ml_config_t *cfg) { time_t anomaly_detection_query_duration = config_get_duration_seconds(config_section_ml, "anomaly detection grouping duration", 5 * 60); size_t num_worker_threads = config_get_number(config_section_ml, "num training threads", os_get_system_cpus() / 4); - size_t flush_models_batch_size = config_get_number(config_section_ml, "flush models batch size", 128); + size_t flush_models_batch_size = config_get_number(config_section_ml, "flush models batch size", 256); size_t suppression_window = config_get_duration_seconds(config_section_ml, "dimension anomaly rate suppression window", 900); diff --git a/src/ml/ml_dimension.h b/src/ml/ml_dimension.h index 348e963842cad7..3282301fb6df14 100644 --- a/src/ml/ml_dimension.h +++ 
b/src/ml/ml_dimension.h @@ -29,7 +29,7 @@ struct ml_dimension_t { }; bool -ml_dimension_predict(ml_dimension_t *dim, time_t curr_time, calculated_number_t value, bool exists); +ml_dimension_predict(ml_dimension_t *dim, calculated_number_t value, bool exists); bool ml_dimension_deserialize_kmeans(const char *json_str); diff --git a/src/ml/ml_enums.cc b/src/ml/ml_enums.cc index db8557227f4c31..d0b594efb1c9df 100644 --- a/src/ml/ml_enums.cc +++ b/src/ml/ml_enums.cc @@ -32,10 +32,6 @@ const char * ml_training_status_to_string(enum ml_training_status ts) { switch (ts) { - case TRAINING_STATUS_PENDING_WITH_MODEL: - return "pending-with-model"; - case TRAINING_STATUS_PENDING_WITHOUT_MODEL: - return "pending-without-model"; case TRAINING_STATUS_TRAINED: return "trained"; case TRAINING_STATUS_UNTRAINED: diff --git a/src/ml/ml_enums.h b/src/ml/ml_enums.h index 58ef1b349e84e8..c84c5553a5935d 100644 --- a/src/ml/ml_enums.h +++ b/src/ml/ml_enums.h @@ -27,12 +27,6 @@ enum ml_training_status { // We don't have a model for this dimension TRAINING_STATUS_UNTRAINED, - // Request for training sent, but we don't have any models yet - TRAINING_STATUS_PENDING_WITHOUT_MODEL, - - // Request to update existing models sent - TRAINING_STATUS_PENDING_WITH_MODEL, - // Have a valid, up-to-date model TRAINING_STATUS_TRAINED, diff --git a/src/ml/ml_public.cc b/src/ml/ml_public.cc index 628b00f0e3cc72..03f5ba0a740d2b 100644 --- a/src/ml/ml_public.cc +++ b/src/ml/ml_public.cc @@ -48,7 +48,7 @@ void ml_host_new(RRDHOST *rh) netdata_mutex_init(&host->mutex); spinlock_init(&host->type_anomaly_rate_spinlock); - host->ml_running = true; + host->ml_running = false; rh->ml_host = (rrd_ml_host_t *) host; } @@ -104,13 +104,12 @@ void ml_host_stop(RRDHOST *rh) { spinlock_lock(&dim->slock); - // reset dim - // TODO: should we drop in-mem models, or mark them as stale? Is it - // okay to resume training straight away? 
- dim->mt = METRIC_TYPE_CONSTANT; dim->ts = TRAINING_STATUS_UNTRAINED; + + // TODO: Check if we can remove this field. dim->last_training_time = 0; + dim->suppression_anomaly_counter = 0; dim->suppression_window_counter = 0; dim->cns.clear(); @@ -290,6 +289,25 @@ void ml_dimension_new(RRDDIM *rd) rd->ml_dimension = (rrd_ml_dimension_t *) dim; metaqueue_ml_load_models(rd); + + // add to worker queue + { + RRDHOST *rh = rd->rrdset->rrdhost; + ml_host_t *host = (ml_host_t *) rh->ml_host; + + ml_queue_item_t item; + item.type = ML_QUEUE_ITEM_TYPE_CREATE_NEW_MODEL; + + ml_request_create_new_model_t req; + req.DLI = DimensionLookupInfo( + &rh->machine_guid[0], + rd->rrdset->id, + rd->id + ); + item.create_new_model = req; + + ml_queue_push(host->queue, item); + } } void ml_dimension_delete(RRDDIM *rd) @@ -318,6 +336,8 @@ void ml_dimension_received_anomaly(RRDDIM *rd, bool is_anomalous) { bool ml_dimension_is_anomalous(RRDDIM *rd, time_t curr_time, double value, bool exists) { + UNUSED(curr_time); + ml_dimension_t *dim = (ml_dimension_t *) rd->ml_dimension; if (!dim) return false; @@ -328,7 +348,7 @@ bool ml_dimension_is_anomalous(RRDDIM *rd, time_t curr_time, double value, bool ml_chart_t *chart = (ml_chart_t *) rd->rrdset->ml_chart; - bool is_anomalous = ml_dimension_predict(dim, curr_time, value, exists); + bool is_anomalous = ml_dimension_predict(dim, value, exists); ml_chart_update_dimension(chart, dim, is_anomalous); return is_anomalous; diff --git a/src/ml/ml_queue.h b/src/ml/ml_queue.h index a0f9d06b60d01b..5aef21077ec7aa 100644 --- a/src/ml/ml_queue.h +++ b/src/ml/ml_queue.h @@ -10,14 +10,6 @@ typedef struct ml_request_create_new_model { DimensionLookupInfo DLI; - - // Creation time of request - time_t request_time; - - // First/last entry of this dimension in DB - // at the point the request was made - time_t first_entry_on_request; - time_t last_entry_on_request; } ml_request_create_new_model_t; typedef struct ml_request_add_existing_model { diff --git 
a/src/plugins.d/pluginsd_parser.c b/src/plugins.d/pluginsd_parser.c index 486c2f82a9cbbe..f5f71e3079ac98 100644 --- a/src/plugins.d/pluginsd_parser.c +++ b/src/plugins.d/pluginsd_parser.c @@ -202,6 +202,8 @@ static inline PARSER_RC pluginsd_host_define_end(char **words __maybe_unused, si false); rrdhost_option_set(host, RRDHOST_OPTION_VIRTUAL_HOST); + rrdhost_flag_set(host, RRDHOST_FLAG_COLLECTOR_ONLINE); + ml_host_start(host); dyncfg_host_init(host); if(host->rrdlabels) { diff --git a/src/streaming/protocol/command-begin-set-end.c b/src/streaming/protocol/command-begin-set-end.c index 99c6d58afbea3c..9af7613e78eff4 100644 --- a/src/streaming/protocol/command-begin-set-end.c +++ b/src/streaming/protocol/command-begin-set-end.c @@ -30,7 +30,7 @@ stream_send_rrdset_metrics_v1_internal(BUFFER *wb, RRDSET *st, struct sender_sta buffer_fast_strcat(wb, "\n", 1); } else { - internal_error(true, "STREAM: 'host:%s/chart:%s/dim:%s' flag 'exposed' is updated but not exposed", + internal_error(true, "STREAM SEND '%s': 'chart:%s/dim:%s' flag 'exposed' is updated but not exposed", rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); // we will include it in the next iteration rrddim_metadata_updated(rd); diff --git a/src/streaming/protocol/command-nodeid.c b/src/streaming/protocol/command-nodeid.c index e154813473e943..b82195936358de 100644 --- a/src/streaming/protocol/command-nodeid.c +++ b/src/streaming/protocol/command-nodeid.c @@ -51,7 +51,7 @@ void stream_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { ND_UUID claim_id; if (uuid_parse(claim_id_str ? claim_id_str : "", claim_id.uuid) != 0) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s] received invalid claim id '%s'", + "STREAM SEND '%s' [to %s] received invalid claim id '%s'", rrdhost_hostname(s->host), s->connected_to, claim_id_str ? 
claim_id_str : "(unset)"); return; @@ -60,7 +60,7 @@ void stream_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { ND_UUID node_id; if(uuid_parse(node_id_str ? node_id_str : "", node_id.uuid) != 0) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s] received an invalid node id '%s'", + "STREAM SEND '%s' [to %s] received an invalid node id '%s'", rrdhost_hostname(s->host), s->connected_to, node_id_str ? node_id_str : "(unset)"); return; @@ -68,14 +68,14 @@ void stream_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { if (!UUIDiszero(s->host->aclk.claim_id_of_parent) && !UUIDeq(s->host->aclk.claim_id_of_parent, claim_id)) nd_log(NDLS_DAEMON, NDLP_INFO, - "STREAM %s [send to %s] changed parent's claim id to %s", + "STREAM SEND '%s' [to %s] changed parent's claim id to %s", rrdhost_hostname(s->host), s->connected_to, claim_id_str ? claim_id_str : "(unset)"); if(!UUIDiszero(s->host->node_id) && !UUIDeq(s->host->node_id, node_id)) { if(claimed) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s] parent reports different node id '%s', but we are claimed. Ignoring it.", + "STREAM SEND '%s' [to %s] parent reports different node id '%s', but we are claimed. Ignoring it.", rrdhost_hostname(s->host), s->connected_to, node_id_str ? node_id_str : "(unset)"); return; @@ -83,7 +83,7 @@ void stream_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { else { update_node_id = true; nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s] changed node id to %s", + "STREAM SEND '%s' [to %s] changed node id to %s", rrdhost_hostname(s->host), s->connected_to, node_id_str ? node_id_str : "(unset)"); } @@ -91,7 +91,7 @@ void stream_sender_get_node_and_claim_id_from_parent(struct sender_state *s) { if(!url || !*url) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s] received an invalid cloud URL '%s'", + "STREAM SEND '%s' [to %s] received an invalid cloud URL '%s'", rrdhost_hostname(s->host), s->connected_to, url ? 
url : "(unset)"); return; diff --git a/src/streaming/protocol/commands.c b/src/streaming/protocol/commands.c index 36d48012b93ef6..b73b47d8909f90 100644 --- a/src/streaming/protocol/commands.c +++ b/src/streaming/protocol/commands.c @@ -19,13 +19,19 @@ RRDSET_STREAM_BUFFER stream_send_metrics_init(RRDSET *st, time_t wall_clock_time // check if we are not connected if(unlikely(!(host_flags & RRDHOST_FLAG_STREAM_SENDER_READY_4_METRICS))) { - if(unlikely(!(host_flags & (RRDHOST_FLAG_STREAM_SENDER_ADDED | RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED)))) + if(unlikely((host_flags & RRDHOST_FLAG_COLLECTOR_ONLINE) && + !(host_flags & RRDHOST_FLAG_STREAM_SENDER_ADDED))) stream_sender_start_host(host); if(unlikely(!(host_flags & RRDHOST_FLAG_STREAM_SENDER_LOGGED_STATUS))) { rrdhost_flag_set(host, RRDHOST_FLAG_STREAM_SENDER_LOGGED_STATUS); + + // this message is logged in 2 cases: + // - the parent is connected, but not yet available for streaming data + // - the parent just disconnected, so local data are not streamed to parent + nd_log(NDLS_DAEMON, NDLP_INFO, - "STREAM SEND %s: connected but streaming is not ready yet...", + "STREAM SEND '%s': streaming is not ready, not sending data to a parent...", rrdhost_hostname(host)); } @@ -33,7 +39,7 @@ RRDSET_STREAM_BUFFER stream_send_metrics_init(RRDSET *st, time_t wall_clock_time } else if(unlikely(host_flags & RRDHOST_FLAG_STREAM_SENDER_LOGGED_STATUS)) { nd_log(NDLS_DAEMON, NDLP_INFO, - "STREAM SEND %s: streaming is ready, sending metrics to parent...", + "STREAM SEND '%s': streaming is ready, sending metrics to parent...", rrdhost_hostname(host)); rrdhost_flag_clear(host, RRDHOST_FLAG_STREAM_SENDER_LOGGED_STATUS); } diff --git a/src/streaming/replication.c b/src/streaming/replication.c index f95fcaf28d1811..755e6c4916807f 100644 --- a/src/streaming/replication.c +++ b/src/streaming/replication.c @@ -50,7 +50,7 @@ struct replication_query_statistics replication_get_query_statistics(void) { return ret; } -size_t 
replication_buffers_allocated = 0; +static size_t replication_buffers_allocated = 0; size_t replication_allocated_buffers(void) { return __atomic_load_n(&replication_buffers_allocated, __ATOMIC_RELAXED); @@ -155,7 +155,7 @@ static struct replication_query *replication_query_prepare( if (st->last_updated.tv_sec > q->query.before) { #ifdef NETDATA_LOG_REPLICATION_REQUESTS internal_error(true, - "STREAM_SENDER REPLAY: 'host:%s/chart:%s' " + "STREAM SEND REPLAY: 'host:%s/chart:%s' " "has start_streaming = true, " "adjusting replication before timestamp from %llu to %llu", rrdhost_hostname(st->rrdhost), rrdset_id(st), @@ -178,7 +178,7 @@ static struct replication_query *replication_query_prepare( if (unlikely(rd_dfe.counter >= q->dimensions)) { internal_error(true, - "STREAM_SENDER REPLAY ERROR: 'host:%s/chart:%s' has more dimensions than the replicated ones", + "STREAM SEND REPLAY ERROR: 'host:%s/chart:%s' has more dimensions than the replicated ones", rrdhost_hostname(st->rrdhost), rrdset_id(st)); break; } @@ -192,6 +192,7 @@ static struct replication_query *replication_query_prepare( STORAGE_PRIORITY priority = q->query.locked_data_collection ? 
STORAGE_PRIORITY_HIGH : STORAGE_PRIORITY_LOW; if(synchronous) priority = STORAGE_PRIORITY_SYNCHRONOUS; + stream_control_replication_query_started(); storage_engine_query_init(q->backend, rd->tiers[0].smh, &d->handle, q->query.after, q->query.before, priority); d->enabled = true; @@ -276,6 +277,7 @@ static void replication_query_finalize(BUFFER *wb, struct replication_query *q, if (unlikely(!d->enabled)) continue; storage_engine_query_finalize(&d->handle); + stream_control_replication_query_finished(); dictionary_acquired_item_release(d->dict, d->rda); @@ -362,7 +364,7 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s nd_log_limit_static_global_var(erl, 1, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, - "STREAM_SENDER REPLAY ERROR: 'host:%s/chart:%s/dim:%s': db does not advance the query " + "STREAM SEND REPLAY: 'host:%s/chart:%s/dim:%s': db does not advance the query " "beyond time %llu (tried 1000 times to get the next point and always got back a point in the past)", rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st), rrddim_id(d->rd), (unsigned long long) now); @@ -412,8 +414,7 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s #ifdef NETDATA_INTERNAL_CHECKS nd_log_limit_static_global_var(erl, 1, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING, - "REPLAY WARNING: 'host:%s/chart:%s' " - "misaligned dimensions, " + "STREAM SEND REPLAY WARNING: 'host:%s/chart:%s' misaligned dimensions, " "update every (min: %ld, max: %ld), " "start time (min: %ld, max: %ld), " "end time (min %ld, max %ld), " @@ -448,9 +449,10 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s q->query.before = last_end_time_in_buffer; q->query.enable_streaming = false; - internal_error(true, "REPLICATION: current buffer size %zu is more than the " - "max message size %zu for chart '%s' of host '%s'. 
" - "Interrupting replication request (%ld to %ld, %s) at %ld to %ld, %s.", + internal_error(true, + "STREAM SEND REPLAY: current buffer size %zu is more than the " + "max message size %zu for chart '%s' of host '%s'. " + "Interrupting replication request (%ld to %ld, %s) at %ld to %ld, %s.", buffer_strlen(wb), max_msg_size, rrdset_id(q->st), rrdhost_hostname(q->st->rrdhost), q->request.after, q->request.before, q->request.enable_streaming?"true":"false", q->query.after, q->query.before, q->query.enable_streaming?"true":"false"); @@ -528,14 +530,14 @@ static bool replication_query_execute(BUFFER *wb, struct replication_query *q, s log_date(actual_after_buf, LOG_DATE_LENGTH, actual_after); log_date(actual_before_buf, LOG_DATE_LENGTH, actual_before); internal_error(true, - "STREAM_SENDER REPLAY: 'host:%s/chart:%s': sending data %llu [%s] to %llu [%s] (requested %llu [delta %lld] to %llu [delta %lld])", + "STREAM SEND REPLAY: 'host:%s/chart:%s': sending data %llu [%s] to %llu [%s] (requested %llu [delta %lld] to %llu [delta %lld])", rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st), (unsigned long long)actual_after, actual_after_buf, (unsigned long long)actual_before, actual_before_buf, (unsigned long long)after, (long long)(actual_after - after), (unsigned long long)before, (long long)(actual_before - before)); } else internal_error(true, - "STREAM_SENDER REPLAY: 'host:%s/chart:%s': nothing to send (requested %llu to %llu)", + "STREAM SEND REPLAY: 'host:%s/chart:%s': nothing to send (requested %llu to %llu)", rrdhost_hostname(q->st->rrdhost), rrdset_id(q->st), (unsigned long long)after, (unsigned long long)before); #endif // NETDATA_LOG_REPLICATION_REQUESTS @@ -706,12 +708,14 @@ bool replication_response_execute_and_finalize(struct replication_query *q, size st->stream.snd.resync_time_s = 0; #ifdef NETDATA_LOG_REPLICATION_REQUESTS - internal_error(true, "STREAM_SENDER REPLAY: 'host:%s/chart:%s' streaming starts", + internal_error(true, "STREAM SEND REPLAY: 
'host:%s/chart:%s' streaming starts", rrdhost_hostname(st->rrdhost), rrdset_id(st)); #endif } else - internal_error(true, "REPLAY ERROR: 'host:%s/chart:%s' received start streaming command, but the chart is not in progress replicating", + internal_error(true, + "STREAM SEND REPLAY ERROR: 'host:%s/chart:%s' " + "received start streaming command, but the chart is not in progress replicating", rrdhost_hostname(st->rrdhost), rrdset_id(st)); } } @@ -771,7 +775,7 @@ static void replicate_log_request(struct replication_request_details *r, const c nd_log_limit_static_global_var(erl, 1, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, #endif - "REPLAY ERROR: 'host:%s/chart:%s' child sent: " + "STREAM SEND REPLAY ERROR: 'host:%s/chart:%s' child sent: " "db from %ld to %ld%s, wall clock time %ld, " "last request from %ld to %ld, " "issue: %s - " @@ -809,7 +813,7 @@ static bool send_replay_chart_cmd(struct replication_request_details *r, const c log_date(wanted_before_buf, LOG_DATE_LENGTH, r->wanted.before); internal_error(true, - "REPLAY: 'host:%s/chart:%s' sending replication request %ld [%s] to %ld [%s], start streaming '%s': %s: " + "STREAM SEND REPLAY: 'host:%s/chart:%s' sending replication request %ld [%s] to %ld [%s], start streaming '%s': %s: " "last[%ld - %ld] child[%ld - %ld, now %ld %s] local[%ld - %ld, now %ld] gap[%ld - %ld %s] %s" , rrdhost_hostname(r->host), rrdset_id(r->st) , r->wanted.after, wanted_after_buf @@ -838,7 +842,7 @@ static bool send_replay_chart_cmd(struct replication_request_details *r, const c ssize_t ret = r->caller.callback(buffer, r->caller.parser, STREAM_TRAFFIC_TYPE_REPLICATION); if (ret < 0) { - netdata_log_error("REPLAY ERROR: 'host:%s/chart:%s' failed to send replication request to child (error %zd)", + netdata_log_error("STREAM SEND REPLAY ERROR: 'host:%s/chart:%s' failed to send replication request to child (error %zd)", rrdhost_hostname(r->host), rrdset_id(r->st), ret); return false; } @@ -1277,7 +1281,7 @@ static void 
replication_sort_entry_del(struct replication_request *rq, bool buff } if (!rse_to_delete) - fatal("REPLAY: 'host:%s/chart:%s' Cannot find sort entry to delete for time %ld.", + fatal("STREAM SEND REPLAY: 'host:%s/chart:%s' Cannot find sort entry to delete for time %ld.", rrdhost_hostname(rq->sender->host), string2str(rq->chart_id), rq->after); } @@ -1380,7 +1384,7 @@ static bool replication_request_conflict_callback(const DICTIONARY_ITEM *item __ // we can replace this command internal_error( true, - "STREAM %s [send to %s]: REPLAY: 'host:%s/chart:%s' replacing duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", + "STREAM SEND '%s' [to %s]: REPLAY: 'host:%s/chart:%s' replacing duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", rrdhost_hostname(s->host), s->connected_to, rrdhost_hostname(s->host), dictionary_acquired_item_name(item), (unsigned long long)rq->after, (unsigned long long)rq->before, rq->start_streaming ? "true" : "false", (unsigned long long)rq_new->after, (unsigned long long)rq_new->before, rq_new->start_streaming ? "true" : "false"); @@ -1393,7 +1397,7 @@ static bool replication_request_conflict_callback(const DICTIONARY_ITEM *item __ replication_sort_entry_add(rq); internal_error( true, - "STREAM %s [send to %s]: REPLAY: 'host:%s/chart:%s' adding duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", + "STREAM SEND '%s' [to %s]: REPLAY: 'host:%s/chart:%s' adding duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", rrdhost_hostname(s->host), s->connected_to, rrdhost_hostname(s->host), dictionary_acquired_item_name(item), (unsigned long long)rq->after, (unsigned long long)rq->before, rq->start_streaming ? "true" : "false", (unsigned long long)rq_new->after, (unsigned long long)rq_new->before, rq_new->start_streaming ? 
"true" : "false"); @@ -1401,7 +1405,7 @@ static bool replication_request_conflict_callback(const DICTIONARY_ITEM *item __ else { internal_error( true, - "STREAM %s [send to %s]: REPLAY: 'host:%s/chart:%s' ignoring duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", + "STREAM SEND '%s' [to %s]: REPLAY: 'host:%s/chart:%s' ignoring duplicate replication command received (existing from %llu to %llu [%s], new from %llu to %llu [%s])", rrdhost_hostname(s->host), s->connected_to, rrdhost_hostname(s->host), dictionary_acquired_item_name(item), (unsigned long long) rq->after, (unsigned long long) rq->before, rq->start_streaming ? "true" : "false", @@ -1445,7 +1449,7 @@ static bool replication_execute_request(struct replication_request *rq, bool wor } if(!rq->st) { - internal_error(true, "REPLAY ERROR: 'host:%s/chart:%s' not found", + internal_error(true, "STREAM SEND REPLAY ERROR: 'host:%s/chart:%s' not found", rrdhost_hostname(rq->sender->host), string2str(rq->chart_id)); goto cleanup; @@ -1573,7 +1577,8 @@ static size_t verify_host_charts_are_streaming_now(RRDHOST *host) { host->sender && !stream_sender_pending_replication_requests(host->sender) && dictionary_entries(host->sender->replication.requests) != 0, - "REPLICATION SUMMARY: 'host:%s' reports %zu pending replication requests, but its chart replication index says there are %zu charts pending replication", + "STREAM SEND REPLAY SUMMARY: 'host:%s' reports %zu pending replication requests, " + "but its chart replication index says there are %zu charts pending replication", rrdhost_hostname(host), stream_sender_pending_replication_requests(host->sender), dictionary_entries(host->sender->replication.requests) @@ -1591,7 +1596,7 @@ static size_t verify_host_charts_are_streaming_now(RRDHOST *host) { if(!flags) { internal_error( true, - "REPLICATION SUMMARY: 'host:%s/chart:%s' is neither IN PROGRESS nor FINISHED", + "STREAM SEND REPLAY SUMMARY: 'host:%s/chart:%s' is neither 
IN PROGRESS nor FINISHED", rrdhost_hostname(host), rrdset_id(st) ); is_error = true; @@ -1600,7 +1605,7 @@ static size_t verify_host_charts_are_streaming_now(RRDHOST *host) { if(!(flags & RRDSET_FLAG_SENDER_REPLICATION_FINISHED) || (flags & RRDSET_FLAG_SENDER_REPLICATION_IN_PROGRESS)) { internal_error( true, - "REPLICATION SUMMARY: 'host:%s/chart:%s' is IN PROGRESS although replication is finished", + "STREAM SEND REPLAY SUMMARY: 'host:%s/chart:%s' is IN PROGRESS although replication is finished", rrdhost_hostname(host), rrdset_id(st) ); is_error = true; @@ -1614,7 +1619,7 @@ static size_t verify_host_charts_are_streaming_now(RRDHOST *host) { rrdset_foreach_done(st); internal_error(errors, - "REPLICATION SUMMARY: 'host:%s' finished replicating %zu charts, but %zu charts are still in progress although replication finished", + "STREAM SEND REPLAY SUMMARY: 'host:%s' finished replicating %zu charts, but %zu charts are still in progress although replication finished", rrdhost_hostname(host), ok, errors); return errors; @@ -1830,6 +1835,12 @@ static void *replication_worker_thread(void *ptr __maybe_unused) { replication_initialize_workers(false); while (service_running(SERVICE_REPLICATION)) { + if(!stream_control_replication_should_be_running()) { + worker_is_idle(); + stream_control_throttle(); + continue; + } + if (unlikely(replication_pipeline_execute_next() == REQUEST_QUEUE_EMPTY)) { sender_commit_thread_buffer_free(); worker_is_busy(WORKER_JOB_WAIT); @@ -1880,7 +1891,7 @@ void *replication_thread_main(void *ptr) { int nodes = (int)dictionary_entries(rrdhost_root_index); int cpus = (int)get_netdata_cpus(); - int threads = MIN(cpus * 1 / 3, nodes / 10); + int threads = cpus / 2; if (threads < 1) threads = 1; else if (threads > MAX_REPLICATION_THREADS) threads = MAX_REPLICATION_THREADS; @@ -1926,6 +1937,12 @@ void *replication_thread_main(void *ptr) { while(service_running(SERVICE_REPLICATION)) { + if(!stream_control_replication_should_be_running()) { + 
worker_is_idle(); + stream_control_throttle(); + continue; + } + // statistics usec_t now_mono_ut = now_monotonic_usec(); if(unlikely(now_mono_ut - last_now_mono_ut > default_rrd_update_every * USEC_PER_SEC)) { diff --git a/src/streaming/replication.h b/src/streaming/replication.h index 10d8fba3ac6122..3fd88621a6c6e8 100644 --- a/src/streaming/replication.h +++ b/src/streaming/replication.h @@ -6,6 +6,10 @@ #include "daemon/common.h" #include "stream-circular-buffer.h" +#ifdef __cplusplus +extern "C" { +#endif + struct parser; struct replication_query_statistics { @@ -36,4 +40,8 @@ void replication_recalculate_buffer_used_ratio_unsafe(struct sender_state *s); size_t replication_allocated_memory(void); size_t replication_allocated_buffers(void); +#ifdef __cplusplus +} +#endif + #endif /* REPLICATION_H */ diff --git a/src/streaming/rrdhost-status.c b/src/streaming/rrdhost-status.c index 22ce3de89c7b22..f069e375753680 100644 --- a/src/streaming/rrdhost-status.c +++ b/src/streaming/rrdhost-status.c @@ -132,7 +132,7 @@ void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) { rrdhost_receiver_lock(host); s->ingest.hops = (int16_t)(host->system_info ? host->system_info->hops : (host == localhost) ? 
0 : 1); bool has_receiver = false; - if (host->receiver && !rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED)) { + if (host->receiver && rrdhost_flag_check(host, RRDHOST_FLAG_COLLECTOR_ONLINE)) { has_receiver = true; s->ingest.replication.instances = rrdhost_receiver_replicating_charts(host); s->ingest.replication.completion = host->stream.rcv.status.replication.percent; diff --git a/src/streaming/stream-capabilities.c b/src/streaming/stream-capabilities.c index dfdde5a7f69585..83671dc7002e69 100644 --- a/src/streaming/stream-capabilities.c +++ b/src/streaming/stream-capabilities.c @@ -80,7 +80,7 @@ void log_receiver_capabilities(struct receiver_state *rpt) { BUFFER *wb = buffer_create(100, NULL); stream_capabilities_to_string(wb, rpt->capabilities); - nd_log_daemon(NDLP_INFO, "STREAM %s [receive from [%s]:%s]: established link with negotiated capabilities: %s", + nd_log_daemon(NDLP_INFO, "STREAM RECEIVE '%s' [from [%s]:%s]: established link with negotiated capabilities: %s", rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, buffer_tostring(wb)); buffer_free(wb); @@ -90,7 +90,7 @@ void log_sender_capabilities(struct sender_state *s) { BUFFER *wb = buffer_create(100, NULL); stream_capabilities_to_string(wb, s->capabilities); - nd_log_daemon(NDLP_INFO, "STREAM %s [send to %s]: established link with negotiated capabilities: %s", + nd_log_daemon(NDLP_INFO, "STREAM SEND '%s' [to %s]: established link with negotiated capabilities: %s", rrdhost_hostname(s->host), s->connected_to, buffer_tostring(wb)); buffer_free(wb); diff --git a/src/streaming/stream-circular-buffer.c b/src/streaming/stream-circular-buffer.c index 60386755f7acd7..dd7cb876ad3c14 100644 --- a/src/streaming/stream-circular-buffer.c +++ b/src/streaming/stream-circular-buffer.c @@ -3,8 +3,6 @@ #include "stream.h" #include "stream-sender-internals.h" -#define STREAM_CIRCULAR_BUFFER_ADAPT_TO_TIMES_MAX_SIZE 3 - struct stream_circular_buffer { struct circular_buffer *cb; 
STREAM_CIRCULAR_BUFFER_STATS stats; @@ -41,10 +39,9 @@ STREAM_CIRCULAR_BUFFER *stream_circular_buffer_create(void) { } // returns true if it increased the buffer size -bool stream_circular_buffer_set_max_size_unsafe(STREAM_CIRCULAR_BUFFER *scb, size_t uncompressed_msg_size, bool force) { - size_t wanted = uncompressed_msg_size * STREAM_CIRCULAR_BUFFER_ADAPT_TO_TIMES_MAX_SIZE; - if(force || scb->cb->max_size < wanted) { - scb->cb->max_size = wanted; +bool stream_circular_buffer_set_max_size_unsafe(STREAM_CIRCULAR_BUFFER *scb, size_t max_size, bool force) { + if(force || scb->cb->max_size < max_size) { + scb->cb->max_size = max_size; scb->stats.bytes_max_size = scb->cb->max_size; __atomic_store_n(&scb->atomic.max_size, scb->cb->max_size, __ATOMIC_RELAXED); stream_circular_buffer_stats_update_unsafe(scb); @@ -81,8 +78,9 @@ void stream_circular_buffer_recreate_timed_unsafe(STREAM_CIRCULAR_BUFFER *scb, u scb->stats.recreates++; // we increase even if we don't do it, to have sender_start() recreate its buffers if(scb->cb && scb->cb->size > CBUFFER_INITIAL_SIZE) { + size_t max_size = scb->cb->max_size; cbuffer_free(scb->cb); - scb->cb = cbuffer_new(CBUFFER_INITIAL_SIZE, stream_send.buffer_max_size, &netdata_buffers_statistics.cbuffers_streaming); + scb->cb = cbuffer_new(CBUFFER_INITIAL_SIZE, max_size, &netdata_buffers_statistics.cbuffers_streaming); } } @@ -96,15 +94,22 @@ void stream_circular_buffer_destroy(STREAM_CIRCULAR_BUFFER *scb) { } // adds data to the circular buffer, returns false when it can't (buffer is full) -bool stream_circular_buffer_add_unsafe(STREAM_CIRCULAR_BUFFER *scb, const char *data, size_t bytes_actual, size_t bytes_uncompressed, STREAM_TRAFFIC_TYPE type) { +bool stream_circular_buffer_add_unsafe( + STREAM_CIRCULAR_BUFFER *scb, const char *data, + size_t bytes_actual, size_t bytes_uncompressed, STREAM_TRAFFIC_TYPE type, bool autoscale) { scb->stats.adds++; scb->stats.bytes_added += bytes_actual; scb->stats.bytes_uncompressed += bytes_uncompressed; 
scb->stats.bytes_sent_by_type[type] += bytes_actual; - bool rc = cbuffer_add_unsafe(scb->cb, data, bytes_actual) == 0; - if(rc) - stream_circular_buffer_stats_update_unsafe(scb); - return rc; + + if(unlikely(autoscale && cbuffer_available_size_unsafe(scb->cb) < bytes_actual)) + stream_circular_buffer_set_max_size_unsafe(scb, scb->cb->max_size * 2, true); + + if(unlikely(cbuffer_add_unsafe(scb->cb, data, bytes_actual) != 0)) + return false; + + stream_circular_buffer_stats_update_unsafe(scb); + return true; } // return the first available chunk at the beginning of the buffer diff --git a/src/streaming/stream-circular-buffer.h b/src/streaming/stream-circular-buffer.h index 2b40898cd7a5e4..7fa2552483a213 100644 --- a/src/streaming/stream-circular-buffer.h +++ b/src/streaming/stream-circular-buffer.h @@ -6,10 +6,16 @@ #include "libnetdata/libnetdata.h" #include "stream-traffic-types.h" +#ifdef __cplusplus +extern "C" { +#endif + #define CBUFFER_INITIAL_SIZE (16 * 1024) #define CBUFFER_INITIAL_MAX_SIZE (10 * 1024 * 1024) #define THREAD_BUFFER_INITIAL_SIZE (8192) +#define STREAM_CIRCULAR_BUFFER_ADAPT_TO_TIMES_MAX_SIZE 3 + typedef struct stream_circular_buffer_stats { size_t adds; size_t sends; @@ -48,7 +54,7 @@ void stream_circular_buffer_recreate_timed_unsafe(STREAM_CIRCULAR_BUFFER *scb, u // returns true if it increased the buffer size // if it changes the size, it updates the statistics -bool stream_circular_buffer_set_max_size_unsafe(STREAM_CIRCULAR_BUFFER *scb, size_t uncompressed_msg_size, bool force); +bool stream_circular_buffer_set_max_size_unsafe(STREAM_CIRCULAR_BUFFER *scb, size_t max_size, bool force); // returns a pointer to the current circular buffer statistics // copy it if you plan to use it without a lock @@ -71,7 +77,9 @@ usec_t stream_circular_buffer_get_since_ut(STREAM_CIRCULAR_BUFFER *scb); // adds data to the end of the circular buffer, returns false when it can't (buffer is full) // it updates the statistics -bool 
stream_circular_buffer_add_unsafe(STREAM_CIRCULAR_BUFFER *scb, const char *data, size_t bytes_actual, size_t bytes_uncompressed, STREAM_TRAFFIC_TYPE type); +bool stream_circular_buffer_add_unsafe( + STREAM_CIRCULAR_BUFFER *scb, const char *data, size_t bytes_actual, size_t bytes_uncompressed, + STREAM_TRAFFIC_TYPE type, bool autoscale); // returns a pointer to the beginning of the buffer, and its size in bytes size_t stream_circular_buffer_get_unsafe(STREAM_CIRCULAR_BUFFER *scb, char **chunk); @@ -80,4 +88,8 @@ size_t stream_circular_buffer_get_unsafe(STREAM_CIRCULAR_BUFFER *scb, char **chu // it updates the statistics void stream_circular_buffer_del_unsafe(STREAM_CIRCULAR_BUFFER *scb, size_t bytes); +#ifdef __cplusplus +} +#endif + #endif //NETDATA_STREAM_CIRCULAR_BUFFER_H diff --git a/src/streaming/stream-compression/compression.h b/src/streaming/stream-compression/compression.h index 4e65dfd908f26e..67bc8c0829564f 100644 --- a/src/streaming/stream-compression/compression.h +++ b/src/streaming/stream-compression/compression.h @@ -124,7 +124,10 @@ static inline size_t stream_decompress_decode_signature(const char *data, size_t if (unlikely(data_size != STREAM_COMPRESSION_SIGNATURE_SIZE)) return 0; - stream_compression_signature_t sign = *(stream_compression_signature_t *)data; + stream_compression_signature_t sign; + memcpy(&sign, data, sizeof(stream_compression_signature_t)); // Safe copy to aligned variable + // stream_compression_signature_t sign = *(stream_compression_signature_t *)data; + if (unlikely((sign & STREAM_COMPRESSION_SIGNATURE_MASK) != STREAM_COMPRESSION_SIGNATURE)) return 0; diff --git a/src/streaming/stream-conf.c b/src/streaming/stream-conf.c index 9630b02d04d170..1b14e925afa318 100644 --- a/src/streaming/stream-conf.c +++ b/src/streaming/stream-conf.c @@ -194,7 +194,7 @@ void stream_conf_receiver_config(struct receiver_state *rpt, struct stream_recei rrd_memory_mode_name(default_rrd_memory_mode)))); if (unlikely(config->mode == 
RRD_MEMORY_MODE_DBENGINE && !dbengine_enabled)) { - netdata_log_error("STREAM '%s' [receive from %s:%s]: " + netdata_log_error("STREAM RECEIVE '%s' [from [%s]:%s]: " "dbengine is not enabled, falling back to default." , rpt->hostname , rpt->client_ip, rpt->client_port @@ -270,11 +270,8 @@ void stream_conf_receiver_config(struct receiver_state *rpt, struct stream_recei stream_parse_compression_order( config, appconfig_get( - &stream_config, - machine_guid, - "compression algorithms order", - appconfig_get( - &stream_config, api_key, "compression algorithms order", STREAM_COMPRESSION_ALGORITHMS_ORDER))); + &stream_config, machine_guid, "compression algorithms order", + appconfig_get(&stream_config, api_key, "compression algorithms order", STREAM_COMPRESSION_ALGORITHMS_ORDER))); } config->ephemeral = diff --git a/src/streaming/stream-connector.c b/src/streaming/stream-connector.c index 11f9872365f087..0befff248839a9 100644 --- a/src/streaming/stream-connector.c +++ b/src/streaming/stream-connector.c @@ -2,49 +2,6 @@ #include "stream-sender-internals.h" -typedef struct { - char *os_name; - char *os_id; - char *os_version; - char *kernel_name; - char *kernel_version; -} stream_encoded_t; - -static void rrdpush_encode_variable(stream_encoded_t *se, RRDHOST *host) { - se->os_name = (host->system_info->host_os_name)?url_encode(host->system_info->host_os_name):strdupz(""); - se->os_id = (host->system_info->host_os_id)?url_encode(host->system_info->host_os_id):strdupz(""); - se->os_version = (host->system_info->host_os_version)?url_encode(host->system_info->host_os_version):strdupz(""); - se->kernel_name = (host->system_info->kernel_name)?url_encode(host->system_info->kernel_name):strdupz(""); - se->kernel_version = (host->system_info->kernel_version)?url_encode(host->system_info->kernel_version):strdupz(""); -} - -static void rrdpush_clean_encoded(stream_encoded_t *se) { - if (se->os_name) { - freez(se->os_name); - se->os_name = NULL; - } - - if (se->os_id) { - 
freez(se->os_id); - se->os_id = NULL; - } - - if (se->os_version) { - freez(se->os_version); - se->os_version = NULL; - } - - if (se->kernel_name) { - freez(se->kernel_name); - se->kernel_name = NULL; - } - - if (se->kernel_version) { - freez(se->kernel_version); - se->kernel_version = NULL; - } -} - static struct { const char *response; const char *status; @@ -152,7 +109,7 @@ static struct { .dynamic = false, .error = "remote server is initializing, we should try later", .worker_job_id = WORKER_SENDER_CONNECTOR_JOB_DISCONNECT_BAD_HANDSHAKE, - .postpone_reconnect_seconds = 2 * 60, // 2 minute + .postpone_reconnect_seconds = 30, // 30 seconds .priority = NDLP_NOTICE, }, @@ -303,12 +260,23 @@ stream_connect_validate_first_response(RRDHOST *host, struct sender_state *s, ch rfc3339_datetime_ut(buf, sizeof(buf), stream_parent_get_reconnection_ut(host->stream.snd.parents.current), 0, false); nd_log(NDLS_DAEMON, priority, - "STREAM %s [send to %s]: %s - will retry in %d secs, at %s", + "STREAM CONNECT '%s' [to %s]: %s - will retry in %d secs, at %s", rrdhost_hostname(host), s->connected_to, error, delay, buf); return false; } +static inline void buffer_key_value_urlencode(BUFFER *wb, const char *key, const char *value) { + char *encoded = NULL; + + if(value && *value) + encoded = url_encode(value); + + buffer_sprintf(wb, "%s=%s", key, encoded ? encoded : ""); + + freez(encoded); +} + bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeout) { worker_is_busy(WORKER_SENDER_CONNECTOR_JOB_CONNECTING); @@ -342,104 +310,53 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou /* TODO: During the implementation of #7265 switch the set of variables to HOST_* and CONTAINER_* if the version negotiation resulted in a high enough version. 
*/ - stream_encoded_t se; - rrdpush_encode_variable(&se, host); - - char http[HTTP_HEADER_SIZE + 1]; - int eol = snprintfz(http, HTTP_HEADER_SIZE, - "STREAM " - "key=%s" - "&hostname=%s" - "&registry_hostname=%s" - "&machine_guid=%s" - "&update_every=%d" - "&os=%s" - "&timezone=%s" - "&abbrev_timezone=%s" - "&utc_offset=%d" - "&hops=%d" - "&ml_capable=%d" - "&ml_enabled=%d" - "&mc_version=%d" - "&ver=%u" - "&NETDATA_INSTANCE_CLOUD_TYPE=%s" - "&NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=%s" - "&NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=%s" - "&NETDATA_SYSTEM_OS_NAME=%s" - "&NETDATA_SYSTEM_OS_ID=%s" - "&NETDATA_SYSTEM_OS_ID_LIKE=%s" - "&NETDATA_SYSTEM_OS_VERSION=%s" - "&NETDATA_SYSTEM_OS_VERSION_ID=%s" - "&NETDATA_SYSTEM_OS_DETECTION=%s" - "&NETDATA_HOST_IS_K8S_NODE=%s" - "&NETDATA_SYSTEM_KERNEL_NAME=%s" - "&NETDATA_SYSTEM_KERNEL_VERSION=%s" - "&NETDATA_SYSTEM_ARCHITECTURE=%s" - "&NETDATA_SYSTEM_VIRTUALIZATION=%s" - "&NETDATA_SYSTEM_VIRT_DETECTION=%s" - "&NETDATA_SYSTEM_CONTAINER=%s" - "&NETDATA_SYSTEM_CONTAINER_DETECTION=%s" - "&NETDATA_CONTAINER_OS_NAME=%s" - "&NETDATA_CONTAINER_OS_ID=%s" - "&NETDATA_CONTAINER_OS_ID_LIKE=%s" - "&NETDATA_CONTAINER_OS_VERSION=%s" - "&NETDATA_CONTAINER_OS_VERSION_ID=%s" - "&NETDATA_CONTAINER_OS_DETECTION=%s" - "&NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT=%s" - "&NETDATA_SYSTEM_CPU_FREQ=%s" - "&NETDATA_SYSTEM_TOTAL_RAM=%s" - "&NETDATA_SYSTEM_TOTAL_DISK_SIZE=%s" - "&NETDATA_PROTOCOL_VERSION=%s" - HTTP_1_1 HTTP_ENDL - "User-Agent: %s/%s" HTTP_ENDL - "Accept: */*" HTTP_HDR_END - , string2str(host->stream.snd.api_key) - , rrdhost_hostname(host) - , rrdhost_registry_hostname(host) - , host->machine_guid - , default_rrd_update_every - , rrdhost_os(host) - , rrdhost_timezone(host) - , rrdhost_abbrev_timezone(host) - , host->utc_offset - , s->hops - , host->system_info->ml_capable - , host->system_info->ml_enabled - , host->system_info->mc_version - , s->capabilities - , (host->system_info->cloud_provider_type) ? 
host->system_info->cloud_provider_type : "" - , (host->system_info->cloud_instance_type) ? host->system_info->cloud_instance_type : "" - , (host->system_info->cloud_instance_region) ? host->system_info->cloud_instance_region : "" - , se.os_name - , se.os_id - , (host->system_info->host_os_id_like) ? host->system_info->host_os_id_like : "" - , se.os_version - , (host->system_info->host_os_version_id) ? host->system_info->host_os_version_id : "" - , (host->system_info->host_os_detection) ? host->system_info->host_os_detection : "" - , (host->system_info->is_k8s_node) ? host->system_info->is_k8s_node : "" - , se.kernel_name - , se.kernel_version - , (host->system_info->architecture) ? host->system_info->architecture : "" - , (host->system_info->virtualization) ? host->system_info->virtualization : "" - , (host->system_info->virt_detection) ? host->system_info->virt_detection : "" - , (host->system_info->container) ? host->system_info->container : "" - , (host->system_info->container_detection) ? host->system_info->container_detection : "" - , (host->system_info->container_os_name) ? host->system_info->container_os_name : "" - , (host->system_info->container_os_id) ? host->system_info->container_os_id : "" - , (host->system_info->container_os_id_like) ? host->system_info->container_os_id_like : "" - , (host->system_info->container_os_version) ? host->system_info->container_os_version : "" - , (host->system_info->container_os_version_id) ? host->system_info->container_os_version_id : "" - , (host->system_info->container_os_detection) ? host->system_info->container_os_detection : "" - , (host->system_info->host_cores) ? host->system_info->host_cores : "" - , (host->system_info->host_cpu_freq) ? host->system_info->host_cpu_freq : "" - , (host->system_info->host_ram_total) ? host->system_info->host_ram_total : "" - , (host->system_info->host_disk_space) ? 
host->system_info->host_disk_space : "" - , STREAMING_PROTOCOL_VERSION - , rrdhost_program_name(host) - , rrdhost_program_version(host) - ); - http[eol] = 0x00; - rrdpush_clean_encoded(&se); + CLEAN_BUFFER *wb = buffer_create(0, NULL); + buffer_strcat(wb, "STREAM "); + buffer_key_value_urlencode(wb, "key", string2str(host->stream.snd.api_key)); + buffer_key_value_urlencode(wb, "&hostname", rrdhost_hostname(host)); + buffer_key_value_urlencode(wb, "&registry_hostname", rrdhost_registry_hostname(host)); + buffer_key_value_urlencode(wb, "&machine_guid", host->machine_guid); + buffer_sprintf(wb, "&update_every=%d", default_rrd_update_every); + buffer_key_value_urlencode(wb, "&os", rrdhost_os(host)); + buffer_key_value_urlencode(wb, "&timezone", rrdhost_timezone(host)); + buffer_key_value_urlencode(wb, "&abbrev_timezone", rrdhost_abbrev_timezone(host)); + buffer_sprintf(wb, "&utc_offset=%d", host->utc_offset); + buffer_sprintf(wb, "&hops=%d", s->hops); + buffer_sprintf(wb, "&ml_capable=%d", host->system_info->ml_capable); + buffer_sprintf(wb, "&ml_enabled=%d", host->system_info->ml_enabled); + buffer_sprintf(wb, "&mc_version=%d", host->system_info->mc_version); + buffer_sprintf(wb, "&ver=%u", s->capabilities); + buffer_key_value_urlencode(wb, "&NETDATA_INSTANCE_CLOUD_TYPE", host->system_info->cloud_provider_type); + buffer_key_value_urlencode(wb, "&NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE", host->system_info->cloud_instance_type); + buffer_key_value_urlencode(wb, "&NETDATA_INSTANCE_CLOUD_INSTANCE_REGION", host->system_info->cloud_instance_region); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_OS_NAME", host->system_info->host_os_name); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_OS_ID", host->system_info->host_os_id); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_OS_ID_LIKE", host->system_info->host_os_id_like); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_OS_VERSION", host->system_info->host_os_version); + buffer_key_value_urlencode(wb, 
"&NETDATA_SYSTEM_OS_VERSION_ID", host->system_info->host_os_version_id); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_OS_DETECTION", host->system_info->host_os_detection); + buffer_key_value_urlencode(wb, "&NETDATA_HOST_IS_K8S_NODE", host->system_info->is_k8s_node); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_KERNEL_NAME", host->system_info->kernel_name); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_KERNEL_VERSION", host->system_info->kernel_version); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_ARCHITECTURE", host->system_info->architecture); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_VIRTUALIZATION", host->system_info->virtualization); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_VIRT_DETECTION", host->system_info->virt_detection); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_CONTAINER", host->system_info->container); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_CONTAINER_DETECTION", host->system_info->container_detection); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_NAME", host->system_info->container_os_name); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_ID", host->system_info->container_os_id); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_ID_LIKE", host->system_info->container_os_id_like); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_VERSION", host->system_info->container_os_version); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_VERSION_ID", host->system_info->container_os_version_id); + buffer_key_value_urlencode(wb, "&NETDATA_CONTAINER_OS_DETECTION", host->system_info->container_os_detection); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", host->system_info->host_cores); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_CPU_FREQ", host->system_info->host_cpu_freq); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_TOTAL_RAM", host->system_info->host_ram_total); + buffer_key_value_urlencode(wb, "&NETDATA_SYSTEM_TOTAL_DISK_SIZE", 
host->system_info->host_disk_space); + buffer_key_value_urlencode(wb, "&NETDATA_PROTOCOL_VERSION", STREAMING_PROTOCOL_VERSION); + buffer_strcat(wb, HTTP_1_1 HTTP_ENDL); + buffer_sprintf(wb, "User-Agent: %s/%s" HTTP_ENDL, rrdhost_program_name(host), rrdhost_program_version(host)); + buffer_strcat(wb, "Accept: */*" HTTP_HDR_END); if (s->parent_using_h2o && stream_connect_upgrade_prelude(host, s)) { ND_LOG_STACK lgs[] = { @@ -455,8 +372,8 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou return false; } - ssize_t len = (ssize_t)strlen(http); - ssize_t bytes = nd_sock_send_timeout(&s->sock, http, len, 0, timeout); + ssize_t len = (ssize_t)buffer_strlen(wb); + ssize_t bytes = nd_sock_send_timeout(&s->sock, (void *)buffer_tostring(wb), len, 0, timeout); if(bytes <= 0) { // timeout is 0 ND_LOG_STACK lgs[] = { ND_LOG_FIELD_TXT(NDF_RESPONSE_CODE, STREAM_STATUS_TIMEOUT), @@ -468,7 +385,7 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou nd_sock_close(&s->sock); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: failed to send HTTP header to remote netdata.", + "STREAM CONNECT '%s' [to %s]: failed to send HTTP header to remote netdata.", rrdhost_hostname(host), s->connected_to); stream_parent_set_reconnect_delay( @@ -476,7 +393,8 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou return false; } - bytes = nd_sock_recv_timeout(&s->sock, http, HTTP_HEADER_SIZE, 0, timeout); + char response[4096]; + bytes = nd_sock_recv_timeout(&s->sock, response, sizeof(response) - 1, 0, timeout); if(bytes <= 0) { // timeout is 0 nd_sock_close(&s->sock); @@ -489,7 +407,7 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou worker_is_busy(WORKER_SENDER_CONNECTOR_JOB_DISCONNECT_TIMEOUT); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: remote netdata does not respond.", + "STREAM CONNECT '%s' [to %s]: remote netdata does not respond.", 
rrdhost_hostname(host), s->connected_to); stream_parent_set_reconnect_delay( @@ -497,21 +415,21 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou return false; } - http[bytes] = '\0'; + response[bytes] = '\0'; if(sock_setnonblock(s->sock.fd) < 0) nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s]: cannot set non-blocking mode for socket.", + "STREAM CONNECT '%s' [to %s]: cannot set non-blocking mode for socket.", rrdhost_hostname(host), s->connected_to); sock_setcloexec(s->sock.fd); if(sock_enlarge_out(s->sock.fd) < 0) nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM %s [send to %s]: cannot enlarge the socket buffer.", + "STREAM CONNECT '%s' [to %s]: cannot enlarge the socket buffer.", rrdhost_hostname(host), s->connected_to); - if(!stream_connect_validate_first_response(host, s, http, bytes)) { + if(!stream_connect_validate_first_response(host, s, response, bytes)) { nd_sock_close(&s->sock); return false; } @@ -527,7 +445,7 @@ bool stream_connect(struct sender_state *s, uint16_t default_port, time_t timeou ND_LOG_STACK_PUSH(lgs); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM [connector] %s: connected to %s...", + "STREAM CONNECT '%s' [to %s]: connected to parent...", rrdhost_hostname(host), s->connected_to); return true; @@ -592,7 +510,7 @@ void stream_connector_requeue(struct sender_state *s) { struct connector *sc = stream_connector_get(s); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM [connector] [%s]: adding host in connector queue...", + "STREAM CONNECT '%s' [to parent]: adding host in connector queue...", rrdhost_hostname(s->host)); spinlock_lock(&sc->queue.spinlock); @@ -608,13 +526,13 @@ void stream_connector_add(struct sender_state *s) { // multiple threads may come here - only one should be able to pass through stream_sender_lock(s); if(!rrdhost_has_stream_sender_enabled(s->host) || !s->host->stream.snd.destination || !s->host->stream.snd.api_key) { - nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM %s [send]: host has streaming 
disabled - not sending data to a parent.", + nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM CONNECT '%s' [disabled]: host has streaming disabled - not sending data to a parent.", rrdhost_hostname(s->host)); stream_sender_unlock(s); return; } if(rrdhost_flag_check(s->host, RRDHOST_FLAG_STREAM_SENDER_ADDED)) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, "STREAM %s [send]: host has already added to sender - ignoring request", + nd_log(NDLS_DAEMON, NDLP_DEBUG, "STREAM CONNECT '%s' [duplicate]: host has already added to sender - ignoring request.", rrdhost_hostname(s->host)); stream_sender_unlock(s); return; @@ -632,7 +550,7 @@ void stream_connector_add(struct sender_state *s) { static void stream_connector_remove(struct sender_state *s) { nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM [connector] [%s]: stopped streaming connector for host: %s", + "STREAM CONNECT '%s' [stopped]: stopped streaming connector for host, reason: %s", rrdhost_hostname(s->host), stream_handshake_error_to_string(s->exit.reason)); struct connector *sc = stream_connector_get(s); @@ -658,8 +576,8 @@ static void *stream_connector_thread(void *ptr) { worker_register_job_custom_metric(WORKER_SENDER_CONNECTOR_JOB_CANCELLED_NODES, "cancelled nodes", "nodes", WORKER_METRIC_ABSOLUTE); unsigned job_id = 0; - while(!nd_thread_signaled_to_cancel() && service_running(SERVICE_STREAMING)) { + worker_is_idle(); job_id = completion_wait_for_a_job_with_timeout(&sc->completion, job_id, 1000); size_t nodes = 0, connected_nodes = 0, failed_nodes = 0, cancelled_nodes = 0; @@ -730,7 +648,7 @@ bool stream_connector_init(struct sender_state *s) { if(!sc->thread) { sc->id = (int8_t)(sc - connector_globals.connectors); // find the slot number if(&connector_globals.connectors[sc->id] != sc) - fatal("Connector ID and slot do not match!"); + fatal("STREAM CONNECT '%s': connector ID and slot do not match!", rrdhost_hostname(s->host)); spinlock_init(&sc->queue.spinlock); completion_init(&sc->completion); @@ -741,7 +659,9 @@ bool 
stream_connector_init(struct sender_state *s) { sc->thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, stream_connector_thread, sc); if (!sc->thread) - nd_log_daemon(NDLP_ERR, "STREAM connector: failed to create new thread for client."); + nd_log_daemon(NDLP_ERR, + "STREAM CONNECT '%s': failed to create new thread for client.", + rrdhost_hostname(s->host)); } spinlock_unlock(&spinlock); diff --git a/src/streaming/stream-control.c b/src/streaming/stream-control.c new file mode 100644 index 00000000000000..a5b49c38ca0c44 --- /dev/null +++ b/src/streaming/stream-control.c @@ -0,0 +1,116 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "stream-control.h" +#include "stream.h" +#include "replication.h" + +static struct { + CACHE_LINE_PADDING(); + + uint32_t backfill_runners; + + CACHE_LINE_PADDING(); + + uint32_t replication_runners; + + CACHE_LINE_PADDING(); + + uint32_t user_data_queries_runners; + + CACHE_LINE_PADDING(); + + uint32_t user_weights_queries_runners; + + CACHE_LINE_PADDING(); +} sc; + +// -------------------------------------------------------------------------------------------------------------------- +// backfilling + +static uint32_t backfill_runners(void) { + return __atomic_load_n(&sc.backfill_runners, __ATOMIC_RELAXED); +} + +void stream_control_backfill_query_started(void) { + __atomic_add_fetch(&sc.backfill_runners, 1, __ATOMIC_RELAXED); +} + +void stream_control_backfill_query_finished(void) { + __atomic_sub_fetch(&sc.backfill_runners, 1, __ATOMIC_RELAXED); +} + +// -------------------------------------------------------------------------------------------------------------------- +// replication + +static uint32_t replication_runners(void) { + return __atomic_load_n(&sc.replication_runners, __ATOMIC_RELAXED); +} + +void stream_control_replication_query_started(void) { + __atomic_add_fetch(&sc.replication_runners, 1, __ATOMIC_RELAXED); +} + +void stream_control_replication_query_finished(void) { + 
__atomic_sub_fetch(&sc.replication_runners, 1, __ATOMIC_RELAXED); +} + +// -------------------------------------------------------------------------------------------------------------------- +// user data queries + +static uint32_t user_data_query_runners(void) { + return __atomic_load_n(&sc.user_data_queries_runners, __ATOMIC_RELAXED); +} + +void stream_control_user_data_query_started(void) { + __atomic_add_fetch(&sc.user_data_queries_runners, 1, __ATOMIC_RELAXED); +} + +void stream_control_user_data_query_finished(void) { + __atomic_sub_fetch(&sc.user_data_queries_runners, 1, __ATOMIC_RELAXED); +} + +// -------------------------------------------------------------------------------------------------------------------- +// user weights queries + +static uint32_t user_weights_query_runners(void) { + return __atomic_load_n(&sc.user_weights_queries_runners, __ATOMIC_RELAXED); +} + +void stream_control_user_weights_query_started(void) { + __atomic_add_fetch(&sc.user_weights_queries_runners, 1, __ATOMIC_RELAXED); +} + +void stream_control_user_weights_query_finished(void) { + __atomic_sub_fetch(&sc.user_weights_queries_runners, 1, __ATOMIC_RELAXED); +} + +// -------------------------------------------------------------------------------------------------------------------- +// consumer API + +bool stream_control_ml_should_be_running(void) { + return backfill_runners() == 0 && + replication_runners() == 0 && + user_data_query_runners() == 0 && + user_weights_query_runners() == 0; +} + +bool stream_control_children_should_be_accepted(void) { + // we should not check for replication here. 
+ // replication benefits from multiple nodes (merges the extents) + // and also the nodes should be close in time in the db + // - checking for replication leaves the last few nodes locked-out (since all the others are replicating) + + return backfill_runners() == 0; +} + +bool stream_control_replication_should_be_running(void) { + return backfill_runners() == 0 && + user_data_query_runners() == 0 && + user_weights_query_runners() == 0; +} + +bool stream_control_health_should_be_running(void) { + return backfill_runners() == 0 && + replication_runners() == 0 && + (user_data_query_runners() + user_weights_query_runners()) <= 1; +} diff --git a/src/streaming/stream-control.h b/src/streaming/stream-control.h new file mode 100644 index 00000000000000..06cb0db9a7e81a --- /dev/null +++ b/src/streaming/stream-control.h @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STREAM_CONTROL_H +#define NETDATA_STREAM_CONTROL_H + +#include "libnetdata/libnetdata.h" + +#define STREAM_CONTROL_SLEEP_UT (10 * USEC_PER_MS + os_random(10 * USEC_PER_MS)) + +#define stream_control_throttle() microsleep(STREAM_CONTROL_SLEEP_UT) + +void stream_control_backfill_query_started(void); +void stream_control_backfill_query_finished(void); + +void stream_control_replication_query_started(void); +void stream_control_replication_query_finished(void); + +void stream_control_user_weights_query_started(void); +void stream_control_user_weights_query_finished(void); + +void stream_control_user_data_query_started(void); +void stream_control_user_data_query_finished(void); + +bool stream_control_ml_should_be_running(void); +bool stream_control_children_should_be_accepted(void); +bool stream_control_replication_should_be_running(void); +bool stream_control_health_should_be_running(void); + +#endif //NETDATA_STREAM_CONTROL_H diff --git a/src/streaming/stream-parents.c b/src/streaming/stream-parents.c index 9ca78fc05d2940..177177ed58e56f 100644 --- 
a/src/streaming/stream-parents.c +++ b/src/streaming/stream-parents.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "stream-sender-internals.h" +#include "replication.h" #define TIME_TO_CONSIDER_PARENTS_SIMILAR 120 @@ -150,7 +151,7 @@ void rrdhost_stream_parents_to_json(BUFFER *wb, RRDHOST_STATUS *s) { STREAM_PARENT *d; for (d = s->host->stream.snd.parents.all; d; d = d->next) { buffer_json_add_array_item_object(wb); - buffer_json_member_add_uint64(wb, "attempts", d->attempts); + buffer_json_member_add_uint64(wb, "attempts", d->attempts + 1); { if (d->ssl) { snprintfz(buf, sizeof(buf) - 1, "%s:SSL", string2str(d->destination)); @@ -305,6 +306,10 @@ int stream_info_to_json_v1(BUFFER *wb, const char *machine_guid) { buffer_json_member_add_uint64(wb, "nonce", os_random32()); if(ret == HTTP_RESP_OK) { + if((status.ingest.status == RRDHOST_INGEST_STATUS_ARCHIVED || status.ingest.status == RRDHOST_INGEST_STATUS_OFFLINE) && + !stream_control_children_should_be_accepted()) + status.ingest.status = RRDHOST_INGEST_STATUS_INITIALIZING; + buffer_json_member_add_string(wb, "db_status", rrdhost_db_status_to_string(status.db.status)); buffer_json_member_add_string(wb, "db_liveness", rrdhost_db_liveness_to_string(status.db.liveness)); buffer_json_member_add_string(wb, "ingest_type", rrdhost_ingest_type_to_string(status.ingest.type)); @@ -375,7 +380,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po rrdhost_program_version(localhost)); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: fetching stream info from '%s'...", + "STREAM PARENTS '%s': fetching stream info from '%s'...", hostname, string2str(d->destination)); // Establish connection @@ -384,7 +389,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po d->selection.info = false; stream_parent_nd_sock_error_to_reason(d, &sock); nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: failed to connect for stream info to '%s': 
%s", + "STREAM PARENTS '%s': failed to connect for stream info to '%s': %s", hostname, string2str(d->destination), ND_SOCK_ERROR_2str(sock.error)); return false; @@ -396,7 +401,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po d->selection.info = false; stream_parent_nd_sock_error_to_reason(d, &sock); nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: failed to send stream info request to '%s': %s", + "STREAM PARENTS '%s': failed to send stream info request to '%s': %s", hostname, string2str(d->destination), ND_SOCK_ERROR_2str(sock.error)); return false; @@ -413,7 +418,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po if (remaining <= 1) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: stream info receive buffer is full while receiving response from '%s'", + "STREAM PARENTS '%s': stream info receive buffer is full while receiving response from '%s'", hostname, string2str(d->destination)); d->selection.info = false; d->reason = STREAM_HANDSHAKE_INTERNAL_ERROR; @@ -423,7 +428,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po ssize_t received = nd_sock_recv_timeout(&sock, buf + total_received, remaining - 1, 0, 5); if (received <= 0) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: socket receive error while querying stream info on '%s' " + "STREAM PARENTS '%s': socket receive error while querying stream info on '%s' " "(total received %zu, payload received %zu, content length %zu): %s", hostname, string2str(d->destination), total_received, payload_received, content_length, @@ -453,7 +458,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po char *content_length_ptr = strstr(buf, "Content-Length: "); if (!content_length_ptr) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: stream info response from '%s' does not have a Content-Length", + "STREAM PARENTS '%s': stream info response from 
'%s' does not have a Content-Length", hostname, string2str(d->destination)); d->selection.info = false; @@ -463,7 +468,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po content_length = strtoul(content_length_ptr + strlen("Content-Length: "), NULL, 10); if (!content_length) { nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: stream info response from '%s' has invalid Content-Length", + "STREAM PARENTS '%s': stream info response from '%s' has invalid Content-Length", hostname, string2str(d->destination)); d->selection.info = false; @@ -479,7 +484,7 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po d->selection.info = false; d->reason = STREAM_HANDSHAKE_NO_STREAM_INFO; nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: failed to parse stream info response from '%s', JSON data: %s", + "STREAM PARENTS '%s': failed to parse stream info response from '%s', JSON data: %s", hostname, string2str(d->destination), payload_start); return false; } @@ -490,14 +495,14 @@ static bool stream_info_fetch(STREAM_PARENT *d, const char *uuid, int default_po d->selection.info = false; d->reason = STREAM_HANDSHAKE_NO_STREAM_INFO; nd_log(NDLS_DAEMON, NDLP_WARNING, - "STREAM PARENTS of %s: failed to extract fields from JSON stream info response from '%s': %s", + "STREAM PARENTS '%s': failed to extract fields from JSON stream info response from '%s': %s", hostname, string2str(d->destination), buffer_tostring(error)); return false; } nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: received stream_info data from '%s': " + "STREAM PARENTS '%s': received stream_info data from '%s': " "status: %d, nodes: %zu, receivers: %zu, first_time_s: %ld, last_time_s: %ld, " "db status: %s, db liveness: %s, ingest type: %s, ingest status: %s", hostname, string2str(d->destination), @@ -554,7 +559,7 @@ bool stream_parent_connect_to_one_unsafe( // do we have any parents? 
if(!size) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, "STREAM PARENTS of %s: no parents configured", rrdhost_hostname(host)); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "STREAM PARENTS '%s': no parents configured", rrdhost_hostname(host)); return false; } @@ -581,7 +586,7 @@ bool stream_parent_connect_to_one_unsafe( if (d->postpone_until_ut > now_ut) { skipped_but_useful++; nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: skipping useful parent '%s': POSTPONED FOR %ld SECS MORE: %s", + "STREAM PARENTS '%s': skipping useful parent '%s': POSTPONED FOR %ld SECS MORE: %s", rrdhost_hostname(host), string2str(d->destination), (time_t)((d->postpone_until_ut - now_ut) / USEC_PER_SEC), @@ -603,7 +608,7 @@ bool stream_parent_connect_to_one_unsafe( d->banned_permanently = true; skipped_not_useful++; nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM PARENTS of %s: destination '%s' is banned permanently because it is the origin server", + "STREAM PARENTS '%s': destination '%s' is banned permanently because it is the origin server", rrdhost_hostname(host), string2str(d->destination)); continue; } @@ -631,7 +636,7 @@ bool stream_parent_connect_to_one_unsafe( d->banned_for_this_session = true; skipped_not_useful++; nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM PARENTS of %s: destination '%s' is banned for this session, because it is in our path before us.", + "STREAM PARENTS '%s': destination '%s' is banned for this session, because it is in our path before us.", rrdhost_hostname(host), string2str(d->destination)); continue; } @@ -648,7 +653,7 @@ bool stream_parent_connect_to_one_unsafe( if(skip) { skipped_but_useful++; nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: skipping useful parent '%s': %s", + "STREAM PARENTS '%s': skipping useful parent '%s': %s", rrdhost_hostname(host), string2str(d->destination), stream_handshake_error_to_string(d->reason)); @@ -664,7 +669,7 @@ bool stream_parent_connect_to_one_unsafe( // can we use any parent? 
if(!count) { nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: no parents available (%zu skipped but useful, %zu skipped not useful)", + "STREAM PARENTS '%s': no parents available (%zu skipped but useful, %zu skipped not useful)", rrdhost_hostname(host), skipped_but_useful, skipped_not_useful); return false; @@ -692,7 +697,7 @@ bool stream_parent_connect_to_one_unsafe( // if we have only 1 similar, move on if (similar == 1) { nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: reordering keeps parent No %zu, '%s'", + "STREAM PARENTS '%s': reordering keeps parent No %zu, '%s'", rrdhost_hostname(host), base, string2str(array[base]->destination)); array[base]->selection.order = base + 1; array[base]->selection.batch = batch + 1; @@ -716,7 +721,7 @@ bool stream_parent_connect_to_one_unsafe( SWAP(array[base], array[chosen]); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: random reordering of %zu similar parents (slots %zu to %zu), No %zu is '%s'", + "STREAM PARENTS '%s': random reordering of %zu similar parents (slots %zu to %zu), No %zu is '%s'", rrdhost_hostname(host), similar, base, base + similar, base, string2str(array[base]->destination)); @@ -743,7 +748,7 @@ bool stream_parent_connect_to_one_unsafe( array[0]->selection.random = false; nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: only 1 parent is available: '%s'", + "STREAM PARENTS '%s': only 1 parent is available: '%s'", rrdhost_hostname(host), string2str(array[0]->destination)); } @@ -760,7 +765,7 @@ bool stream_parent_connect_to_one_unsafe( } nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: connecting to '%s' (default port: %d, parent %zu of %zu)...", + "STREAM PARENTS '%s': connecting to '%s' (default port: %d, parent %zu of %zu)...", rrdhost_hostname(host), string2str(d->destination), default_port, i + 1, count); @@ -788,7 +793,7 @@ bool stream_parent_connect_to_one_unsafe( DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(host->stream.snd.parents.all, d, prev, next); 
nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: connected to '%s' (default port: %d, fd %d)...", + "STREAM PARENTS '%s': connected to '%s' (default port: %d, fd %d)...", rrdhost_hostname(host), string2str(d->destination), default_port, sender_sock->fd); @@ -798,7 +803,7 @@ bool stream_parent_connect_to_one_unsafe( else { stream_parent_nd_sock_error_to_reason(d, sender_sock); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: stream connection to '%s' failed (default port: %d): %s", + "STREAM PARENTS '%s': stream connection to '%s' failed (default port: %d): %s", rrdhost_hostname(host), string2str(d->destination), default_port, ND_SOCK_ERROR_2str(sender_sock->error)); @@ -854,7 +859,7 @@ static bool stream_parent_add_one_unsafe(char *entry, void *data) { t->count++; nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM PARENTS of %s: added streaming destination No %d: '%s'", + "STREAM PARENTS '%s': added streaming destination No %d: '%s'", rrdhost_hostname(t->host), t->count, string2str(d->destination)); return false; // we return false, so that we will get all defined destinations diff --git a/src/streaming/stream-path.c b/src/streaming/stream-path.c index 451a3faf840bb9..f6d955af6b073c 100644 --- a/src/streaming/stream-path.c +++ b/src/streaming/stream-path.c @@ -237,7 +237,7 @@ void stream_path_send_to_child(RRDHOST *host) { rrdhost_receiver_lock(host); if(stream_has_capability(host->receiver, STREAM_CAP_PATHS) && - !rrdhost_flag_check(host, RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED)) { + rrdhost_flag_check(host, RRDHOST_FLAG_COLLECTOR_ONLINE)) { CLEAN_BUFFER *wb = buffer_create(0, NULL); buffer_sprintf(wb, PLUGINSD_KEYWORD_JSON " " PLUGINSD_KEYWORD_JSON_CMD_STREAM_PATH "\n%s\n" PLUGINSD_KEYWORD_JSON_END "\n", buffer_tostring(payload)); @@ -317,7 +317,7 @@ static bool parse_single_path(json_object *jobj, const char *path, STREAM_PATH * } if(p->hops < 0) { - buffer_strcat(error, "hops cannot be negative"); + buffer_strcat(error, "hops cannot be negative 
(probably the child disconnected from the Netdata before us"); return false; } @@ -360,7 +360,8 @@ bool stream_path_set_from_json(RRDHOST *host, const char *json, bool from_parent CLEAN_JSON_OBJECT *jobj = json_tokener_parse(json); if(!jobj) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM PATH: Cannot parse json: %s", json); + "STREAM PATH '%s': Cannot parse json: %s", + rrdhost_hostname(host), json); return false; } @@ -381,14 +382,16 @@ bool stream_path_set_from_json(RRDHOST *host, const char *json, bool from_parent json_object *joption = json_object_array_get_idx(_jarray, i); if (!json_object_is_type(joption, json_type_object)) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM PATH: Array item No %zu is not an object: %s", i, json); + "STREAM PATH '%s': Array item No %zu is not an object: %s", + rrdhost_hostname(host), i, json); continue; } if(!parse_single_path(joption, "", &host->stream.path.array[host->stream.path.used], error)) { stream_path_cleanup(&host->stream.path.array[host->stream.path.used]); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM PATH: Array item No %zu cannot be parsed: %s: %s", i, buffer_tostring(error), json); + "STREAM PATH '%s': Array item No %zu cannot be parsed: %s: %s", + rrdhost_hostname(host), i, buffer_tostring(error), json); } else host->stream.path.used++; diff --git a/src/streaming/stream-receiver-connection.c b/src/streaming/stream-receiver-connection.c index fc3da73d4f53d5..09641f17770389 100644 --- a/src/streaming/stream-receiver-connection.c +++ b/src/streaming/stream-receiver-connection.c @@ -4,6 +4,7 @@ #include "stream-thread.h" #include "stream-receiver-internals.h" #include "web/server/h2o/http_server.h" +#include "replication.h" // -------------------------------------------------------------------------------------------------------------------- @@ -25,8 +26,9 @@ void stream_receiver_log_status(struct receiver_state *rpt, const char *msg, con , (rpt->machine_guid && *rpt->machine_guid) ? 
rpt->machine_guid : "" , msg); - nd_log(NDLS_DAEMON, priority, "STREAM RECEIVE '%s': %s %s%s%s" + nd_log(NDLS_DAEMON, priority, "STREAM RECEIVE '%s' [from [%s]:%s]: %s %s%s%s" , (rpt->hostname && *rpt->hostname) ? rpt->hostname : "" + , rpt->client_ip, rpt->client_port , msg , rpt->exit.reason != STREAM_HANDSHAKE_NEVER?" (":"" , stream_handshake_error_to_string(rpt->exit.reason) @@ -142,30 +144,41 @@ static bool stream_receiver_send_first_response(struct receiver_state *rpt) { if(!host) { stream_receiver_log_status( rpt, - "failed to find/create host structure, rejecting connection", + "rejecting streaming connection; failed to find or create the required host structure", STREAM_STATUS_INTERNAL_SERVER_ERROR, NDLP_ERR); stream_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_INTERNAL_ERROR); return false; } + // IMPORTANT: KEEP THIS FIRST AFTER CHECKING host RESPONSE! + // THIS IS HOW WE KNOW THE system_info IS GONE NOW... + // system_info has been consumed by the host structure + rpt->system_info = NULL; if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) { stream_receiver_log_status( rpt, - "host is initializing, retry later", + "rejecting streaming connection; host is initializing, retry later", STREAM_STATUS_INITIALIZATION_IN_PROGRESS, NDLP_NOTICE); stream_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_INITIALIZATION); return false; } - // system_info has been consumed by the host structure - rpt->system_info = NULL; + if (unlikely(!stream_control_children_should_be_accepted())) { + stream_receiver_log_status( + rpt, + "rejecting streaming connection; the system is backfilling higher tiers with high-resolution data, retry later", + STREAM_STATUS_INITIALIZATION_IN_PROGRESS, NDLP_NOTICE); + + stream_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_INITIALIZATION); + return false; + } if(!rrdhost_set_receiver(host, rpt)) { stream_receiver_log_status( rpt, - "host is already served by another 
receiver", + "rejecting streaming connection; host is already served by another receiver", STREAM_STATUS_DUPLICATE_RECEIVER, NDLP_INFO); stream_send_error_on_taken_over_connection(rpt, START_STREAMING_ERROR_ALREADY_STREAMING); @@ -174,7 +187,7 @@ static bool stream_receiver_send_first_response(struct receiver_state *rpt) { } #ifdef NETDATA_INTERNAL_CHECKS - netdata_log_info("STREAM '%s' [receive from [%s]:%s]: " + netdata_log_info("STREAM RECEIVE '%s' [from [%s]:%s]: " "client willing to stream metrics for host '%s' with machine_guid '%s': " "update every = %d, history = %d, memory mode = %s, health %s,%s" , rpt->hostname @@ -395,7 +408,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!rpt->key || !*rpt->key) { stream_receiver_log_status( rpt, - "request without an API key, rejecting connection", + "rejecting streaming connection; request without an API key", STREAM_STATUS_NO_API_KEY, NDLP_WARNING); stream_receiver_free(rpt); @@ -405,7 +418,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!rpt->hostname || !*rpt->hostname) { stream_receiver_log_status( rpt, - "request without a hostname, rejecting connection", + "rejecting streaming connection; request without a hostname", STREAM_STATUS_NO_HOSTNAME, NDLP_WARNING); stream_receiver_free(rpt); @@ -418,7 +431,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!rpt->machine_guid || !*rpt->machine_guid) { stream_receiver_log_status( rpt, - "request without a machine GUID, rejecting connection", + "rejecting streaming connection; request without a machine UUID", STREAM_STATUS_NO_MACHINE_GUID, NDLP_WARNING); stream_receiver_free(rpt); @@ -431,7 +444,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if (regenerate_guid(rpt->key, buf) == -1) { stream_receiver_log_status( rpt, - "API key is not a valid UUID (use the command uuidgen to generate one)", + "rejecting streaming 
connection; API key is not a valid UUID (use the command uuidgen to generate one)", STREAM_STATUS_INVALID_API_KEY, NDLP_WARNING); stream_receiver_free(rpt); @@ -441,7 +454,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if (regenerate_guid(rpt->machine_guid, buf) == -1) { stream_receiver_log_status( rpt, - "machine GUID is not a valid UUID", + "rejecting streaming connection; machine UUID is not a valid UUID", STREAM_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); stream_receiver_free(rpt); @@ -452,7 +465,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!stream_conf_is_key_type(rpt->key, "api")) { stream_receiver_log_status( rpt, - "API key is a machine GUID", + "rejecting streaming connection; API key provided is a machine UUID (did you mix them up?)", STREAM_STATUS_INVALID_API_KEY, NDLP_WARNING); stream_receiver_free(rpt); @@ -464,7 +477,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!stream_conf_api_key_is_enabled(rpt->key, false)) { stream_receiver_log_status( rpt, - "API key is not enabled", + "rejecting streaming connection; API key is not enabled in stream.conf", STREAM_STATUS_API_KEY_DISABLED, NDLP_WARNING); stream_receiver_free(rpt); @@ -474,7 +487,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!stream_conf_api_key_allows_client(rpt->key, w->client_ip)) { stream_receiver_log_status( rpt, - "API key is not allowed from this IP", + "rejecting streaming connection; API key is not allowed from this IP", STREAM_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); stream_receiver_free(rpt); @@ -484,7 +497,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if (!stream_conf_is_key_type(rpt->machine_guid, "machine")) { stream_receiver_log_status( rpt, - "machine GUID is an API key", + "rejecting streaming connection; machine UUID is an API key (did you mix them up?)", 
STREAM_STATUS_INVALID_MACHINE_GUID, NDLP_WARNING); stream_receiver_free(rpt); @@ -496,7 +509,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!stream_conf_api_key_is_enabled(rpt->machine_guid, true)) { stream_receiver_log_status( rpt, - "machine GUID is not enabled", + "rejecting streaming connection; machine UUID is not enabled in stream.conf", STREAM_STATUS_MACHINE_GUID_DISABLED, NDLP_WARNING); stream_receiver_free(rpt); @@ -506,7 +519,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ if(!stream_conf_api_key_allows_client(rpt->machine_guid, w->client_ip)) { stream_receiver_log_status( rpt, - "machine GUID is not allowed from this IP", + "rejecting streaming connection; machine UUID is not allowed from this IP", STREAM_STATUS_NOT_ALLOWED_IP, NDLP_WARNING); stream_receiver_free(rpt); @@ -518,7 +531,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ stream_receiver_log_status( rpt, - "machine GUID is my own", + "rejecting streaming connection; machine UUID is my own", STREAM_STATUS_LOCALHOST, NDLP_DEBUG); char initial_response[HTTP_HEADER_SIZE + 1]; @@ -551,7 +564,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ char msg[100 + 1]; snprintfz(msg, sizeof(msg) - 1, - "rate limit, will accept new connection in %ld secs", + "rejecting streaming connection; rate limit, will accept new connection in %ld secs", (long)(web_client_streaming_rate_t - (now - last_stream_accepted_t))); stream_receiver_log_status(rpt, msg, STREAM_STATUS_RATE_LIMIT, NDLP_NOTICE); @@ -616,7 +629,7 @@ int stream_receiver_accept_connection(struct web_client *w, char *decoded_query_ char msg[200 + 1]; snprintfz(msg, sizeof(msg) - 1, - "multiple connections for same host, " + "rejecting streaming connection; multiple connections for same host, " "old connection was last used %ld secs ago%s", age, receiver_stale ? 
" (signaled old receiver to stop)" : " (new connection not accepted)"); diff --git a/src/streaming/stream-receiver-internals.h b/src/streaming/stream-receiver-internals.h index 4210c78d18de76..f97f260507c654 100644 --- a/src/streaming/stream-receiver-internals.h +++ b/src/streaming/stream-receiver-internals.h @@ -35,6 +35,8 @@ struct receiver_state { struct buffered_reader reader; struct { + bool draining_input; // used exclusively by the stream thread + // The parser pointer is safe to read and use, only when having the host receiver lock. // Without this lock, the data pointed by the pointer may vanish randomly. // Also, since the receiver sets it when it starts, it should be read with @@ -88,7 +90,6 @@ void stream_receiver_log_status(struct receiver_state *rpt, const char *msg, con void stream_receiver_free(struct receiver_state *rpt); bool stream_receiver_signal_to_stop_and_wait(RRDHOST *host, STREAM_HANDSHAKE reason); -ssize_t send_to_child(const char *txt, void *data, STREAM_TRAFFIC_TYPE type); void stream_receiver_send_opcode(struct receiver_state *rpt, struct stream_opcode msg); void stream_receiver_handle_op(struct stream_thread *sth, struct receiver_state *rpt, struct stream_opcode *msg); diff --git a/src/streaming/stream-receiver.c b/src/streaming/stream-receiver.c index 1778b7bf8f09dd..06b17157521a1f 100644 --- a/src/streaming/stream-receiver.c +++ b/src/streaming/stream-receiver.c @@ -147,14 +147,18 @@ static inline decompressor_status_t receiver_feed_decompressor(struct receiver_s stream_decompressor_start(&r->thread.compressed.decompressor, buf + start, signature_size); if (unlikely(!compressed_message_size)) { - nd_log(NDLS_DAEMON, NDLP_ERR, "multiplexed uncompressed data in compressed stream!"); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[x] '%s' [from [%s]:%s]: multiplexed uncompressed data in compressed stream!", + rrdhost_hostname(r->host), r->client_ip, r->client_port); return DECOMPRESS_FAILED; } if(unlikely(compressed_message_size > 
COMPRESSION_MAX_MSG_SIZE)) { nd_log(NDLS_DAEMON, NDLP_ERR, - "received a compressed message of %zu bytes, which is bigger than the max compressed message " + "STREAM RECEIVE[x] '%s' [from [%s]:%s]: received a compressed message of %zu bytes, " + "which is bigger than the max compressed message " "size supported of %zu. Ignoring message.", + rrdhost_hostname(r->host), r->client_ip, r->client_port, compressed_message_size, (size_t)COMPRESSION_MAX_MSG_SIZE); return DECOMPRESS_FAILED; } @@ -169,7 +173,9 @@ static inline decompressor_status_t receiver_feed_decompressor(struct receiver_s stream_decompress(&r->thread.compressed.decompressor, buf + start + signature_size, compressed_message_size); if (unlikely(!bytes_to_parse)) { - nd_log(NDLS_DAEMON, NDLP_ERR, "no bytes to parse."); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[x] '%s' [from [%s]:%s]: no bytes to decompress.", + rrdhost_hostname(r->host), r->client_ip, r->client_port); return DECOMPRESS_FAILED; } @@ -259,9 +265,9 @@ void stream_receiver_handle_op(struct stream_thread *sth, struct receiver_state STREAM_CIRCULAR_BUFFER_STATS stats = *stream_circular_buffer_stats_unsafe(rpt->thread.send_to_child.scb); spinlock_unlock(&rpt->thread.send_to_child.spinlock); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM RECEIVE[%zu] %s [from %s]: send buffer is full (buffer size %u, max %u, used %u, available %u). " + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: send buffer is full (buffer size %u, max %u, used %u, available %u). 
" "Restarting connection.", - sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, stats.bytes_size, stats.bytes_max_size, stats.bytes_outstanding, stats.bytes_available); stream_receiver_remove(sth, rpt, "receiver send buffer overflow"); @@ -272,7 +278,7 @@ void stream_receiver_handle_op(struct stream_thread *sth, struct receiver_state "STREAM RECEIVE[%zu]: invalid msg id %u", sth->id, (unsigned)msg->opcode); } -ssize_t send_to_child(const char *txt, void *data, STREAM_TRAFFIC_TYPE type) { +static ssize_t send_to_child(const char *txt, void *data, STREAM_TRAFFIC_TYPE type) { struct receiver_state *rpt = data; if(!rpt || rpt->thread.meta.type != POLLFD_TYPE_RECEIVER || !rpt->thread.send_to_child.scb) return 0; @@ -286,7 +292,8 @@ ssize_t send_to_child(const char *txt, void *data, STREAM_TRAFFIC_TYPE type) { size_t size = strlen(txt); ssize_t rc = (ssize_t)size; - if(!stream_circular_buffer_add_unsafe(scb, txt, size, size, type)) { + if(!stream_circular_buffer_add_unsafe(scb, txt, size, size, type, true)) { + // should never happen, because of autoscaling msg.opcode = STREAM_OPCODE_RECEIVER_BUFFER_OVERFLOW; rc = -1; } @@ -315,14 +322,10 @@ static void streaming_parser_init(struct receiver_state *rpt) { // put the client IP and port into the buffers used by plugins.d { char buf[CONFIG_MAX_NAME]; - snprintfz(buf, sizeof(buf), "%s:%s", rpt->client_ip, rpt->client_port); + snprintfz(buf, sizeof(buf), "[%s]:%s", rpt->client_ip, rpt->client_port); string_freez(rpt->thread.cd.id); rpt->thread.cd.id = string_strdupz(buf); - } - { - char buf[FILENAME_MAX + 1]; - snprintfz(buf, sizeof(buf), "%s:%s", rpt->client_ip, rpt->client_port); string_freez(rpt->thread.cd.filename); rpt->thread.cd.filename = string_strdupz(buf); @@ -384,6 +387,23 @@ static void streaming_parser_init(struct receiver_state *rpt) { // 
-------------------------------------------------------------------------------------------------------------------- +static void stream_receive_log_database_gap(struct receiver_state *rpt) { + RRDHOST *host = rpt->host; + + time_t now = now_realtime_sec(); + time_t last_db_entry = 0; + rrdhost_retention(host, now, false, NULL, &last_db_entry); + + if(now < last_db_entry) + last_db_entry = now; + + char buf[128]; + duration_snprintf(buf, sizeof(buf), now - last_db_entry, "s", true); + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "STREAM RECEIVE '%s' [from [%s]:%s]: node connected; last sample in the database %s ago", + rrdhost_hostname(host), rpt->client_ip, rpt->client_port, buf); +} + void stream_receiver_move_queue_to_running_unsafe(struct stream_thread *sth) { internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); @@ -404,8 +424,8 @@ void stream_receiver_move_queue_to_running_unsafe(struct stream_thread *sth) { ND_LOG_STACK_PUSH(lgs); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM RECEIVE[%zu] [%s]: moving host from receiver queue to receiver running...", - sth->id, rrdhost_hostname(rpt->host)); + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: moving host from receiver queue to receiver running...", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port); rpt->host->stream.rcv.status.tid = gettid_cached(); rpt->thread.meta.type = POLLFD_TYPE_RECEIVER; @@ -413,9 +433,6 @@ void stream_receiver_move_queue_to_running_unsafe(struct stream_thread *sth) { spinlock_lock(&rpt->thread.send_to_child.spinlock); rpt->thread.send_to_child.scb = stream_circular_buffer_create(); - - // this should be big enough to fit all the replies to the replication requests we may receive in a batch - stream_circular_buffer_set_max_size_unsafe(rpt->thread.send_to_child.scb, 100 * 1024 * 1024, true); rpt->thread.send_to_child.msg.thread_slot = (int32_t)sth->id; rpt->thread.send_to_child.msg.session = os_random32(); 
rpt->thread.send_to_child.msg.meta = &rpt->thread.meta; @@ -430,7 +447,12 @@ void stream_receiver_move_queue_to_running_unsafe(struct stream_thread *sth) { rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, rpt->sock.fd); if(!nd_poll_add(sth->run.ndpl, rpt->sock.fd, ND_POLL_READ, &rpt->thread.meta)) - nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to add receiver socket to nd_poll()"); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]:" + "Failed to add receiver socket to nd_poll()", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port); + + stream_receive_log_database_gap(rpt); // keep this last, since it sends commands back to the child streaming_parser_init(rpt); @@ -508,6 +530,8 @@ static void stream_receiver_remove(struct stream_thread *sth, struct receiver_st static ssize_t stream_receive_and_process(struct stream_thread *sth, struct receiver_state *rpt, PARSER *parser, bool *removed) { + internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__); + ssize_t rc; if(rpt->thread.compressed.enabled) { rc = receiver_read_compressed(rpt); @@ -589,10 +613,10 @@ stream_receive_and_process(struct stream_thread *sth, struct receiver_state *rpt } // process poll() events for streaming receivers -void stream_receive_process_poll_events(struct stream_thread *sth, struct receiver_state *rpt, nd_poll_event_t events, usec_t now_ut) +// returns true when the receiver is still there, false if it removed it +bool stream_receive_process_poll_events(struct stream_thread *sth, struct receiver_state *rpt, nd_poll_event_t events, usec_t now_ut) { - internal_fatal( - sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__); + internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__); PARSER *parser = __atomic_load_n(&rpt->thread.parser, __ATOMIC_RELAXED); ND_LOG_STACK 
lgs[] = { @@ -612,7 +636,7 @@ void stream_receive_process_poll_events(struct stream_thread *sth, struct receiv if (receiver_should_stop(rpt)) { receiver_set_exit_reason(rpt, rpt->exit.reason, false); stream_receiver_remove(sth, rpt, "received stop signal"); - return; + return false; } if (unlikely(events & (ND_POLL_ERROR | ND_POLL_HUP | ND_POLL_INVALID))) { @@ -631,109 +655,122 @@ void stream_receive_process_poll_events(struct stream_thread *sth, struct receiv worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SOCKET_ERROR); - nd_log( - NDLS_DAEMON, - NDLP_ERR, - "STREAM RECEIVE[%zu] %s [from %s]: %s - closing connection", - sth->id, - rrdhost_hostname(rpt->host), - rpt->client_ip, - error); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: %s - closing connection", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, error); receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_SOCKET_ERROR, false); stream_receiver_remove(sth, rpt, error); - return; + return false; } if (events & ND_POLL_WRITE) { worker_is_busy(WORKER_STREAM_JOB_SOCKET_SEND); - if (spinlock_trylock(&rpt->thread.send_to_child.spinlock)) { - const char *disconnect_reason = NULL; - STREAM_HANDSHAKE reason; - - char *chunk; - STREAM_CIRCULAR_BUFFER *scb = rpt->thread.send_to_child.scb; - STREAM_CIRCULAR_BUFFER_STATS *stats = stream_circular_buffer_stats_unsafe(scb); - size_t outstanding = stream_circular_buffer_get_unsafe(scb, &chunk); - ssize_t rc = write_stream(rpt, chunk, outstanding); - if (likely(rc > 0)) { - stream_circular_buffer_del_unsafe(scb, rc); - if (!stats->bytes_outstanding) { - if (!nd_poll_upd(sth->run.ndpl, rpt->sock.fd, ND_POLL_READ, &rpt->thread.meta)) - nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM RECEIVE: cannot update nd_poll()"); - - // recreate the circular buffer if we have to - stream_circular_buffer_recreate_timed_unsafe(rpt->thread.send_to_child.scb, now_ut, false); + bool stop = false; + while(!stop) { + if 
(spinlock_trylock(&rpt->thread.send_to_child.spinlock)) { + const char *disconnect_reason = NULL; + STREAM_HANDSHAKE reason; + + char *chunk; + STREAM_CIRCULAR_BUFFER *scb = rpt->thread.send_to_child.scb; + STREAM_CIRCULAR_BUFFER_STATS *stats = stream_circular_buffer_stats_unsafe(scb); + size_t outstanding = stream_circular_buffer_get_unsafe(scb, &chunk); + ssize_t rc = write_stream(rpt, chunk, outstanding); + if (likely(rc > 0)) { + stream_circular_buffer_del_unsafe(scb, rc); + if (!stats->bytes_outstanding) { + if (!nd_poll_upd(sth->run.ndpl, rpt->sock.fd, ND_POLL_READ, &rpt->thread.meta)) + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: cannot update nd_poll()", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port); + + // recreate the circular buffer if we have to + stream_circular_buffer_recreate_timed_unsafe(rpt->thread.send_to_child.scb, now_ut, false); + stop = true; + } + else if(stream_thread_process_opcodes(sth, &rpt->thread.meta)) + stop = true; } - } else if (rc == 0 || errno == ECONNRESET) { - disconnect_reason = "socket reports EOF (closed by child)"; - reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END; - } else if (rc < 0) { - if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) - // will try later - ; - else { - disconnect_reason = "socket reports error while writing"; - reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_WRITE_FAILED; + else if (rc == 0 || errno == ECONNRESET) { + disconnect_reason = "socket reports EOF (closed by child)"; + reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END; + } + else if (rc < 0) { + if (errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) + // will try later + stop = true; + else { + disconnect_reason = "socket reports error while writing"; + reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_WRITE_FAILED; + } + } + spinlock_unlock(&rpt->thread.send_to_child.spinlock); + + if (disconnect_reason) { + 
worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: %s (%zd, on fd %d) - closing connection - " + "we have sent %zu bytes in %zu operations.", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, + disconnect_reason, rc, rpt->sock.fd, stats->bytes_sent, stats->sends); + + receiver_set_exit_reason(rpt, reason, false); + stream_receiver_remove(sth, rpt, disconnect_reason); + return false; } } - spinlock_unlock(&rpt->thread.send_to_child.spinlock); - - if (disconnect_reason) { - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR); - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM RECEIVE[%zu] %s [from %s]: %s (%zd, on fd %d) - closing connection - " - "we have sent %zu bytes in %zu operations.", - sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, disconnect_reason, rc, rpt->sock.fd, - stats->bytes_sent, stats->sends); - - receiver_set_exit_reason(rpt, reason, false); - stream_receiver_remove(sth, rpt, disconnect_reason); - return; - } + else + break; } } if (!(events & ND_POLL_READ)) - return; + return true; // we can receive data from this socket worker_is_busy(WORKER_STREAM_JOB_SOCKET_RECEIVE); - bool removed = false; - while(!removed) { + bool removed = false, stop = false; + size_t iterations = 0; + while(!removed && !stop && iterations++ < MAX_IO_ITERATIONS_PER_EVENT) { ssize_t rc = stream_receive_and_process(sth, rpt, parser, &removed); if (likely(rc > 0)) { rpt->last_msg_t = (time_t)(now_ut / USEC_PER_SEC); + + if(stream_thread_process_opcodes(sth, &rpt->thread.meta)) + stop = true; } else if (rc == 0 || errno == ECONNRESET) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_REMOTE_CLOSED); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM RECEIVE[%zu] %s [from %s]: socket %d reports EOF (closed by child).", - sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->sock.fd); + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: socket %d reports EOF (closed by child).", + sth->id, 
rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, rpt->sock.fd); receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END, false); stream_receiver_remove(sth, rpt, "socket reports EOF (closed by child)"); - return; + return false; } else if (rc < 0) { if(removed) - return; + return false; else if ((errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR)) // will try later - break; + stop = true; else { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM RECEIVE[%zu] %s [from %s]: error during receive (%zd, on fd %d) - closing connection.", - sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rc, rpt->sock.fd); + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: error during receive (%zd, on fd %d) - closing connection.", + sth->id, rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, rc, rpt->sock.fd); receiver_set_exit_reason(rpt, STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED, false); stream_receiver_remove(sth, rpt, "error during receive"); - return; + return false; } } } + + return !removed; } void stream_receiver_cleanup(struct stream_thread *sth) { @@ -782,8 +819,9 @@ bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) { if (rpt->config.health.delay > 0) { host->health.delay_up_to = now_realtime_sec() + rpt->config.health.delay; nd_log(NDLS_DAEMON, NDLP_DEBUG, - "[%s]: Postponing health checks for %" PRId64 " seconds, because it was just connected.", - rrdhost_hostname(host), + "STREAM RECEIVE '%s' [from [%s]:%s]: " + "Postponing health checks for %" PRId64 " seconds, because it was just connected.", + rrdhost_hostname(host), rpt->client_ip, rpt->client_port, (int64_t) rpt->config.health.delay); } } @@ -797,7 +835,7 @@ bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) { signal_rrdcontext = true; stream_receiver_replication_reset(host); - rrdhost_flag_clear(rpt->host, RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED); + 
rrdhost_flag_set(rpt->host, RRDHOST_FLAG_COLLECTOR_ONLINE); aclk_queue_node_info(rpt->host, true); rrdhost_stream_parents_reset(host, STREAM_HANDSHAKE_PREPARING); @@ -810,6 +848,9 @@ bool rrdhost_set_receiver(RRDHOST *host, struct receiver_state *rpt) { if(signal_rrdcontext) rrdcontext_host_child_connected(host); + if(set_this) + ml_host_start(host); + return set_this; } @@ -822,11 +863,12 @@ void rrdhost_clear_receiver(struct receiver_state *rpt) { // Make sure that we detach this thread and don't kill a freshly arriving receiver if (host->receiver == rpt) { - rrdhost_flag_set(host, RRDHOST_FLAG_STREAM_RECEIVER_DISCONNECTED); + rrdhost_flag_clear(host, RRDHOST_FLAG_COLLECTOR_ONLINE); rrdhost_receiver_unlock(host); { // run all these without having the receiver lock + ml_host_stop(host); stream_path_child_disconnected(host); stream_sender_signal_to_stop_and_wait(host, STREAM_HANDSHAKE_DISCONNECT_RECEIVER_LEFT, false); stream_receiver_replication_reset(host); diff --git a/src/streaming/stream-sender-commit.c b/src/streaming/stream-sender-commit.c index 7ff34eb79bee3a..1ebb2df030ddc4 100644 --- a/src/streaming/stream-sender-commit.c +++ b/src/streaming/stream-sender-commit.c @@ -21,14 +21,16 @@ void sender_commit_thread_buffer_free(void) { // Collector thread starting a transmission BUFFER *sender_commit_start_with_trace(struct sender_state *s __maybe_unused, struct sender_buffer *commit, const char *func) { if(unlikely(commit->used)) - fatal("STREAMING: thread buffer is used multiple times concurrently (%u). " + fatal("STREAM SEND '%s' [to %s]: thread buffer is used multiple times concurrently (%u). " "It is already being used by '%s()', and now is called by '%s()'", + rrdhost_hostname(s->host), s->connected_to, (unsigned)commit->used, commit->last_function ? commit->last_function : "(null)", func ? 
func : "(null)"); if(unlikely(commit->receiver_tid && commit->receiver_tid != gettid_cached())) - fatal("STREAMING: thread buffer is reserved for tid %d, but it used by thread %d function '%s()'.", + fatal("STREAM SEND '%s' [to %s]: thread buffer is reserved for tid %d, but it used by thread %d function '%s()'.", + rrdhost_hostname(s->host), s->connected_to, commit->receiver_tid, gettid_cached(), func ? func : "(null)"); if(unlikely(commit->wb && @@ -81,11 +83,12 @@ void sender_buffer_commit(struct sender_state *s, BUFFER *wb, struct sender_buff return; } - if (unlikely(stream_circular_buffer_set_max_size_unsafe(s->scb, src_len, false))) { + if (unlikely(stream_circular_buffer_set_max_size_unsafe( + s->scb, src_len * STREAM_CIRCULAR_BUFFER_ADAPT_TO_TIMES_MAX_SIZE, false))) { // adaptive sizing of the circular buffer nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM SEND %s [to %s]: Increased max buffer size to %u (message size %zu).", - rrdhost_hostname(s->host), s->connected_to, stats->bytes_max_size, buffer_strlen(wb) + 1); + "STREAM SEND '%s' [to %s]: Increased max buffer size to %u (message size %zu).", + rrdhost_hostname(s->host), s->connected_to, stats->bytes_max_size, src_len + 1); } stream_sender_log_payload(s, wb, type, false); @@ -123,7 +126,7 @@ void sender_buffer_commit(struct sender_state *s, BUFFER *wb, struct sender_buff size_t dst_len = stream_compress(&s->compressor, src, size_to_compress, &dst); if (!dst_len) { nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: COMPRESSION failed. Resetting compressor and re-trying", + "STREAM SEND '%s' [to %s]: COMPRESSION failed. 
Resetting compressor and re-trying", rrdhost_hostname(s->host), s->connected_to); stream_compression_initialize(s); @@ -139,13 +142,16 @@ void sender_buffer_commit(struct sender_state *s, BUFFER *wb, struct sender_buff size_t decoded_dst_len = stream_decompress_decode_signature((const char *)&signature, sizeof(signature)); if (decoded_dst_len != dst_len) fatal( - "STREAM COMPRESSION: invalid signature, original payload %zu bytes, " + "STREAM SEND '%s' [to %s]: invalid signature, original payload %zu bytes, " "compressed payload length %zu bytes, but signature says payload is %zu bytes", + rrdhost_hostname(s->host), s->connected_to, size_to_compress, dst_len, decoded_dst_len); #endif - if (!stream_circular_buffer_add_unsafe(s->scb, (const char *)&signature, sizeof(signature), sizeof(signature), type) || - !stream_circular_buffer_add_unsafe(s->scb, dst, dst_len, size_to_compress, type)) + if (!stream_circular_buffer_add_unsafe(s->scb, (const char *)&signature, sizeof(signature), + sizeof(signature), type, false) || + !stream_circular_buffer_add_unsafe(s->scb, dst, dst_len, + size_to_compress, type, false)) goto overflow_with_lock; src = src + size_to_compress; @@ -155,7 +161,8 @@ void sender_buffer_commit(struct sender_state *s, BUFFER *wb, struct sender_buff else { // uncompressed traffic - if (!stream_circular_buffer_add_unsafe(s->scb, src, src_len, src_len, type)) + if (!stream_circular_buffer_add_unsafe(s->scb, src, src_len, + src_len, type, false)) goto overflow_with_lock; } @@ -179,11 +186,12 @@ overflow_with_lock: { stream_sender_unlock(s); msg.opcode = STREAM_OPCODE_SENDER_BUFFER_OVERFLOW; stream_sender_send_opcode(s, msg); - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: buffer overflow (buffer size %u, max size %u, used %u, available %u). 
" - "Restarting connection.", - rrdhost_hostname(s->host), s->connected_to, - stats->bytes_size, stats->bytes_max_size, stats->bytes_outstanding, stats->bytes_available); + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, + "STREAM SEND '%s' [to %s]: buffer overflow (buffer size %u, max size %u, used %u, available %u). " + "Restarting connection.", + rrdhost_hostname(s->host), s->connected_to, + stats->bytes_size, stats->bytes_max_size, stats->bytes_outstanding, stats->bytes_available); return; } @@ -193,9 +201,11 @@ compression_failed_with_lock: { stream_sender_unlock(s); msg.opcode = STREAM_OPCODE_SENDER_RECONNECT_WITHOUT_COMPRESSION; stream_sender_send_opcode(s, msg); - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM %s [send to %s]: COMPRESSION failed (twice). Deactivating compression and restarting connection.", - rrdhost_hostname(s->host), s->connected_to); + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, + "STREAM SEND '%s' [to %s]: COMPRESSION failed (twice). " + "Deactivating compression and restarting connection.", + rrdhost_hostname(s->host), s->connected_to); } } @@ -203,10 +213,12 @@ void sender_thread_commit(struct sender_state *s, BUFFER *wb, STREAM_TRAFFIC_TYP struct sender_buffer *commit = (wb == commit___thread.wb) ? 
& commit___thread : &s->host->stream.snd.commit; if (unlikely(wb != commit->wb)) - fatal("STREAMING: function '%s()' is trying to commit an unknown commit buffer.", func); + fatal("STREAM SEND '%s' [to %s]: function '%s()' is trying to commit an unknown commit buffer.", + rrdhost_hostname(s->host), s->connected_to, func); if (unlikely(!commit->used)) - fatal("STREAMING: function '%s()' is committing a sender buffer twice.", func); + fatal("STREAM SEND '%s' [to %s]: function '%s()' is committing a sender buffer twice.", + rrdhost_hostname(s->host), s->connected_to, func); commit->used = false; commit->last_function = NULL; diff --git a/src/streaming/stream-sender-execute.c b/src/streaming/stream-sender-execute.c index 783591f8faf7d1..0d8b7cf3a4f3cb 100644 --- a/src/streaming/stream-sender-execute.c +++ b/src/streaming/stream-sender-execute.c @@ -26,7 +26,7 @@ static void stream_execute_function_callback(BUFFER *func_wb, int code, void *da sender_commit_clean_buffer(s, wb, STREAM_TRAFFIC_TYPE_FUNCTIONS); - internal_error(true, "STREAM %s [send to %s] FUNCTION transaction %s sending back response (%zu bytes, %"PRIu64" usec).", + internal_error(true, "STREAM SEND '%s' [to %s]: FUNCTION transaction %s sending back response (%zu bytes, %"PRIu64" usec).", rrdhost_hostname(s->host), s->connected_to, string2str(tmp->transaction), buffer_strlen(func_wb), @@ -57,7 +57,7 @@ static void execute_commands_function(struct sender_state *s, const char *comman nd_log(NDLS_ACCESS, NDLP_INFO, NULL); if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { - netdata_log_error("STREAM %s [send to %s] %s execution command is incomplete (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.", + netdata_log_error("STREAM SEND '%s' [to %s]: %s execution command is incomplete (transaction = '%s', timeout = '%s', function = '%s'). 
Ignoring it.", rrdhost_hostname(s->host), s->connected_to, command, transaction?transaction:"(unset)", @@ -110,7 +110,10 @@ static void execute_deferred_json(struct sender_state *s, void *data) { if(strcmp(keyword, PLUGINSD_KEYWORD_JSON_CMD_STREAM_PATH) == 0) stream_path_set_from_json(s->host, buffer_tostring(s->defer.payload), true); else - nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM: unknown JSON keyword '%s' with payload: %s", keyword, buffer_tostring(s->defer.payload)); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND '%s' [to %s]: unknown JSON keyword '%s' with payload: %s", + rrdhost_hostname(s->host), s->connected_to, + keyword, buffer_tostring(s->defer.payload)); } static void cleanup_deferred_json(struct sender_state *s __maybe_unused, void *data) { @@ -274,7 +277,7 @@ void stream_sender_execute_commands(struct sender_state *s) { const char *before = get_word(s->rbuf.line.words, s->rbuf.line.num_words, 4); if (!chart_id || !start_streaming || !after || !before) { - netdata_log_error("STREAM %s [send to %s] %s command is incomplete" + netdata_log_error("STREAM SEND '%s' [to %s] %s command is incomplete" " (chart=%s, start_streaming=%s, after=%s, before=%s)", rrdhost_hostname(s->host), s->connected_to, command, @@ -310,7 +313,7 @@ void stream_sender_execute_commands(struct sender_state *s) { s->defer.action_data = strdupz(keyword); } else { - netdata_log_error("STREAM %s [send to %s] received unknown command over connection: %s", + netdata_log_error("STREAM SEND '%s' [to %s] received unknown command over connection: %s", rrdhost_hostname(s->host), s->connected_to, s->rbuf.line.words[0]?s->rbuf.line.words[0]:"(unset)"); } diff --git a/src/streaming/stream-sender-internals.h b/src/streaming/stream-sender-internals.h index 395feaac003979..ec3e3f4eedce33 100644 --- a/src/streaming/stream-sender-internals.h +++ b/src/streaming/stream-sender-internals.h @@ -40,6 +40,8 @@ struct sender_state { ND_SOCK sock; struct { + bool draining_input; // used exclusively by the stream 
thread + struct stream_opcode msg; // the template for sending a message to the dispatcher - protected by sender_lock() // this is a property of stream_sender_send_msg_to_dispatcher() diff --git a/src/streaming/stream-sender.c b/src/streaming/stream-sender.c index 34a60cc11c5b6b..2c1ab048d24e5f 100644 --- a/src/streaming/stream-sender.c +++ b/src/streaming/stream-sender.c @@ -89,7 +89,7 @@ void stream_sender_on_connect(struct sender_state *s) { static void stream_sender_on_ready_to_dispatch(struct sender_state *s) { nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM SEND [%s]: running ready-to-dispatch hooks...", + "STREAM SEND '%s': running ready-to-dispatch hooks...", rrdhost_hostname(s->host)); // set this flag before sending any data, or the data will not be sent @@ -105,7 +105,7 @@ static void stream_sender_on_ready_to_dispatch(struct sender_state *s) { static void stream_sender_on_disconnect(struct sender_state *s) { nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM SEND [%s]: running on-disconnect hooks...", + "STREAM SEND '%s': running on-disconnect hooks...", rrdhost_hostname(s->host)); stream_sender_lock(s); @@ -182,7 +182,7 @@ void stream_sender_handle_op(struct stream_thread *sth, struct sender_state *s, STREAM_CIRCULAR_BUFFER_STATS stats = *stream_circular_buffer_stats_unsafe(s->scb); stream_sender_unlock(s); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [to %s]: send buffer is full (buffer size %u, max %u, used %u, available %u). " + "STREAM SEND[%zu] '%s' [to %s]: send buffer is full (buffer size %u, max %u, used %u, available %u). 
" "Restarting connection.", sth->id, rrdhost_hostname(s->host), s->connected_to, stats.bytes_size, stats.bytes_max_size, stats.bytes_outstanding, stats.bytes_available); @@ -203,7 +203,7 @@ void stream_sender_handle_op(struct stream_thread *sth, struct sender_state *s, worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_COMPRESSION_ERROR); errno_clear(); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [send to %s]: restarting connection without compression.", + "STREAM SEND[%zu] '%s' [to %s]: restarting connection without compression.", sth->id, rrdhost_hostname(s->host), s->connected_to); stream_sender_move_running_to_connector_or_remove( @@ -245,8 +245,8 @@ void stream_sender_move_queue_to_running_unsafe(struct stream_thread *sth) { ND_LOG_STACK_PUSH(lgs); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM SEND[%zu] [%s]: moving host from dispatcher queue to dispatcher running...", - sth->id, rrdhost_hostname(s->host)); + "STREAM SEND[%zu] '%s' [to %s]: moving host from dispatcher queue to dispatcher running...", + sth->id, rrdhost_hostname(s->host), s->connected_to); stream_sender_lock(s); s->thread.meta.type = POLLFD_TYPE_SENDER; @@ -268,7 +268,9 @@ void stream_sender_move_queue_to_running_unsafe(struct stream_thread *sth) { META_SET(&sth->run.meta, (Word_t)&s->thread.meta, &s->thread.meta); if(!nd_poll_add(sth->run.ndpl, s->sock.fd, ND_POLL_READ, &s->thread.meta)) - nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to add sender socket to nd_poll()"); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND[%zu] '%s' [to %s]: failed to add sender socket to nd_poll()", + sth->id, rrdhost_hostname(s->host), s->connected_to); stream_sender_on_ready_to_dispatch(s); } @@ -279,8 +281,8 @@ void stream_sender_remove(struct sender_state *s) { // when it gives up on a certain node nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM SEND [%s]: streaming sender removed host: %s", - rrdhost_hostname(s->host), stream_handshake_error_to_string(s->exit.reason)); + "STREAM SEND '%s' [to %s]: streaming sender removed 
host: %s", + rrdhost_hostname(s->host), s->connected_to, stream_handshake_error_to_string(s->exit.reason)); stream_sender_lock(s); @@ -316,7 +318,9 @@ static void stream_sender_move_running_to_connector_or_remove(struct stream_thre META_DEL(&sth->run.meta, (Word_t)&s->thread.meta); if(!nd_poll_del(sth->run.ndpl, s->sock.fd)) - nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to delete sender socket from nd_poll()"); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND[%zu] '%s' [to %s]: failed to delete sender socket from nd_poll()", + sth->id, rrdhost_hostname(s->host), s->connected_to); // clear this flag asap, to stop other threads from pushing metrics for this node rrdhost_flag_clear(s->host, RRDHOST_FLAG_STREAM_SENDER_CONNECTED | RRDHOST_FLAG_STREAM_SENDER_READY_4_METRICS); @@ -331,8 +335,8 @@ static void stream_sender_move_running_to_connector_or_remove(struct stream_thre stream_sender_unlock(s); nd_log(NDLS_DAEMON, NDLP_NOTICE, - "STREAM SEND [%s]: sender disconnected from parent, reason: %s", - rrdhost_hostname(s->host), stream_handshake_error_to_string(reason)); + "STREAM SEND[%zu] '%s' [to %s]: sender disconnected from parent, reason: %s", + sth->id, rrdhost_hostname(s->host), s->connected_to, stream_handshake_error_to_string(reason)); nd_sock_close(&s->sock); @@ -398,7 +402,7 @@ void stream_sender_check_all_nodes_from_poll(struct stream_thread *sth, usec_t n size_snprintf(pending, sizeof(pending), stats.bytes_outstanding, "B", false); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [send to %s]: could not send data for %ld seconds - closing connection - " + "STREAM SEND[%zu] '%s' [to %s]: could not send data for %ld seconds - closing connection - " "we have sent %zu bytes in %zu operations, it is idle for %s, and we have %s pending to send " "(buffer is used %.2f%%).", sth->id, rrdhost_hostname(s->host), s->connected_to, stream_send.parents.timeout_s, @@ -414,7 +418,7 @@ void stream_sender_check_all_nodes_from_poll(struct stream_thread *sth, usec_t n 
if(!nd_poll_upd(sth->run.ndpl, s->sock.fd, ND_POLL_READ | (stats.bytes_outstanding ? ND_POLL_WRITE : 0), &s->thread.meta)) nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [send to %s]: failed to update nd_poll().", + "STREAM SEND[%zu] '%s' [to %s]: failed to update nd_poll().", sth->id, rrdhost_hostname(s->host), s->connected_to); } @@ -428,7 +432,9 @@ void stream_sender_check_all_nodes_from_poll(struct stream_thread *sth, usec_t n worker_set_metric(WORKER_SENDER_JOB_BUFFER_RATIO, overall_buffer_ratio); } -void stream_sender_process_poll_events(struct stream_thread *sth, struct sender_state *s, nd_poll_event_t events, usec_t now_ut) { +// process poll() events for streaming senders +// returns true when the sender is still there, false if it removed it +bool stream_sender_process_poll_events(struct stream_thread *sth, struct sender_state *s, nd_poll_event_t events, usec_t now_ut) { internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); ND_LOG_STACK lgs[] = { @@ -464,80 +470,90 @@ void stream_sender_process_poll_events(struct stream_thread *sth, struct sender_ stream_sender_unlock(s); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [to %s]: %s restarting connection - %zu bytes transmitted in %zu operations.", + "STREAM SEND[%zu] '%s' [to %s]: %s restarting connection - %zu bytes transmitted in %zu operations.", sth->id, rrdhost_hostname(s->host), s->connected_to, error, stats.bytes_sent, stats.sends); stream_sender_move_running_to_connector_or_remove(sth, s, STREAM_HANDSHAKE_DISCONNECT_SOCKET_ERROR, true); - return; + return false; } if(events & ND_POLL_WRITE) { // we can send data on this socket - if(stream_sender_trylock(s)) { - worker_is_busy(WORKER_STREAM_JOB_SOCKET_SEND); - - const char *disconnect_reason = NULL; - STREAM_HANDSHAKE reason; - - STREAM_CIRCULAR_BUFFER_STATS *stats = stream_circular_buffer_stats_unsafe(s->scb); - char *chunk; - size_t outstanding = 
stream_circular_buffer_get_unsafe(s->scb, &chunk); - ssize_t rc = nd_sock_send_nowait(&s->sock, chunk, outstanding); - if (likely(rc > 0)) { - stream_circular_buffer_del_unsafe(s->scb, rc); - replication_recalculate_buffer_used_ratio_unsafe(s); - s->thread.last_traffic_ut = now_ut; - sth->snd.bytes_sent += rc; - - if (!stats->bytes_outstanding) { - // we sent them all - remove ND_POLL_WRITE - if (!nd_poll_upd(sth->run.ndpl, s->sock.fd, ND_POLL_READ, &s->thread.meta)) - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [send to %s]: failed to update nd_poll().", - sth->id, rrdhost_hostname(s->host), s->connected_to); - - // recreate the circular buffer if we have to - stream_circular_buffer_recreate_timed_unsafe(s->scb, now_ut, false); + bool stop = false; + while(!stop) { + if(stream_sender_trylock(s)) { + worker_is_busy(WORKER_STREAM_JOB_SOCKET_SEND); + + const char *disconnect_reason = NULL; + STREAM_HANDSHAKE reason; + + STREAM_CIRCULAR_BUFFER_STATS *stats = stream_circular_buffer_stats_unsafe(s->scb); + char *chunk; + size_t outstanding = stream_circular_buffer_get_unsafe(s->scb, &chunk); + ssize_t rc = nd_sock_send_nowait(&s->sock, chunk, outstanding); + if (likely(rc > 0)) { + stream_circular_buffer_del_unsafe(s->scb, rc); + replication_recalculate_buffer_used_ratio_unsafe(s); + s->thread.last_traffic_ut = now_ut; + sth->snd.bytes_sent += rc; + + if (!stats->bytes_outstanding) { + // we sent them all - remove ND_POLL_WRITE + if (!nd_poll_upd(sth->run.ndpl, s->sock.fd, ND_POLL_READ, &s->thread.meta)) + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND[%zu] '%s' [to %s]: failed to update nd_poll().", + sth->id, rrdhost_hostname(s->host), s->connected_to); + + // recreate the circular buffer if we have to + stream_circular_buffer_recreate_timed_unsafe(s->scb, now_ut, false); + stop = true; + } + else if(stream_thread_process_opcodes(sth, &s->thread.meta)) + stop = true; } - } - else if (rc == 0 || errno == ECONNRESET) { - disconnect_reason = "socket reports EOF 
(closed by parent)"; - reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END; - } - else if (rc < 0) { - if(errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) - // will try later - ; - else { - disconnect_reason = "socket reports error while writing"; - reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_WRITE_FAILED; + else if (rc == 0 || errno == ECONNRESET) { + disconnect_reason = "socket reports EOF (closed by parent)"; + reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END; + } + else if (rc < 0) { + if(errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) { + // will try later + stop = true; + } + else { + disconnect_reason = "socket reports error while writing"; + reason = STREAM_HANDSHAKE_DISCONNECT_SOCKET_WRITE_FAILED; + } + } + stream_sender_unlock(s); + + if (disconnect_reason) { + worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND[%zu] '%s' [to %s]: %s (%zd, on fd %d) - restarting connection - " + "we have sent %zu bytes in %zu operations.", + sth->id, rrdhost_hostname(s->host), s->connected_to, disconnect_reason, rc, s->sock.fd, + stats->bytes_sent, stats->sends); + + stream_sender_move_running_to_connector_or_remove(sth, s, reason, true); + return false; } } - stream_sender_unlock(s); - - if (disconnect_reason) { - worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_SEND_ERROR); - nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [to %s]: %s (%zd, on fd %d) - restarting connection - " - "we have sent %zu bytes in %zu operations.", - sth->id, rrdhost_hostname(s->host), s->connected_to, disconnect_reason, rc, s->sock.fd, - stats->bytes_sent, stats->sends); - - stream_sender_move_running_to_connector_or_remove(sth, s, reason, true); - - return; - } + else + break; } } if(!(events & ND_POLL_READ)) - return; + return true; // we can receive data from this socket worker_is_busy(WORKER_STREAM_JOB_SOCKET_RECEIVE); - while(true) { + bool stop = false; + size_t iterations = 0; + 
while(!stop && iterations++ < MAX_IO_ITERATIONS_PER_EVENT) { // we have to drain the socket! ssize_t rc = nd_sock_revc_nowait(&s->sock, s->rbuf.b + s->rbuf.read_len, sizeof(s->rbuf.b) - s->rbuf.read_len - 1); @@ -549,31 +565,36 @@ void stream_sender_process_poll_events(struct stream_thread *sth, struct sender_ worker_is_busy(WORKER_SENDER_JOB_EXECUTE); stream_sender_execute_commands(s); + + if(stream_thread_process_opcodes(sth, &s->thread.meta)) + stop = true; } else if (rc == 0 || errno == ECONNRESET) { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_REMOTE_CLOSED); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [to %s]: socket %d reports EOF (closed by parent).", + "STREAM SEND[%zu] '%s' [to %s]: socket %d reports EOF (closed by parent).", sth->id, rrdhost_hostname(s->host), s->connected_to, s->sock.fd); stream_sender_move_running_to_connector_or_remove( sth, s, STREAM_HANDSHAKE_DISCONNECT_SOCKET_CLOSED_BY_REMOTE_END, true); - return; + return false; } else if (rc < 0) { if(errno == EWOULDBLOCK || errno == EAGAIN || errno == EINTR) // will try later - break; + stop = true; else { worker_is_busy(WORKER_SENDER_JOB_DISCONNECT_RECEIVE_ERROR); nd_log(NDLS_DAEMON, NDLP_ERR, - "STREAM SEND[%zu] %s [to %s]: error during receive (%zd, on fd %d) - restarting connection.", + "STREAM SEND[%zu] '%s' [to %s]: error during receive (%zd, on fd %d) - restarting connection.", sth->id, rrdhost_hostname(s->host), s->connected_to, rc, s->sock.fd); stream_sender_move_running_to_connector_or_remove( sth, s, STREAM_HANDSHAKE_DISCONNECT_SOCKET_READ_FAILED, true); - return; + return false; } } } + + return true; } void stream_sender_cleanup(struct stream_thread *sth) { diff --git a/src/streaming/stream-thread.c b/src/streaming/stream-thread.c index 1a04e2a3e8917a..25a532d1136a5f 100644 --- a/src/streaming/stream-thread.c +++ b/src/streaming/stream-thread.c @@ -27,8 +27,12 @@ static void stream_thread_handle_op(struct stream_thread *sth, struct stream_opc { if(m->type == 
POLLFD_TYPE_SENDER) { if(msg->opcode & STREAM_OPCODE_SENDER_POLLOUT) { - if(!nd_poll_upd(sth->run.ndpl, m->s->sock.fd, ND_POLL_READ|ND_POLL_WRITE, m)) - internal_fatal(true, "Failed to update sender socket in nd_poll()"); + if(!nd_poll_upd(sth->run.ndpl, m->s->sock.fd, ND_POLL_READ|ND_POLL_WRITE, m)) { + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_DEBUG, + "STREAM SEND[%zu] '%s' [to %s]: cannot enable output on sender socket %d.", + sth->id, rrdhost_hostname(m->s->host), m->s->connected_to, m->s->sock.fd); + } msg->opcode &= ~(STREAM_OPCODE_SENDER_POLLOUT); } @@ -37,8 +41,12 @@ static void stream_thread_handle_op(struct stream_thread *sth, struct stream_opc } else if(m->type == POLLFD_TYPE_RECEIVER) { if (msg->opcode & STREAM_OPCODE_RECEIVER_POLLOUT) { - if (!nd_poll_upd(sth->run.ndpl, m->rpt->sock.fd, ND_POLL_READ | ND_POLL_WRITE, m)) - internal_fatal(true, "Failed to update receiver socket in nd_poll()"); + if (!nd_poll_upd(sth->run.ndpl, m->rpt->sock.fd, ND_POLL_READ | ND_POLL_WRITE, m)) { + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_DEBUG, + "STREAM RECEIVE[%zu] '%s' [from [%s]:%s]: cannot enable output on receiver socket %d.", + sth->id, rrdhost_hostname(m->rpt->host), m->rpt->client_ip, m->rpt->client_port, m->rpt->sock.fd); + } msg->opcode &= ~(STREAM_OPCODE_RECEIVER_POLLOUT); } @@ -48,7 +56,8 @@ static void stream_thread_handle_op(struct stream_thread *sth, struct stream_opc } else { // this may happen if we receive a POLLOUT opcode, but the sender has been disconnected - nd_log(NDLS_DAEMON, NDLP_DEBUG, "STREAM THREAD[%zu]: OPCODE %u ignored.", sth->id, (unsigned)msg->opcode); + nd_log_limit_static_global_var(erl, 1, 0); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_DEBUG, "STREAM THREAD[%zu]: OPCODE %u ignored.", sth->id, (unsigned)msg->opcode); } } @@ -70,17 +79,22 @@ void stream_receiver_send_opcode(struct receiver_state *rpt, struct stream_opcod if (!msg.session || !msg.meta || !rpt) 
return; - internal_fatal(msg.meta != &rpt->thread.meta, "the receiver pointer in the message does not match this receiver"); + if(msg.meta != &rpt->thread.meta) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE '%s' [from [%s]:%s]: the receiver in the opcode the message does not match this receiver. " + "Ignoring opcode.", rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port); + return; + } struct stream_thread *sth = stream_thread_by_slot_id(msg.thread_slot); if(!sth) { - internal_fatal(true, - "STREAM RECEIVE[x] [%s] thread pointer in the opcode message does not match the expected", - rrdhost_hostname(rpt->host)); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM RECEIVE '%s' [from [%s]:%s]: the opcode (%u) message cannot be verified. Ignoring it.", + rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port, msg.opcode); return; } // check if we can execute the message now - if(msg.opcode == STREAM_OPCODE_RECEIVER_POLLOUT && sth->tid == gettid_cached()) { + if(sth->tid == gettid_cached() && (!rpt->thread.draining_input || msg.opcode == STREAM_OPCODE_RECEIVER_POLLOUT)) { // we are running at the stream thread, and the request is about enabling POLLOUT, // we can do this synchronously. 
// IMPORTANT: DO NOT HANDLE FAILURES THAT REMOVE THE RECEIVER OR THE SENDER THIS WAY @@ -108,6 +122,7 @@ void stream_receiver_send_opcode(struct receiver_state *rpt, struct stream_opcod return; } +#ifdef NETDATA_INTERNAL_CHECKS // try to find us in the list for (size_t i = 0; i < sth->messages.size; i++) { if (sth->messages.array[i].meta == &rpt->thread.meta) { @@ -118,8 +133,10 @@ void stream_receiver_send_opcode(struct receiver_state *rpt, struct stream_opcod return; } } +#endif - fatal("The streaming opcode queue is full, but this should never happen"); + fatal("STREAM RECEIVE '%s' [from [%s]:%s]: The streaming opcode queue is full, but this should never happen...", + rrdhost_hostname(rpt->host), rpt->client_ip, rpt->client_port); } // let's use a new slot @@ -142,17 +159,23 @@ void stream_sender_send_opcode(struct sender_state *s, struct stream_opcode msg) if (!msg.session || !msg.meta || !s) return; - internal_fatal(msg.meta != &s->thread.meta, "the sender pointer in the message does not match this sender"); + if(msg.meta != &s->thread.meta) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND '%s' [to %s]: the opcode message does not match this sender. " + "Ignoring opcode.", rrdhost_hostname(s->host), s->connected_to); + return; + } + struct stream_thread *sth = stream_thread_by_slot_id(msg.thread_slot); if(!sth) { - internal_fatal(true, - "STREAM SEND[x] [%s] thread pointer in the opcode message does not match the expected", - rrdhost_hostname(s->host)); + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM SEND[x] '%s' [to %s] the opcode (%u) message cannot be verified. 
Ignoring it.", + rrdhost_hostname(s->host), s->connected_to, msg.opcode); return; } // check if we can execute the message now - if(msg.opcode == STREAM_OPCODE_SENDER_POLLOUT && sth->tid == gettid_cached()) { + if(sth->tid == gettid_cached() && (!s->thread.draining_input || msg.opcode == STREAM_OPCODE_SENDER_POLLOUT)) { // we are running at the stream thread, and the request is about enabling POLLOUT, // we can do this synchronously. // IMPORTANT: DO NOT HANDLE FAILURES THAT REMOVE THE RECEIVER OR THE SENDER THIS WAY @@ -180,6 +203,7 @@ void stream_sender_send_opcode(struct sender_state *s, struct stream_opcode msg) return; } +#ifdef NETDATA_INTERNAL_CHECKS // try to find us in the list for (size_t i = 0; i < sth->messages.size; i++) { if (sth->messages.array[i].meta == &s->thread.meta) { @@ -190,8 +214,10 @@ void stream_sender_send_opcode(struct sender_state *s, struct stream_opcode msg) return; } } +#endif - fatal("the streaming opcode queue is full, but this should never happen"); + fatal("STREAM SEND '%s' [to %s]: The streaming opcode queue is full, but this should never happen...", + rrdhost_hostname(s->host), s->connected_to); } // let's use a new slot @@ -210,12 +236,9 @@ void stream_sender_send_opcode(struct sender_state *s, struct stream_opcode msg) stream_thread_send_pipe_signal(sth); } -static void stream_thread_read_pipe_messages(struct stream_thread *sth) { +bool stream_thread_process_opcodes(struct stream_thread *sth, struct pollfd_meta *my_meta) { internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); - if(read(sth->pipe.fds[PIPE_READ], sth->pipe.buffer, sth->pipe.size * sizeof(*sth->pipe.buffer)) <= 0) - nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu]: signal pipe read error", sth->id); - size_t used = 0; spinlock_lock(&sth->messages.spinlock); if(sth->messages.used) { @@ -225,10 +248,23 @@ static void stream_thread_read_pipe_messages(struct stream_thread *sth) { } 
spinlock_unlock(&sth->messages.spinlock); + bool rc = false; for(size_t i = 0; i < used ;i++) { struct stream_opcode *msg = &sth->messages.copy[i]; + if(msg->meta == my_meta) rc = true; stream_thread_handle_op(sth, msg); } + + return rc; +} + +static void stream_thread_read_pipe_messages(struct stream_thread *sth) { + internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); + + if(read(sth->pipe.fds[PIPE_READ], sth->pipe.buffer, sth->pipe.size * sizeof(*sth->pipe.buffer)) <= 0) + nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu]: signal pipe read error", sth->id); + + stream_thread_process_opcodes(sth, NULL); } // -------------------------------------------------------------------------------------------------------------------- @@ -265,8 +301,8 @@ static int set_pipe_size(int pipe_fd, int new_size) { static void stream_thread_messages_resize_unsafe(struct stream_thread *sth) { internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); - if(sth->nodes_count >= sth->messages.size) { - size_t new_size = sth->messages.size ? 
sth->messages.size * 2 : 2; + if(sth->nodes_count * 2 >= sth->messages.size) { + size_t new_size = MAX(sth->messages.size * 2, sth->nodes_count * 2); sth->messages.array = reallocz(sth->messages.array, new_size * sizeof(*sth->messages.array)); sth->messages.copy = reallocz(sth->messages.copy, new_size * sizeof(*sth->messages.copy)); sth->messages.size = new_size; @@ -276,20 +312,30 @@ static void stream_thread_messages_resize_unsafe(struct stream_thread *sth) { // -------------------------------------------------------------------------------------------------------------------- static bool stream_thread_process_poll_slot(struct stream_thread *sth, nd_poll_result_t *ev, usec_t now_ut, size_t *replay_entries) { + internal_fatal(sth->tid != gettid_cached(), "Function %s() should only be used by the dispatcher thread", __FUNCTION__ ); + struct pollfd_meta *m = ev->data; - internal_fatal(!m, "Failed to get meta from event"); + if(!m) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "STREAM THREAD[%zu]: cannot get meta from nd_poll() event. 
Ignoring event.", sth->id); + return false; + } switch(m->type) { case POLLFD_TYPE_SENDER: { struct sender_state *s = m->s; - stream_sender_process_poll_events(sth, s, ev->events, now_ut); + s->thread.draining_input = true; + if(stream_sender_process_poll_events(sth, s, ev->events, now_ut)) + s->thread.draining_input = false; *replay_entries += dictionary_entries(s->replication.requests); break; } case POLLFD_TYPE_RECEIVER: { struct receiver_state *rpt = m->rpt; - stream_receive_process_poll_events(sth, rpt, ev->events, now_ut); + rpt->thread.draining_input = true; + if(stream_receive_process_poll_events(sth, rpt, ev->events, now_ut)) + rpt->thread.draining_input = false; break; } @@ -427,7 +473,7 @@ void *stream_thread(void *ptr) { META_SET(&sth->run.meta, (Word_t)&sth->run.pipe, &sth->run.pipe); if(!nd_poll_add(sth->run.ndpl, sth->pipe.fds[PIPE_READ], ND_POLL_READ, &sth->run.pipe)) - internal_fatal(true, "Failed to add pipe to nd_poll()"); + nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu]: failed to add pipe to nd_poll()", sth->id); bool exit_thread = false; size_t replay_entries = 0; @@ -484,7 +530,7 @@ void *stream_thread(void *ptr) { internal_fatal(true, "nd_poll() failed"); worker_is_busy(WORKER_STREAM_JOB_POLL_ERROR); nd_log_limit_static_thread_var(erl, 1, 1 * USEC_PER_MS); - nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu] poll() returned error", sth->id); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu] nd_poll() returned error", sth->id); continue; } @@ -597,7 +643,8 @@ static struct stream_thread * stream_thread_assign_and_start(RRDHOST *host) { if(!sth->thread) { sth->id = (sth - stream_thread_globals.threads); // find the slot number if(&stream_thread_globals.threads[sth->id] != sth) - fatal("STREAM THREAD[x] [%s]: thread id and slot do not match!", rrdhost_hostname(host)); + fatal("STREAM THREAD[x] [%s]: thread and slot owner do not match!", + rrdhost_hostname(host)); sth->pipe.fds[PIPE_READ] = -1; 
sth->pipe.fds[PIPE_WRITE] = -1; @@ -611,7 +658,7 @@ static struct stream_thread * stream_thread_assign_and_start(RRDHOST *host) { sth->thread = nd_thread_create(tag, NETDATA_THREAD_OPTION_DEFAULT, stream_thread, sth); if (!sth->thread) - nd_log_daemon(NDLP_ERR, "STREAM THREAD[%zu]: failed to create new thread for client.", sth->id); + nd_log(NDLS_DAEMON, NDLP_ERR, "STREAM THREAD[%zu]: failed to create new thread for client.", sth->id); } spinlock_unlock(&stream_thread_globals.assign.spinlock); @@ -638,7 +685,7 @@ void stream_receiver_add_to_queue(struct receiver_state *rpt) { stream_thread_node_queued(rpt->host); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM RECEIVE[%zu] [%s]: moving host to receiver queue...", + "STREAM RECEIVE[%zu] '%s': moving host to receiver queue...", sth->id, rrdhost_hostname(rpt->host)); spinlock_lock(&sth->queue.spinlock); @@ -653,7 +700,7 @@ void stream_sender_add_to_queue(struct sender_state *s) { stream_thread_node_queued(s->host); nd_log(NDLS_DAEMON, NDLP_DEBUG, - "STREAM THREAD[%zu] [%s]: moving host to dispatcher queue...", + "STREAM THREAD[%zu] '%s': moving host to sender queue...", sth->id, rrdhost_hostname(s->host)); spinlock_lock(&sth->queue.spinlock); diff --git a/src/streaming/stream-thread.h b/src/streaming/stream-thread.h index b3587e642920fa..5a8878a9803e64 100644 --- a/src/streaming/stream-thread.h +++ b/src/streaming/stream-thread.h @@ -91,6 +91,8 @@ struct stream_opcode { #define STREAM_MAX_THREADS 2048 #define THREAD_TAG_STREAM "STREAM" +#define MAX_IO_ITERATIONS_PER_EVENT 65536 // drain the input, take it all + typedef enum { POLLFD_TYPE_EMPTY, POLLFD_TYPE_SENDER, @@ -181,8 +183,8 @@ void stream_sender_check_all_nodes_from_poll(struct stream_thread *sth, usec_t n void stream_receiver_add_to_queue(struct receiver_state *rpt); void stream_sender_add_to_connector_queue(struct rrdhost *host); -void stream_sender_process_poll_events(struct stream_thread *sth, struct sender_state *s, nd_poll_event_t events, usec_t now_ut); -void 
stream_receive_process_poll_events(struct stream_thread *sth, struct receiver_state *rpt, nd_poll_event_t events, usec_t now_ut); +bool stream_sender_process_poll_events(struct stream_thread *sth, struct sender_state *s, nd_poll_event_t events, usec_t now_ut); +bool stream_receive_process_poll_events(struct stream_thread *sth, struct receiver_state *rpt, nd_poll_event_t events, usec_t now_ut); void stream_sender_cleanup(struct stream_thread *sth); void stream_receiver_cleanup(struct stream_thread *sth); @@ -193,6 +195,9 @@ struct stream_thread *stream_thread_by_slot_id(size_t thread_slot); void stream_thread_node_queued(struct rrdhost *host); void stream_thread_node_removed(struct rrdhost *host); +// returns true if my_meta has received a message +bool stream_thread_process_opcodes(struct stream_thread *sth, struct pollfd_meta *my_meta); + #include "stream-sender-internals.h" #include "stream-receiver-internals.h" #include "plugins.d/pluginsd_parser.h" diff --git a/src/streaming/stream-traffic-types.h b/src/streaming/stream-traffic-types.h index 4871c26a519f16..162887bea12d52 100644 --- a/src/streaming/stream-traffic-types.h +++ b/src/streaming/stream-traffic-types.h @@ -3,6 +3,10 @@ #ifndef NETDATA_STREAM_TRAFFIC_TYPES_H #define NETDATA_STREAM_TRAFFIC_TYPES_H +#ifdef __cplusplus +extern "C" { +#endif + typedef enum __attribute__((packed)) { STREAM_TRAFFIC_TYPE_REPLICATION = 0, STREAM_TRAFFIC_TYPE_FUNCTIONS, @@ -13,4 +17,8 @@ typedef enum __attribute__((packed)) { STREAM_TRAFFIC_TYPE_MAX, } STREAM_TRAFFIC_TYPE; +#ifdef __cplusplus +} +#endif + #endif //NETDATA_STREAM_TRAFFIC_TYPES_H diff --git a/src/streaming/stream.h b/src/streaming/stream.h index a4e4e08f98f08d..3cfdc52a39efad 100644 --- a/src/streaming/stream.h +++ b/src/streaming/stream.h @@ -41,6 +41,7 @@ char *stream_receiver_program_version_strdupz(struct rrdhost *host); #include "rrdhost-status.h" #include "protocol/commands.h" #include "stream-path.h" +#include "stream-control.h" void 
stream_threads_cancel(void); diff --git a/src/web/api/formatters/rrd2json.c b/src/web/api/formatters/rrd2json.c index a80275487ab1bc..7a1b23187547bc 100644 --- a/src/web/api/formatters/rrd2json.c +++ b/src/web/api/formatters/rrd2json.c @@ -124,7 +124,9 @@ int data_query_execute(ONEWAYALLOC *owa, BUFFER *wb, QUERY_TARGET *qt, time_t *l wrapper_end = rrdr_json_wrapper_end2; } + stream_control_user_data_query_started(); RRDR *r = rrd2rrdr(owa, qt); + stream_control_user_data_query_finished(); if(!r) { buffer_strcat(wb, "Cannot generate output with these parameters on this chart."); diff --git a/src/web/api/queries/query.c b/src/web/api/queries/query.c index c5acfccad05f32..1e06c739327ec3 100644 --- a/src/web/api/queries/query.c +++ b/src/web/api/queries/query.c @@ -1964,7 +1964,7 @@ static void rrd2rrdr_query_execute(RRDR *r, size_t dim_id_in_rrdr, QUERY_ENGINE_ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAGE_POINT sp, usec_t now_ut); -void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s) { +void backfill_tier_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s) { if(unlikely(tier >= storage_tiers)) return; #ifdef ENABLE_DBENGINE if(default_backfill == RRD_BACKFILL_NONE) return; @@ -1989,9 +1989,10 @@ void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s // there is really nothing we can do if(now_s <= latest_time_s || time_diff < granularity) return; - struct storage_engine_query_handle seqh; + stream_control_backfill_query_started(); // for each lower tier + struct storage_engine_query_handle seqh; for(int read_tier = (int)tier - 1; read_tier >= 0 ; read_tier--){ time_t smaller_tier_first_time = storage_engine_oldest_time_s(rd->tiers[read_tier].seb, rd->tiers[read_tier].smh); time_t smaller_tier_last_time = storage_engine_latest_time_s(rd->tiers[read_tier].seb, rd->tiers[read_tier].smh); @@ -2023,6 +2024,8 @@ void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t 
tier, time_t now_s //internal_error(true, "DBENGINE: backfilled chart '%s', dimension '%s', tier %d, from %ld to %ld, with %zu points from tier %d", // rd->rrdset->name, rd->name, tier, after_wanted, before_wanted, points, tr); } + + stream_control_backfill_query_finished(); } // ---------------------------------------------------------------------------- diff --git a/src/web/api/queries/query.h b/src/web/api/queries/query.h index 37202a0bada0af..c7ed4f0a07eec3 100644 --- a/src/web/api/queries/query.h +++ b/src/web/api/queries/query.h @@ -3,6 +3,8 @@ #ifndef NETDATA_API_DATA_QUERY_H #define NETDATA_API_DATA_QUERY_H +#include "libnetdata/common.h" + #ifdef __cplusplus extern "C" { #endif diff --git a/src/web/api/queries/weights.c b/src/web/api/queries/weights.c index e34774f3218686..c43d0116641b43 100644 --- a/src/web/api/queries/weights.c +++ b/src/web/api/queries/weights.c @@ -1285,7 +1285,10 @@ NETDATA_DOUBLE *rrd2rrdr_ks2( }; QUERY_TARGET *qt = query_target_create(&qtr); + stream_control_user_weights_query_started(); RRDR *r = rrd2rrdr(owa, qt); + stream_control_user_weights_query_finished(); + if(!r) goto cleanup; @@ -1524,7 +1527,9 @@ static void rrdset_weights_multi_dimensional_value(struct query_weights_data *qw ONEWAYALLOC *owa = onewayalloc_create(16 * 1024); QUERY_TARGET *qt = query_target_create(&qtr); + stream_control_user_weights_query_started(); RRDR *r = rrd2rrdr(owa, qt); + stream_control_user_weights_query_finished(); if(!r || rrdr_rows(r) != 1 || !r->d || r->d != r->internal.qt->query.used) goto cleanup;