From 815042a56a3d6aa9603e4f37800e7271769dc125 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Sun, 6 Mar 2022 17:37:32 -0500 Subject: [PATCH 01/16] Make no_dedup_sentences the default for extract-and-vector (can be overridden via MC_NO_DEDUP_SENTENCES env var) apps/common/src/python/mediawords/util/config/__init__.py: add env_bool function apps/extract-and-vector/bin/extract_and_vector_worker.py: honor MC_NO_DEDUP_SENTENCES --- .../src/python/mediawords/util/config/__init__.py | 10 ++++++++++ .../bin/extract_and_vector_worker.py | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/apps/common/src/python/mediawords/util/config/__init__.py b/apps/common/src/python/mediawords/util/config/__init__.py index 08f12feb8e..53819ff3e5 100644 --- a/apps/common/src/python/mediawords/util/config/__init__.py +++ b/apps/common/src/python/mediawords/util/config/__init__.py @@ -46,6 +46,16 @@ def env_value(name: str, required: bool = True, allow_empty_string: bool = False return value +def env_bool(name: str, default: bool = False) -> bool: + """ + Retrieve boolean from environment variable; should be 0 or 1. + + :param name: Environment variable name. + :param default: default value, if no value found. 
+ """ + + value = os.environ.get(name, default) + return bool(int(value)) def file_with_env_value(name: str, allow_empty_string: bool = False, encoded_with_base64: bool = False) -> str: """ diff --git a/apps/extract-and-vector/bin/extract_and_vector_worker.py b/apps/extract-and-vector/bin/extract_and_vector_worker.py index 0738c6e200..7a21a67864 100755 --- a/apps/extract-and-vector/bin/extract_and_vector_worker.py +++ b/apps/extract-and-vector/bin/extract_and_vector_worker.py @@ -4,6 +4,7 @@ from mediawords.db import connect_to_db from mediawords.job import JobBroker +from mediawords.util.config import env_bool from mediawords.util.log import create_logger from mediawords.util.perl import decode_object_from_bytes_if_needed from extract_and_vector.dbi.stories.extractor_arguments import PyExtractorArguments @@ -69,8 +70,10 @@ def run_extract_and_vector(stories_id: int, use_cache: bool = False, use_existin log.info("Extracting story {}...".format(stories_id)) + no_dedup_sentences = env_bool('MC_NO_DEDUP_SENTENCES', True) try: - extractor_args = PyExtractorArguments(use_cache=use_cache, use_existing=use_existing) + extractor_args = PyExtractorArguments(use_cache=use_cache, use_existing=use_existing, + no_dedup_sentences=no_dedup_sentences) extract_and_process_story(db=db, story=story, extractor_args=extractor_args) except Exception as ex: From c77dd185a959740930f6c4c4048c6ea9d3b62335 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Fri, 18 Mar 2022 18:29:41 -0400 Subject: [PATCH 02/16] common/Dockerfile: skip jieba.cache creation; makes empty root owned file --- apps/common/Dockerfile | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/apps/common/Dockerfile b/apps/common/Dockerfile index 08d99c3ef5..7fbb0b8c4a 100644 --- a/apps/common/Dockerfile +++ b/apps/common/Dockerfile @@ -139,11 +139,13 @@ RUN \ done # Prebuild Jieba dictionary cache -COPY bin/build_jieba_dict_cache.py / -RUN \ - /build_jieba_dict_cache.py && \ - rm 
/build_jieba_dict_cache.py && \ - true +# PLB 2022-03-18: was creating empty, root owned file + +#COPY bin/build_jieba_dict_cache.py / +#RUN \ +# /build_jieba_dict_cache.py && \ +# rm /build_jieba_dict_cache.py && \ +# true # Symlink Log::Log4perl configuration to where it's going to be found RUN ln -s /opt/mediacloud/src/common/perl/log4perl.conf /etc/log4perl.conf From f37b7f92729106d2afc5161e1d822c5da25b7be8 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Fri, 18 Mar 2022 18:31:45 -0400 Subject: [PATCH 03/16] solr-base/Dockerfile: try cloning mediacloud config as mediacloud64 --- apps/solr-base/Dockerfile | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/apps/solr-base/Dockerfile b/apps/solr-base/Dockerfile index 0ff5015f9c..6c76d33d8e 100644 --- a/apps/solr-base/Dockerfile +++ b/apps/solr-base/Dockerfile @@ -19,5 +19,18 @@ RUN \ RUN mkdir -p /usr/src/ COPY src/solr/ /usr/src/solr/ +# Try to create 64-bit enabled mediacloud64 collection by cloning config +# NOTE: collections/mediacloud/conf/solrconfig.xml uses +# ${mediacloud.luceneMatchVersion} ${mediacloud.solr_webapp_dir} ${mediacloud.solr_dist_dir} +# which reference JVM properties set in solr-shard/bin/solr-shard.sh +# ALSO: core.properties has "instanceDir=/var/lib/solr/mediacloud" (dir does not exist?!) 
+# will be wacked to .../mediacloud64 (also does not exist) +RUN \ + mkdir -p /usr/src/solr/collections/mediacloud64 && \ + cp -rp /usr/src/solr/collections/mediacloud/* /usr/src/solr/collections/mediacloud64/ && \ + sed -i.32 's/mediacloud/mediacloud64/' /usr/src/solr/collections/mediacloud64/core.properties && \ + sed -i.32 '/ Date: Tue, 22 Mar 2022 21:30:34 -0400 Subject: [PATCH 04/16] common/Dockerfile: reenable jieba.cache creation & chown it --- apps/common/Dockerfile | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/common/Dockerfile b/apps/common/Dockerfile index 7fbb0b8c4a..b7a575490c 100644 --- a/apps/common/Dockerfile +++ b/apps/common/Dockerfile @@ -139,13 +139,13 @@ RUN \ done # Prebuild Jieba dictionary cache -# PLB 2022-03-18: was creating empty, root owned file - -#COPY bin/build_jieba_dict_cache.py / -#RUN \ -# /build_jieba_dict_cache.py && \ -# rm /build_jieba_dict_cache.py && \ -# true +COPY bin/build_jieba_dict_cache.py / +RUN \ + /build_jieba_dict_cache.py && \ + rm /build_jieba_dict_cache.py && \ + chown mediacloud:mediacloud /var/tmp/jieba.cache && \ + ls -l /var/tmp/jieba.cache && \ + true # Symlink Log::Log4perl configuration to where it's going to be found RUN ln -s /opt/mediacloud/src/common/perl/log4perl.conf /etc/log4perl.conf From 18682966e56f2caf24e09d07d50479a65bb61d35 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Wed, 23 Mar 2022 16:54:51 -0400 Subject: [PATCH 05/16] apps/common/src/python/mediawords/solr/request.py: add/use SOLR_COLLECTION Set to "mediacloud2" (solr alias!) 
--- apps/common/src/python/mediawords/solr/request.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/common/src/python/mediawords/solr/request.py b/apps/common/src/python/mediawords/solr/request.py index 5694c0e0da..4e79984c6a 100644 --- a/apps/common/src/python/mediawords/solr/request.py +++ b/apps/common/src/python/mediawords/solr/request.py @@ -24,6 +24,8 @@ __QUERY_HTTP_TIMEOUT = 15 * 60 """Timeout of a single HTTP query.""" +# Testing alias!! +SOLR_COLLECTION = 'mediacloud2' class _AbstractSolrRequestException(Exception, metaclass=abc.ABCMeta): """Abstract .solr.request exception.""" @@ -59,7 +61,7 @@ def __wait_for_solr_to_start(config: Optional[CommonConfig]) -> None: """Wait for Solr to start and collections to become available, if needed.""" # search for an empty or rare term here because searching for *:* sometimes causes a timeout for some reason - sample_select_url = f"{config.solr_url()}/mediacloud/select?q=BOGUSQUERYTHATRETURNSNOTHINGNADA&rows=1&wt=json" + sample_select_url = f"{config.solr_url()}/{SOLR_COLLECTION}/select?q=BOGUSQUERYTHATRETURNSNOTHINGNADA&rows=1&wt=json" connected = False @@ -191,7 +193,7 @@ def solr_request(path: str, if not params: params = {} - abs_uri = furl(f"{solr_url}/mediacloud/{path}") + abs_uri = furl(f"{solr_url}/{SOLR_COLLECTION}/{path}") abs_uri = abs_uri.set(params) abs_url = str(abs_uri) From 4ab37292d295722f64040b778cf20eef9106fa69 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Wed, 23 Mar 2022 17:50:13 -0400 Subject: [PATCH 06/16] apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm: speedups for processing backlog up FETCH_BLOCK_SIZE from 100 to 200: amortizes citus connection startup time by fetching larger blocks of text. 
Honor skip_update_snapshot option (defaults to true): skip setting snapshots.searchable=true --- apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm b/apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm index a260cb109c..223dea7fcd 100644 --- a/apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm +++ b/apps/import-solr-data/src/perl/MediaWords/Solr/Dump.pm @@ -55,7 +55,7 @@ Readonly my @SOLR_FIELDS => qw/stories_id media_id publish_date publish_day publ text title language processed_stories_id tags_id_stories timespans_id/; # how many sentences to fetch at a time from the postgres query -Readonly my $FETCH_BLOCK_SIZE => 100; +Readonly my $FETCH_BLOCK_SIZE => 200; # default time sleep when there are less than MIN_STORIES_TO_PROCESS: Readonly my $DEFAULT_THROTTLE => 60; @@ -601,6 +601,7 @@ Options: * throttle -- sleep this number of seconds between each block of stories (default 60) * full -- shortcut for: update=false, empty_queue=true, throttle=1; assume and optimize for static queue * skip_logging -- skip logging the import into the solr_import_stories or solr_imports tables (default=false) +* skip_update_snapshot -- skip setting snapshots.searchable=true (default=true) The import will run in blocks of "max_queued_stories" at a time. The function will keep trying to find stories to import. 
If there are less than @@ -627,6 +628,7 @@ sub import_data($;$) my $empty_queue = $options->{ empty_queue } // 0; my $throttle = $options->{ throttle } // $DEFAULT_THROTTLE; my $skip_logging = $options->{ skip_logging } // 0; + my $skip_update_snapshot = $options->{ skip_update_snapshot } // 1; my $daemon = $options->{ daemon } // 0; $_last_max_queue_stories_id = 0; @@ -669,7 +671,7 @@ sub import_data($;$) _save_import_log( $db, $stories_ids ); } - if ( !$skip_logging ) + if ( !$skip_logging && !$skip_update_snapshot ) { _update_snapshot_solr_status( $db ); } From 791248a0edc00154759bef6fb6773d0667ea9918 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Wed, 23 Mar 2022 18:14:15 -0400 Subject: [PATCH 07/16] add apps/solr-base/src/solr/aliases.json with "mediacloud2" solr alias --- apps/solr-base/src/solr/aliases.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 apps/solr-base/src/solr/aliases.json diff --git a/apps/solr-base/src/solr/aliases.json b/apps/solr-base/src/solr/aliases.json new file mode 100644 index 0000000000..c25d98d5cb --- /dev/null +++ b/apps/solr-base/src/solr/aliases.json @@ -0,0 +1 @@ +{"collection":{"mediacloud2":"mediacloud64,mediacloud"}} From c4fb3d600a7d53118806e0bbdbfb0b936a3d9426 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Thu, 24 Mar 2022 22:18:10 -0400 Subject: [PATCH 08/16] apps/common/src/requirements.txt: force MarkupSafe==2.0.1 (Jinja2 2.11.3 can't cope with the new MarkupSafe 2.1.1) --- apps/common/src/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/apps/common/src/requirements.txt b/apps/common/src/requirements.txt index 3bb17a43d9..6b8237199f 100644 --- a/apps/common/src/requirements.txt +++ b/apps/common/src/requirements.txt @@ -43,6 +43,10 @@ furl==2.1.0 # Chinese language tokenizer, stemmer, etc. jieba==0.42.1 +# For Jinja2 2.11.3, which requests MarkupSafe>=0.23 and is now +# getting version 2.1.1, which removed a deprecated function. 
+MarkupSafe==2.0.1 + # Parsing email templates Jinja2==2.11.3 From 5845a61c4ca3f879a6213a8240d3358c9d9faf6f Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Sun, 27 Mar 2022 17:35:06 -0400 Subject: [PATCH 09/16] solr-zookeeper: preload aliases.json into zookeeper --- apps/solr-zookeeper/bin/init_solr_config.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/apps/solr-zookeeper/bin/init_solr_config.sh b/apps/solr-zookeeper/bin/init_solr_config.sh index 752a435389..965dc25e3a 100755 --- a/apps/solr-zookeeper/bin/init_solr_config.sh +++ b/apps/solr-zookeeper/bin/init_solr_config.sh @@ -41,5 +41,12 @@ for collection_path in /usr/src/solr/collections/*; do fi done +ALIASES=/usr/src/solr/aliases.json +if [ -f $ALIASES ]; then + /opt/solr/server/scripts/cloud-scripts/zkcli.sh \ + -zkhost 127.0.0.1:2181 \ + -cmd putfile /aliases.json $ALIASES +fi + # Stop after initial configuration pkill java From 40391ed953f633643ede7eb6a5b38bb3b63e24e6 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Wed, 27 Apr 2022 12:19:40 -0400 Subject: [PATCH 10/16] apps/postgresql-server/bin/apply_migrations.sh: increase PGCTL_START_TIMEOUT to 3hrs --- apps/postgresql-server/bin/apply_migrations.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/postgresql-server/bin/apply_migrations.sh b/apps/postgresql-server/bin/apply_migrations.sh index bcc2d702e0..77267db3a7 100755 --- a/apps/postgresql-server/bin/apply_migrations.sh +++ b/apps/postgresql-server/bin/apply_migrations.sh @@ -14,7 +14,8 @@ MIGRATIONS_DIR="/opt/postgresql-server/pgmigrate/migrations" TEMP_PORT=12345 # In case the database is in recovery, wait for up to 1 hour for it to complete -PGCTL_START_TIMEOUT=3600 +# PLB: increased to three hours +PGCTL_START_TIMEOUT=10800 if [ ! -d "${MIGRATIONS_DIR}" ]; then echo "Migrations directory ${MIGRATIONS_DIR} does not exist." 
From f3951163cc58b146ef336e2e0bf05d3d417e4e60 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Fri, 6 May 2022 20:34:08 -0400 Subject: [PATCH 11/16] postgresql-pgbouncer/conf/pgbouncer.ini: PG server running on postgresql EC2 server w/o docker --- apps/postgresql-pgbouncer/conf/pgbouncer.ini | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/postgresql-pgbouncer/conf/pgbouncer.ini b/apps/postgresql-pgbouncer/conf/pgbouncer.ini index eb3f28662c..fd88573c47 100644 --- a/apps/postgresql-pgbouncer/conf/pgbouncer.ini +++ b/apps/postgresql-pgbouncer/conf/pgbouncer.ini @@ -1,5 +1,6 @@ [databases] -* = host=postgresql-server port=5432 user=mediacloud +; PhilB 5/6/22: PG server running on postgresql EC2 server w/o docker +* = host=postgresql. port=5432 user=mediacloud [pgbouncer] From 76be84476b420bac15a37e34373a580d57863327 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Sat, 7 May 2022 00:15:50 -0400 Subject: [PATCH 12/16] pgbouncer.ini: use postgresql server ip --- apps/postgresql-pgbouncer/conf/pgbouncer.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/postgresql-pgbouncer/conf/pgbouncer.ini b/apps/postgresql-pgbouncer/conf/pgbouncer.ini index fd88573c47..f7a14b215d 100644 --- a/apps/postgresql-pgbouncer/conf/pgbouncer.ini +++ b/apps/postgresql-pgbouncer/conf/pgbouncer.ini @@ -1,6 +1,6 @@ [databases] ; PhilB 5/6/22: PG server running on postgresql EC2 server w/o docker -* = host=postgresql. port=5432 user=mediacloud +* = host=172.30.0.58 port=5432 user=mediacloud [pgbouncer] From bf745541b996e0f7571e1b04c5c857958aed8aa6 Mon Sep 17 00:00:00 2001 From: Phil Budne Date: Wed, 18 May 2022 11:58:56 -0400 Subject: [PATCH 13/16] apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Timespans.pm Removed extra AND to try to fix #833 as suggested by Rahul. 
--- .../src/perl/MediaWords/Controller/Api/V2/Topics/Timespans.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Topics/Timespans.pm b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Topics/Timespans.pm index 3fbb4e236e..cb87851cc7 100644 --- a/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Topics/Timespans.pm +++ b/apps/webapp-api/src/perl/MediaWords/Controller/Api/V2/Topics/Timespans.pm @@ -88,7 +88,7 @@ SQL snapshots_id FROM timespans AS t where - topics_id = ? AND + topics_id = ? $snapshot_clause $focus_clause $timespan_clause From b75c6caf59e1f586eca32e2ac991c9e847a6bf95 Mon Sep 17 00:00:00 2001 From: Xavier Frankline Date: Wed, 26 Oct 2022 11:22:47 +0300 Subject: [PATCH 14/16] update rabbitmq to include prometheus plugin and update docker compose file --- apps/docker-compose.dist.yml | 6 ++++-- apps/rabbitmq-server/Dockerfile | 2 +- apps/rabbitmq-server/conf/enabled_plugins | 2 +- apps/temporal-prometheus/Dockerfile | 2 +- apps/temporal-prometheus/prometheus.yml | 11 +++++++++-- 5 files changed, 16 insertions(+), 7 deletions(-) diff --git a/apps/docker-compose.dist.yml b/apps/docker-compose.dist.yml index 32fadf7436..0e43ef8ac3 100644 --- a/apps/docker-compose.dist.yml +++ b/apps/docker-compose.dist.yml @@ -1813,7 +1813,8 @@ services: placement: constraints: # Must run on the host with Temporal Grafana data volume - - node.labels.role-temporal-grafana == true + # - node.labels.role-temporal-grafana == true + - node.labels.role-monitoring == true # Worker count replicas: 1 resources: @@ -1909,7 +1910,8 @@ services: placement: constraints: # Must run on the host with Temporal Prometheus data volume - - node.labels.role-temporal-prometheus == true + # - node.labels.role-temporal-prometheus == true + - node.labels.role-monitoring == true # Worker count replicas: 1 resources: diff --git a/apps/rabbitmq-server/Dockerfile b/apps/rabbitmq-server/Dockerfile index 
b5ff424313..d31e142257 100644 --- a/apps/rabbitmq-server/Dockerfile +++ b/apps/rabbitmq-server/Dockerfile @@ -2,7 +2,7 @@ # RabbitMQ server # -FROM gcr.io/mcback/base:latest +FROM gcr.io/mcback/base:release # Add RabbitMQ APT repository RUN \ diff --git a/apps/rabbitmq-server/conf/enabled_plugins b/apps/rabbitmq-server/conf/enabled_plugins index d8bb228458..402c318d8f 100644 --- a/apps/rabbitmq-server/conf/enabled_plugins +++ b/apps/rabbitmq-server/conf/enabled_plugins @@ -1 +1 @@ -[rabbitmq_amqp1_0,rabbitmq_management,rabbitmq_management_visualiser,rabbitmq_shovel,rabbitmq_shovel_management]. +[rabbitmq_amqp1_0,rabbitmq_management,rabbitmq_management_visualiser,rabbitmq_shovel,rabbitmq_shovel_management,rabbitmq_prometheus]. diff --git a/apps/temporal-prometheus/Dockerfile b/apps/temporal-prometheus/Dockerfile index d5e3036b43..5af91f0f2e 100644 --- a/apps/temporal-prometheus/Dockerfile +++ b/apps/temporal-prometheus/Dockerfile @@ -2,7 +2,7 @@ # Prometheus for Temporal stats # -FROM gcr.io/mcback/base:latest +FROM gcr.io/mcback/base:release RUN \ mkdir -p /opt/prometheus/ && \ diff --git a/apps/temporal-prometheus/prometheus.yml b/apps/temporal-prometheus/prometheus.yml index 0a62dfbacb..6e361cf34d 100644 --- a/apps/temporal-prometheus/prometheus.yml +++ b/apps/temporal-prometheus/prometheus.yml @@ -1,6 +1,6 @@ global: - scrape_interval: 5s - scrape_timeout: 5s + scrape_interval: 15s + scrape_timeout: 30s scrape_configs: @@ -20,3 +20,10 @@ scrape_configs: - 'temporal-server:9093' # worker - 'temporal-server:9094' + + # rabbitmq monitoring from rabbitmq_prometheus plugin + - job_name: 'rabbitmq' + static_configs: + - targets: + - "localhost:15692" + From 1e9f0c30025b6d72b739aec4b9ab34257ba88571 Mon Sep 17 00:00:00 2001 From: Xavier Frankline Date: Wed, 2 Nov 2022 08:09:56 +0300 Subject: [PATCH 15/16] add prometheus alertmanager --- apps/docker-compose.dist.yml | 36 +++++++- apps/temporal-alertmanager/.dockerignore | 92 +++++++++++++++++++++ 
apps/temporal-alertmanager/Dockerfile | 28 +++++++ apps/temporal-alertmanager/alertmanager.yml | 14 ++++ apps/temporal-prometheus/alert.rules | 41 +++++++++ apps/temporal-prometheus/prometheus.yml | 13 ++- 6 files changed, 222 insertions(+), 2 deletions(-) create mode 100644 apps/temporal-alertmanager/.dockerignore create mode 100644 apps/temporal-alertmanager/Dockerfile create mode 100644 apps/temporal-alertmanager/alertmanager.yml create mode 100644 apps/temporal-prometheus/alert.rules diff --git a/apps/docker-compose.dist.yml b/apps/docker-compose.dist.yml index 0e43ef8ac3..84a3960c6f 100644 --- a/apps/docker-compose.dist.yml +++ b/apps/docker-compose.dist.yml @@ -2239,7 +2239,33 @@ services: # RAM limit memory: "2G" - + # + # Temporal Prometheus (Temporal's statistics store) + # ------------------------------------------------- + # + temporal-alertmanager: + image: thepsalmist/temporal-alertmanager:release_monitoring_v2 + init: true + depends_on: + - temporal-prometheus + networks: + - default + expose: + - "9093" + volumes: + - vol_temporal_alertmanager_data:/opt/alertmanager/data/ + deploy: + <<: *endpoint-mode-dnsrr + placement: + constraints: + # Must run on the host with Temporal Alertmanager data volume + - node.labels.role-monitoring == true + # Worker count + replicas: 1 + resources: + limits: + cpus: "1" + memory: "1G" # # Networks # ======== @@ -2546,3 +2572,11 @@ volumes: type: none o: bind device: /space/mediacloud/vol_temporal_grafana_data + + # Temporal Grafana data + vol_temporal_alertmanager_data: + driver: local + driver_opts: + type: none + o: bind + device: /space/mediacloud/vol_temporal_alertmanager_data diff --git a/apps/temporal-alertmanager/.dockerignore b/apps/temporal-alertmanager/.dockerignore new file mode 100644 index 0000000000..9b2c362a80 --- /dev/null +++ b/apps/temporal-alertmanager/.dockerignore @@ -0,0 +1,92 @@ +# +# Files from the build context to be ignored by "docker build". 
+# +# You might want to add as many of constantly changing files here as possible +# to prevent container's image from getting rebuilt every full moon. +# +# Unfortunately, we can't just symlink this file to every app's directory: +# +# https://github.com/moby/moby/issues/12886 +# +# so for the time being you have to manually copy this file to every app +# subdirectory: +# +# cd apps/ +# find . -maxdepth 1 -type d \( ! -name . \) -exec bash -c "cd '{}' && cp ../dockerignore.dist ./.dockerignore" \; +# + +*$py.class +*.cover +*.DS_Store +*.egg +*.egg-info/ +*.log +*.manifest +*.mo +*.pot +*.py[cod] +*.sage.py +*.so +*.spec +*.swp +*/*.py[cod] +*/*.swp +*/*/*.py[cod] +*/*/*.swp +*/*/*/*.py[cod] +*/*/*/*.swp +*/*/*/__pycache__/ +*/*/__pycache__/ +*/__pycache__/ +._* +.apdisk +.AppleDB +.AppleDesktop +.AppleDouble +.cache +.com.apple.timemachine.donotpresent +.coverage +.coverage.* +.dockerignore +.DocumentRevisions-V100 +.DS_Store +.eggs +.env +.fseventsd +.git +.gitignore +.hypothesis +.idea +.installed.cfg +.ipynb_checkpoints +.LSOverride +.mypy_cache +.pytest_cache +.Python +.python-version +.ropeproject +.scrapy +.Spotlight-V100 +.spyderproject +.spyproject +.TemporaryItems +.tox +.Trashes +.venv +.VolumeIcon.icns +.webassets-cache +__pycache__ +celerybeat-schedule +coverage.xml +Icon +local_settings.py +Network Trash Folder +nosetests.xml +parts +pip-delete-this-directory.txt +pip-log.txt +sdist +Temporary Items +wheels +_Inline + diff --git a/apps/temporal-alertmanager/Dockerfile b/apps/temporal-alertmanager/Dockerfile new file mode 100644 index 0000000000..6225929070 --- /dev/null +++ b/apps/temporal-alertmanager/Dockerfile @@ -0,0 +1,28 @@ +FROM gcr.io/mcback/base:release + +RUN \ + mkdir -p /opt/alertmanager/ && \ + /dl_to_stdout.sh "https://github.com/prometheus/alertmanager/releases/download/v0.24.0/alertmanager-0.24.0.linux-$(dpkg --print-architecture).tar.gz" | \ + tar -zx -C /opt/alertmanager/ --strip 1 && \ + true + +COPY alertmanager.yml 
/opt/alertmanager/alertmanager.yml + +# Add unprivileged user the service will run as +RUN \ + useradd -ms /bin/bash temporal && \ + mkdir -p /opt/alertmanager/data/ && \ + chown temporal:temporal /opt/alertmanager/data/ && \ + true + +WORKDIR /opt/alertmanager/ + +ENV PATH="/opt/alertmanager:${PATH}" + +EXPOSE 9093 + +USER temporal + +VOLUME /opt/alertmanager/data + +CMD ["alertmanager"] \ No newline at end of file diff --git a/apps/temporal-alertmanager/alertmanager.yml b/apps/temporal-alertmanager/alertmanager.yml new file mode 100644 index 0000000000..ca3e858ca1 --- /dev/null +++ b/apps/temporal-alertmanager/alertmanager.yml @@ -0,0 +1,14 @@ +route: + receiver: 'mail' + repeat_interval: 4h + group_by: [ alertname ] + + +receivers: + - name: 'mail' + email_configs: + - smarthost: 'smtp.gmail.com:587' + auth_username: 'testmail@gmail.com' + auth_password: "temppass" + from: 'fromemail' + to: 'toemail' \ No newline at end of file diff --git a/apps/temporal-prometheus/alert.rules b/apps/temporal-prometheus/alert.rules new file mode 100644 index 0000000000..65d2dbf2a8 --- /dev/null +++ b/apps/temporal-prometheus/alert.rules @@ -0,0 +1,41 @@ +groups: +- name: sample + rules: + + - alert: service-down + expr: up{job='prometheus'} == 0 + for: "30s" + labels: + severity: page + annotations: + summary: "Service down" + description: "Sample service down" + +- name: rabbitmq_alerts + rules: + + - alert: rabbitmq_down + expr: up{job='rabbitmq'} == 0 + for: "30s" + labels: + severity: page + annotations: + summary: "Rabbitmq service down" + + - alert: RabbitmqQueueFillingUp + expr: rabbitmq_queue_messages{queue="MediaWords::Job::ExtractAndVector"} > 10000 + for: 5m + labels: + severity: critical + annotations: + summary: "Rabbitmq queue filling up" + description: "Queue is filling up" + + - alert: RabbitmqTooManyMessagesInQueue + expr: rabbitmq_queue_messages_ready{queue="MediaWords::Job::ExtractAndVector"} > 40000 + for: 5m + labels: + severity: warning + annotations: + 
summary: "Rabbitmq too many mesages in queue" + description: "Queue is filling up (> 20000 msgs)" diff --git a/apps/temporal-prometheus/prometheus.yml b/apps/temporal-prometheus/prometheus.yml index 6e361cf34d..b4209f0e04 100644 --- a/apps/temporal-prometheus/prometheus.yml +++ b/apps/temporal-prometheus/prometheus.yml @@ -2,9 +2,19 @@ global: scrape_interval: 15s scrape_timeout: 30s +rule_files: + - 'alert.rules' + +alerting: + alertmanagers: + - static_configs: + - targets: + - "alertmanager:9093" + scrape_configs: - job_name: 'prometheus' + metrics_path: /metrics static_configs: - targets: - 'localhost:9090' @@ -23,7 +33,8 @@ scrape_configs: # rabbitmq monitoring from rabbitmq_prometheus plugin - job_name: 'rabbitmq' + metrics_path: /metrics static_configs: - targets: - - "localhost:15692" + - "rabbitmq-server:15692" From f8bf9cf58c129c94472c73ab6425e6c6a232710f Mon Sep 17 00:00:00 2001 From: Xavier Frankline Date: Wed, 2 Nov 2022 08:14:51 +0300 Subject: [PATCH 16/16] alertmanager email config variables --- apps/temporal-alertmanager/alertmanager.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/apps/temporal-alertmanager/alertmanager.yml b/apps/temporal-alertmanager/alertmanager.yml index ca3e858ca1..e9f6c992b3 100644 --- a/apps/temporal-alertmanager/alertmanager.yml +++ b/apps/temporal-alertmanager/alertmanager.yml @@ -7,8 +7,8 @@ route: receivers: - name: 'mail' email_configs: - - smarthost: 'smtp.gmail.com:587' - auth_username: 'testmail@gmail.com' - auth_password: "temppass" - from: 'fromemail' - to: 'toemail' \ No newline at end of file + - smarthost: ${EMAIL_HOST} + auth_username: ${EMAIL_USERNAME} + auth_password: ${EMAIL_PASSWORD} + from: ${FROM_EMAIL} + to: ${TO_EMAIL} \ No newline at end of file