Skip to content

Commit

Permalink
[#4] add documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
gricertg authored and gricertg committed Aug 18, 2020
1 parent 1047b1c commit 1cefded
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 26 deletions.
5 changes: 5 additions & 0 deletions essnapshot/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
"""A tool for creating Elasticsearch snapshots and deleting old ones.
See https://github.com/ctriegg/essnapshot for details.
"""

__version__ = '0.1.0'
23 changes: 18 additions & 5 deletions essnapshot/cli.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""This script is for running the essnapshot utility interactively."""
from optparse import OptionParser
from time import sleep
from essnapshot.helpers import open_configfile, snapshot_name
Expand All @@ -7,6 +8,19 @@


def wait_for_running_snapshots(esclient, repository_name):
"""Run in a loop until all running snapshots are done or failed.
Parameters
----------
esclient : Elasticsearch
the client for the elasticsearch cluster to connect to.
repository_name : str
the name of the repository to check for running snapshots
Returns
-------
nothing
"""
while check_snapshots_in_progress(
es.get_snapshots(esclient, repository_name)
):
Expand All @@ -15,7 +29,7 @@ def wait_for_running_snapshots(esclient, repository_name):


def main():
# parse the only needed command line parameter
"""Runs the essnapshot tool."""
parser = OptionParser()
parser.add_option("-c", "--config", dest="configfile",
help="Path to configuration file. "
Expand All @@ -24,15 +38,16 @@ def main():
metavar="FILE")
options = parser.parse_args()[0]

# check if configfile parameter is given
if options.configfile is None:
parser.error('No configuration file given.')

# fetch config from configfile
config = open_configfile(options.configfile)

# if the optional es_connections parameter is given, use it
# otherwise we set None to use the default config
esconfig = config['es_connections'] if 'es_connections' in config else None
esclient = es.initialize_es_client(esconfig)

es.connection_check(esclient)
es.ensure_snapshot_repo(
esclient,
Expand All @@ -41,13 +56,11 @@ def main():
wait_for_running_snapshots(esclient, config['repository_name'])
es.create_snapshot(esclient, config['repository_name'], snapshot_name())

# find all snapshots to delete
wait_for_running_snapshots
delete_eligible_snapshots = find_delete_eligible_snapshots(
es.get_snapshots(esclient, config['repository_name']),
config['retention_time'])

# delete snapshots older than the configured retention time
if len(delete_eligible_snapshots) > 0:
es.delete_snapshots(esclient, config['repository_name'],
delete_eligible_snapshots)
Expand Down
111 changes: 99 additions & 12 deletions essnapshot/es.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,40 @@
"""This module contains functions for ES communication used by essnapshot."""
import sys
from elasticsearch import Elasticsearch, NotFoundError
from elasticsearch import TransportError, ConnectionError


def initialize_es_client(es_connections: list):
"""initialize an instance of the ES client and return it."""
def initialize_es_client(es_connections: list) -> Elasticsearch:
"""initialize an instance of the ES client and return it.
Parameters
----------
es_connections : list
the parameter is optional. It should contain hosts definitions
as described in
https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch
Returns
-------
Elasticsearch
An Elasticsearch instance client is returned for further usage
"""
return Elasticsearch(es_connections)


def connection_check(esclient):
"""Make sure that the connection to the ES cluster is working"""
def connection_check(esclient: Elasticsearch) -> bool:
"""Make sure that the connection to the ES cluster is working
Parameters
----------
esclient : Elasticsearch
the client for the ES cluster which should be checked must be given
Returns
-------
bool
The function will return True if the ES cluster is online
"""
if not esclient.ping():
print("Can't connect to ES Cluster.", file=sys.stderr)
try:
Expand All @@ -21,10 +46,25 @@ def connection_check(esclient):


def ensure_snapshot_repo(
esclient,
esclient: Elasticsearch,
repository_name: str,
repository_config: dict):
"""Check if snapshot repo exists, if not, create it."""
"""Check if snapshot repo exists, if not, create it.
Parameters
----------
esclient : Elasticsearch
the client for the ES cluster to connect to
repository_name : str
the name of the repository to ensure
repository_config : dict
the configuration of the ES snapshot, described as body under
https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.SnapshotClient.create
Returns
-------
nothing
"""
try:
snapshot_repo = esclient.snapshot.get_repository(
repository=repository_name)
Expand All @@ -45,8 +85,26 @@ def ensure_snapshot_repo(
exit(1)


def create_snapshot(esclient, repository_name: str, snapshot_name: str):
"""Create a new snapshot, include the timestamp in the name."""
def create_snapshot(
esclient: Elasticsearch,
repository_name: str,
snapshot_name: str) -> bool:
"""Creates a new Elasticsearch snapshot
Parameters
----------
esclient : Elasticsearch
the client for the ES cluster to connect to
repository_name : str
the name of the snapshot repository to use
snapshot_name : str
The name of the Elasticsearch snapshot
Returns
-------
bool
returns True if the creation of the snapshot was successful
"""
snapshot_return = esclient.snapshot.create(
repository=repository_name,
snapshot=snapshot_name)
Expand All @@ -58,14 +116,43 @@ def create_snapshot(esclient, repository_name: str, snapshot_name: str):
return True


def get_snapshots(esclient, repository_name: str):
"""Get the list of all snapshots in the given repository."""
def get_snapshots(esclient, repository_name: str) -> list:
"""Get all snapshots in the given repository and return them as list.
Parameters
----------
esclient : Elasticsearch
the client for the ES cluster to connect to
repository_name : str
the name of the snapshot repository to use
Returns
-------
list
a list of multiple dictionaries (one per snapshot) is returned, see
https://elasticsearch-py.readthedocs.io/en/master/api.html#elasticsearch.client.CatClient.snapshots
"""
# pylint: disable=unexpected-keyword-arg
return esclient.cat.snapshots(repository=repository_name, format='json')


def delete_snapshots(esclient, repository_name: str, snapshots: list):
"""Deletes all snapshots in a list in the given repository."""
def delete_snapshots(esclient, repository_name: str, snapshots: list) -> bool:
"""Deletes all snapshots in a list in the given repository
Parameters
----------
esclient : Elasticsearch
the client for the ES cluster to connect to
repository_name : str
the name of the snapshot repository to use
snapshots : list
a list of snapshot names to delete
Returns
-------
bool
returns True if the delete operation was successful
"""
delete_return = esclient.snapshot.delete(
repository=repository_name,
snapshot=snapshots)
Expand Down
83 changes: 74 additions & 9 deletions essnapshot/helpers.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,29 @@
"""This module contains helper functions for the essnapshot utility."""
import sys
import re
import yaml
from pathlib import Path
from datetime import datetime, timezone, timedelta


def retention_timedelta(time_string: str):
"""Retruns the given retention time from String as timedelta object"""
def retention_timedelta(time: str) -> timedelta:
"""returns the given retention time from String as timedelta object
Parameters
----------
time : str
A string in the format <digits><S|M|H|D>, e.g. 30D for 30 days
Returns
-------
timedelta
a timedelta object generated from the time string is returned
"""
pattern = re.compile(r"^(?P<value>\d+)(?P<unit>[a-zA-Z])?$")
match = pattern.match(time_string)
match = pattern.match(time)
if not match:
raise ValueError("Unable to parse given time String {t}."
.format(t=time_string))
.format(t=time))
if match.group('unit'):
unit = match.group('unit').upper()
else:
Expand All @@ -29,8 +41,22 @@ def retention_timedelta(time_string: str):
raise ValueError("Unsupported time unit {u}".format(u=unit))


def open_configfile(filepath):
"""returns yaml config from file if file exists and is valid yaml"""
def open_configfile(filepath: str) -> dict:
"""returns yaml config from file if file exists and is valid yaml
After the configfile is opened and parsed, it is checked whether the
required parameters are present.
Parameters
----------
filepath : str
A string which is a valid absolute or relative path to the configfile
Returns
-------
dict
The parsed YAML config file is returned as a dict
"""
try:
Path(filepath).resolve(strict=True)
except FileNotFoundError as e:
Expand Down Expand Up @@ -60,15 +86,38 @@ def open_configfile(filepath):
return config


def snapshot_name():
def snapshot_name() -> str:
"""returns a name for the snapshot with a date postfix
Parameters
----------
no parameters
Returns
-------
str
The name will look like this: essnapshot_2020-05-12_23-54-01
"""

snapshot_timestamp = datetime.utcnow()
timestamp_string = snapshot_timestamp.strftime("%Y-%m-%d_%H-%M-%S")
snapshot_name = "essnapshot_{d}".format(d=timestamp_string)
return snapshot_name


def check_snapshots_in_progress(snapshots: list):
"""Checks the list of snapshots for shit"""
"""Checks if there are snapshots in the IN_PROGRESS state
Parameters
----------
snapshots : list
a list of snapshots returned from ES must be provided
Returns
-------
bool
returns true if any snapshots are IN_PROGRESS, otherwise false
"""
if len([s['id'] for s in snapshots if s['status'] == 'IN_PROGRESS']) > 0:
return True
else:
Expand All @@ -78,7 +127,23 @@ def check_snapshots_in_progress(snapshots: list):
def find_delete_eligible_snapshots(
snapshots: list,
retention_time: str,
from_time=datetime.now(timezone.utc)):
from_time: datetime = datetime.now(timezone.utc)) -> list:
"""Find snapshots older than the given retention time
Parameters
----------
snapshots : list
a list of snapshots returned from ES must be provided
retention_time : str
the time string which will be parsed by retention_timedelta
from_time : datetime
the point in time from which to start the calculation (should be now)
Returns
-------
list
a list of all delete eligible snapshot names will be returned
"""
delete_eligible_snapshots = []
for snapshot in snapshots:
snapshot_timestamp = datetime.fromtimestamp(int(snapshot['end_epoch']),
Expand Down

0 comments on commit 1cefded

Please sign in to comment.