Add option to print log in JSON format
afoix committed Sep 20, 2021
1 parent: d2b7dee · commit: deeaf81
Showing 3 changed files with 63 additions and 18 deletions.
42 changes: 28 additions & 14 deletions pyega3/libs/pretty_printing.py
@@ -1,16 +1,20 @@
 import logging
+import json
 
 from .utils import status_ok
 
 
-def pretty_print_authorized_datasets(datasets):
-    logging.info("Dataset ID")
-    logging.info("-----------------")
-    for dataset in datasets:
-        logging.info(dataset.id)
+def pretty_print_authorized_datasets(datasets, as_json):
+    if as_json:
+        logging.info(json.dumps([{"id": dataset.id} for dataset in datasets]))
+    else:
+        logging.info("Dataset ID")
+        logging.info("-----------------")
+        for dataset in datasets:
+            logging.info(dataset.id)


-def pretty_print_files_in_dataset(files):
+def pretty_print_files_in_dataset(files, as_json):
     """
     Print a table of files in authorized dataset from api call api_list_files_in_dataset
@@ -26,12 +30,22 @@ def pretty_print_files_in_dataset(files):
     }
     """
-    format_string = "{:15} {:6} {:12} {:36} {}"
-
-    logging.info(format_string.format("File ID", "Status", "Bytes", "Check sum", "File name"))
-    for file in files:
-        logging.info(format_string.format(file.id, status_ok(file.status), str(file.size),
-                                          file.unencrypted_checksum, file.display_name))
-
-    logging.info('-' * 80)
-    logging.info("Total dataset size = %.2f GB " % (sum(file.size for file in files) / (1024 * 1024 * 1024.0)))
+    if as_json:
+        logging.info(json.dumps([{
+            "id": file.id,
+            "status": file.status,
+            "bytes": file.size,
+            "checksum": file.unencrypted_checksum,
+            "name": file.display_name
+        } for file in files]))
+    else:
+        format_string = "{:15} {:6} {:12} {:36} {}"
+
+        logging.info(format_string.format("File ID", "Status", "Bytes", "Check sum", "File name"))
+        for file in files:
+            logging.info(format_string.format(file.id, status_ok(file.status), str(file.size),
+                                              file.unencrypted_checksum, file.display_name))
+
+        logging.info('-' * 80)
+        logging.info("Total dataset size = %.2f GB " % (sum(file.size for file in files) / (1024 * 1024 * 1024.0)))
5 changes: 3 additions & 2 deletions pyega3/pyega3.py
@@ -36,6 +36,7 @@ def main():
     parser.add_argument("-t", "--test", action="store_true", help="Test user activated")
     parser.add_argument("-ms", "--max-slice-size", type=int, default=DataFile.DEFAULT_SLICE_SIZE,
                         help="Set maximum size for each slice in bytes (default: 100 MB)")
+    parser.add_argument("-j", "--json", action="store_true", help="Output data in JSON format instead of tables")
 
     subparsers = parser.add_subparsers(dest="subcommand", help="subcommands")
 
@@ -137,15 +138,15 @@ def main():
 
     if args.subcommand == "datasets":
         datasets = DataSet.list_authorized_datasets(data_client)
-        pretty_print_authorized_datasets(datasets)
+        pretty_print_authorized_datasets(datasets, args.json)
 
     if args.subcommand == "files":
         if args.identifier[3] != 'D':
             logging.error("Unrecognized identifier - please use EGAD accession for dataset requests")
             sys.exit()
         dataset = DataSet(data_client, args.identifier)
         files = dataset.list_files()
-        pretty_print_files_in_dataset(files)
+        pretty_print_files_in_dataset(files, args.json)
 
     elif args.subcommand == "fetch":
         genomic_range_args = (args.reference_name, args.reference_md5, args.start, args.end, args.format)
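The new flag is declared on the top-level parser, before the subparsers are created, so args.json is available to both the datasets and files subcommands. A minimal standalone argparse sketch of that layout (assumed names only, not the full pyega3 parser):

    import argparse

    # Global flag on the parent parser, shared by every subcommand.
    parser = argparse.ArgumentParser(description="sketch of a CLI with a global --json flag")
    parser.add_argument("-j", "--json", action="store_true",
                        help="Output data in JSON format instead of tables")

    subparsers = parser.add_subparsers(dest="subcommand", help="subcommands")
    subparsers.add_parser("datasets")
    files_parser = subparsers.add_parser("files")
    files_parser.add_argument("identifier")

    # The global flag is written before the subcommand.
    args = parser.parse_args(["-j", "datasets"])
    assert args.json is True and args.subcommand == "datasets"

Because the option belongs to the parent parser, it is placed before the subcommand on the command line (for example pyega3 -j datasets).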
34 changes: 32 additions & 2 deletions test/test_pretty_printing.py
@@ -1,10 +1,19 @@
+import json
+
 from pyega3.libs.data_file import DataFile
 from pyega3.libs.data_set import DataSet
 from pyega3.libs.pretty_printing import *
 
 
 def test_pretty_print_authorized_datasets(mock_data_client):
-    pretty_print_authorized_datasets([DataSet(mock_data_client, 'EGAD0123')])
+    pretty_print_authorized_datasets([DataSet(mock_data_client, 'EGAD0123')], False)
+
+
+def test_pretty_print_authorized_datasets_in_json(mock_data_client, caplog):
+    caplog.set_level(logging.INFO)
+    pretty_print_authorized_datasets([DataSet(mock_data_client, 'EGAD0123')], True)
+    assert len(caplog.messages) == 1
+    assert json.loads(caplog.messages[0]) == [{"id": "EGAD0123"}]
 
 
 def test_pretty_print_files_in_dataset(mock_data_client):
@@ -14,4 +23,25 @@ def test_pretty_print_files_in_dataset(mock_data_client):
                              size=0,
                              unencrypted_checksum="MD5SUM678901234567890123456789012",
                              status="available")]
-    pretty_print_files_in_dataset(test_reply)
+    pretty_print_files_in_dataset(test_reply, False)
+
+
+def test_pretty_print_files_in_dataset_in_json(mock_data_client, caplog):
+    caplog.set_level(logging.INFO)
+    test_reply = [DataFile(mock_data_client, "EGAF00001753747",
+                           display_file_name="EGAZ00001314035.bam.bai.cip",
+                           file_name="EGAZ00001314035.bam.bai.cip",
+                           size=0,
+                           unencrypted_checksum="MD5SUM678901234567890123456789012",
+                           status="available")]
+    pretty_print_files_in_dataset(test_reply, True)
+
+    assert len(caplog.messages) == 1
+
+    output_object = json.loads(caplog.messages[0])
+    assert len(output_object) == 1
+    assert output_object[0]["id"] == test_reply[0].id
+    assert output_object[0]["name"] == test_reply[0].display_name
+    assert output_object[0]["bytes"] == test_reply[0].size
+    assert output_object[0]["checksum"] == test_reply[0].unencrypted_checksum
+    assert output_object[0]["status"] == test_reply[0].status
