From 5556379061078cfe7b7a80209a4fc2b510f52cf9 Mon Sep 17 00:00:00 2001 From: nntrn <17685332+nntrn@users.noreply.github.com> Date: Thu, 4 Jul 2024 18:27:31 -0500 Subject: [PATCH] Update code * Add TOC anchor link * Replace meta url with foundry url <domain>/api/views/<id> -> dev.socrata.com/foundry/<domain>/<id> * Exclude datasets with year in the title for catalogs with more than 2k items --- docs/resources.md | 2 +- scripts/update-catalogs.sh | 17 ++++++++--------- scripts/views.jq | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 37 insertions(+), 18 deletions(-) diff --git a/docs/resources.md b/docs/resources.md index 37178b4..7f09e21 100644 --- a/docs/resources.md +++ b/docs/resources.md @@ -147,7 +147,7 @@ https://api.us.socrata.com/api/catalog/v1?ids={{ids}} https://api.us.socrata.com/api/catalog/v1?derived= * Sort order: `/catalog/v1{?order}` - https://api.us.socrata.com/api/catalog/v1?order= + https://api.us.socrata.com/api/catalog/v1?order=updatedAt%20DESC * Pagination: `/catalog/v1{?offset,limit}` https://api.us.socrata.com/api/catalog/v1?offset,limit= diff --git a/scripts/update-catalogs.sh b/scripts/update-catalogs.sh index 561dfa3..0477f67 100755 --- a/scripts/update-catalogs.sh +++ b/scripts/update-catalogs.sh @@ -24,24 +24,23 @@ mkdir -p $OUTDIR mkdir -p $DOCDIR download_data() { - local data_out="$OUTDIR/${1}.json" local doc_out="$DOCDIR/${1}.md" + local catalog_url="$2" if [[ ! 
-f $data_out ]]; then - curl -s --create-dirs -o $data_out "$2" --fail + curl -s --create-dirs -o $data_out "$catalog_url" --fail _pids+=("$!") fi - echo "$doc_out" - - local JQ_EXPR='include "views"; results|write_markdown("category")' + echo -e "$doc_out\t$catalog_url" - if [[ $3 == "domain" ]]; then - JQ_EXPR='include "views"; results|write_markdown("domain")' - fi + jq -L $DIR/scripts -r \ + --arg catalog "$catalog_url" \ + --arg group "${3:-category}" \ + 'include "views"; results|write_markdown($group)' \ + $data_out >$doc_out - jq -L $DIR/scripts -r "$JQ_EXPR" $data_out >$doc_out _pids+=("$!") } diff --git a/scripts/views.jq b/scripts/views.jq index 4828dc1..eb47749 100644 --- a/scripts/views.jq +++ b/scripts/views.jq @@ -28,6 +28,7 @@ def neat_view: name, url: "https://\(.domaincname)/resource/\(.id).json", meta_url: "https://\(.domaincname)/api/views/\(.id)", + foundry_url: "https://dev.socrata.com/foundry/\(.domaincname)/\(.id)", category: ( .metadata_custom_fields_dataset_category_category_tile // .category // @@ -71,13 +72,26 @@ def format_title: def format_title($str): $str | format_title; +# export EXCLUDE_EXPR='[2][0-2][0-3][0-9]|FY[0-2][0-9]' +def default_exclude: [ + "ARCHIVED","BOUNDARIES","PLANNINGCADASTRE","UTILITIESCOMMUNICATION", + "GEOSCIENTIFICINFORMATION","GUIDE","TRANSPORTATION", + "Test","TEST","deprecated","demo","Demo", + (env.EXCLUDE_EXPR? 
// "") + ] | map(select(length > 1))| join("|") | "(\(.))"; + +def exclude(field;$str):select(field|test("\($str)";"ix")|not) ; + def write_markdown($groupby): (input_filename|title_from_filename) as $filename - | map(select((.category|length)>0)) - | map(select(.name|(test("[A-Z][a-z]";"x") and (test("DEMO|[Dd]emo|TEST|[Tt]est|ARCHIVE|[Aa]rchive|UTILITIES")|not) ) )) + | map(select((.category|length)>0 and (.name|test("[A-Z][a-z]";"x"))) | + exclude(.name;"DEMO|[Dd]emo|TEST|[Tt]est|ARCHIVE|[Aa]rchive|UTILITIES") + ) + | length as $total + | map(if $total > 2000 then exclude(.name; "[2][0-2][0-3][0-9]|FY[0-2][0-9]") else . end) | group_by(.["\($groupby)"]) | ([ - "
Table of Contents", + "
Table of Contents", "", map("- [\(.[0][$groupby])](#\(slugify(.[0][$groupby])))"), "", @@ -85,17 +99,23 @@ def write_markdown($groupby): "", "> **NOTE** ", "> (%) denotes strategic dataset", - "" - ]|flatten|join("\n")) as $toc + "", + "Data source: \((env.CATALOG_URL // $catalog )? // "" )" + ]|flatten|join("\n") + ) as $toc | map( "\n## \(.[0][$groupby])\n\n" + ( sort_by(.name) - | map(["- **\(.name)**","[Data](\(.url)) | [Meta](\(.meta_url)) | Last update: \(.last_update)",.summary] - | map(select(length > 0))|join(" \n "))|join("\n\n") + | map([ + "- **\(.name)**","[Data](\(.url)) | [Docs](\(.foundry_url)) | Last update: \(.last_update)", + .summary] | map(select(length > 0))|join(" \n ") ) + | join("\n\n") + "\n\n[[TOP]](#toc)" ) ) | flatten - | join("\n\n")| "# \($filename)\n\n\($toc)\n\n\(.)"; + | join("\n\n") + | ["# \($filename)","", $toc,"", . ] + | join("\n"); def write_markdown: write_markdown(.category);