Skip to content

Basemap vector: sync data sources (schedule) #147

Basemap vector: sync data sources (schedule)

Basemap vector: sync data sources (schedule) #147

name: "Basemap vector: sync data sources"
run-name: "Basemap vector: sync ${{ inputs.source }} data sources (${{ github.event_name }})"
on:
schedule:
- cron: "35 5 * * *"
workflow_dispatch:
inputs:
source:
description: 'Data source'
required: true
default: all
type: choice
options:
- all
- grpk
- stvk
- address-registry
permissions: { }
jobs:
basemap-vector-datasource-address-registry:
name: Address registry sync and transform
runs-on: ubuntu-latest
timeout-minutes: 30
container: ghcr.io/osgeo/gdal:ubuntu-full-3.9.1
permissions:
contents: read
concurrency: basemap-vector-datasource-address-registry
if: ${{ inputs.source == 'all' || inputs.source == 'address-registry' || github.event_name == 'schedule' }}
environment:
name: basemap-vector-datasource-address-registry
url: https://cdn.startupgov.lt/tiles/vector/sources/address-registry/houses-espg-4326.gpkg.zip
outputs:
diff: ${{ steps.comparison.outputs.diff }}
steps:
- name: Install required packages
run: apt-get update && apt-get install -y git csvkit
# Needed only for GitHub action for uploading to S3
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: |
.github
scripts
- name: Generate houses GPKG in EPSG:4326
run: bash scripts/create-houses-geopackage.sh
- name: Create output directory
run: mkdir output
- name: Copy files to output
run: |
cp houses-espg-4326.gpkg.zip output/houses-espg-4326.gpkg.zip
cp data-sources/data-source-checksums.txt output/data-source-checksums.txt
- name: Generate checksum
working-directory: output
run: md5sum houses-espg-4326.gpkg.zip >> checksums.txt
- name: Download latest data source hashes
run: wget -O published-data-source-checksums.txt "https://cdn.biip.lt/tiles/vector/sources/address-registry/data-source-checksums.txt"
- name: Check that the files are the exact same
id: comparison
run: |
{
echo 'diff<<EOF'
(diff -u 'published-data-source-checksums.txt' 'output/data-source-checksums.txt' || :)
echo 'EOF'
} >> "${GITHUB_OUTPUT}"
- name: Upload to S3
uses: ./.github/actions/rclone-sync
if: ${{ github.event_name != 'schedule' || steps.comparison.outputs.diff != '' }}
with:
s3-endpoint: ${{ vars.S3_ENDPOINT }}
s3-access-key-id: ${{ secrets.S3_ACCESS_KEY_ID }}
s3-secret-access-key: ${{ secrets.S3_SECRET_ACCESS_KEY }}
source-path: output
destination-path: /tiles/vector/sources/address-registry
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: address-registry-house-numbers
path: output
if-no-files-found: error
compression-level: 0
basemap-vector-datasource-grpk:
name: GRPK sync and transform
runs-on: ubuntu-latest
# GeoPortal download is really slow. Downloading less than 3 GB takes more than 3 hours :(
timeout-minutes: 360
container: ghcr.io/osgeo/gdal:ubuntu-full-3.9.1
permissions: { }
concurrency: basemap-vector-datasource-grpk
if: ${{ inputs.source == 'all' || inputs.source == 'grpk' || github.event_name == 'schedule' }}
environment:
name: basemap-vector-datasource-grpk
url: https://cdn.startupgov.lt/tiles/vector/sources/grpk/grpk-espg-4326.shp.zip
outputs:
diff: ${{ steps.comparison.outputs.diff }}
steps:
# Needed only for GitHub action for uploading to S3
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: .github
- name: Create output directory
run: mkdir output
- name: Download GRPK from GeoPortal
run: wget -nv https://www.geoportal.lt/download/opendata/GRPK/GRPK_Open_SHP.zip
- name: Calculate GRPK_Open_SHP.zip checksum
run: md5sum GRPK_Open_SHP.zip >> output/data-source-checksums.txt
- name: Unzip GRPK data source
run: unzip -j GRPK_Open_SHP.zip -x "*.txt" -d GRPK_Open_SHP && rm GRPK_Open_SHP.zip
- name: Transform to EPSG:4326
run: ogr2ogr -f "ESRI Shapefile" -lco ENCODING=UTF-8 -t_srs EPSG:4326 output/grpk-espg-4326.shp.zip GRPK_Open_SHP
- name: Generate checksum
working-directory: output
run: md5sum grpk-espg-4326.shp.zip >> checksums.txt
- name: Download latest data source hashes
run: wget -O published-data-source-checksums.txt "https://cdn.biip.lt/tiles/vector/sources/grpk/data-source-checksums.txt"
- name: Check that the files are the exact same
id: comparison
run: |
{
echo 'diff<<EOF'
(diff -u 'published-data-source-checksums.txt' 'output/data-source-checksums.txt' || :)
echo 'EOF'
} >> "${GITHUB_OUTPUT}"
- name: Upload to S3
uses: ./.github/actions/rclone-sync
if: ${{ github.event_name != 'schedule' || steps.comparison.outputs.diff != '' }}
with:
s3-endpoint: ${{ vars.S3_ENDPOINT }}
s3-access-key-id: ${{ secrets.S3_ACCESS_KEY_ID }}
s3-secret-access-key: ${{ secrets.S3_SECRET_ACCESS_KEY }}
source-path: output
destination-path: /tiles/vector/sources/grpk
basemap-vector-datasource-stvk:
name: STVK sync and transform
runs-on: ubuntu-latest
timeout-minutes: 15
container: ghcr.io/osgeo/gdal:ubuntu-full-3.9.1
permissions: { }
concurrency: basemap-vector-datasource-stvk
if: ${{ inputs.source == 'all' || inputs.source == 'stvk' }}
environment:
name: basemap-vector-datasource-stvk
url: https://cdn.startupgov.lt/tiles/vector/sources/stvk/stvk-4326.gpkg.zip
steps:
# Needed only for GitHub action for uploading to S3
- name: Checkout repository
uses: actions/checkout@v4
with:
sparse-checkout: .github
- name: Create output directory
run: mkdir output
- name: Generate STVK GPKG in EPSG:4326
working-directory: output
run: |
ogr2ogr -t_srs EPSG:4326 -dsco DATETIME_FORMAT=UTC -nlt CONVERT_TO_LINEAR -dsco VERSION=1.4 stvk-4326.gpkg.zip \
WFS:"https://services.stvk.lt/wfs/stvk-services?SERVICE=WFS&REQUEST=GetCapabilities&VERSION=2.0.0" \
nac_parkai reg_parkai valstybiniai_rezervatai valstybiniai_draustiniai
- name: Generate checksum
working-directory: output
run: md5sum stvk-4326.gpkg.zip >> checksums.txt
- name: Upload artifact
uses: actions/upload-artifact@v4
with:
name: stvk
path: output
if-no-files-found: error
compression-level: 0
- name: Upload to S3
uses: ./.github/actions/rclone-sync
with:
s3-endpoint: ${{ vars.S3_ENDPOINT }}
s3-access-key-id: ${{ secrets.S3_ACCESS_KEY_ID }}
s3-secret-access-key: ${{ secrets.S3_SECRET_ACCESS_KEY }}
source-path: output
destination-path: /tiles/vector/sources/stvk
create-release:
name: Create release
if: ${{ needs.basemap-vector-datasource-address-registry.outputs.diff != '' || needs.basemap-vector-datasource-grpk.outputs.diff != '' }}
runs-on: ubuntu-latest
concurrency: basemap-vector-datasource-create-release
needs:
- basemap-vector-datasource-address-registry
- basemap-vector-datasource-grpk
permissions:
contents: read
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Get Token for creating new Release
id: get_workflow_token
uses: peter-murray/workflow-application-token-action@v3
with:
application_id: ${{ secrets.APPLICATION_ID }}
application_private_key: ${{ secrets.APPLICATION_PRIVATE_KEY }}
permissions: "contents:write"
revoke_token: true
- name: Get next version
uses: reecetech/[email protected]
id: version
with:
scheme: semver
increment: patch
- name: Release
uses: softprops/action-gh-release@v2
with:
make_latest: false
generate_release_notes: false
tag_name: ${{ steps.version.outputs.version }}
token: ${{ steps.get_workflow_token.outputs.token }}
body: |
Automated release after data source update
### GRPK
```
${{ needs.basemap-vector-datasource-grpk.outputs.diff }}
```
### Address registry
```
${{ needs.basemap-vector-datasource-address-registry.outputs.diff }}
```