Skip to content

Commit 54f324a

Browse files
committed
CI: Enable PaddlePaddle evaluation
Follow-up to d2l-ai#1281: adds the missing PaddlePaddle evaluation to the D2L Infra 2.0 setup for d2l-zh.
1 parent e709c64 commit 54f324a

File tree

5 files changed

+106
-2
lines changed

5 files changed

+106
-2
lines changed
+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/bin/bash
2+
3+
set -ex
4+
5+
# Used to capture status exit of build eval command
6+
ss=0
7+
8+
REPO_NAME="$1" # Eg. 'd2l-zh'
9+
TARGET_BRANCH="$2" # Eg. 'master' ; if PR raised to master
10+
CACHE_DIR="$3" # Eg. 'ci_cache_pr' or 'ci_cache_push'
11+
12+
pip3 install d2l==0.17.6
13+
# -p keeps this step idempotent: with `set -e`, a plain mkdir would abort the script if _build already exists
mkdir -p _build
14+
15+
# Load helpers that live next to this script; quote the path so a directory containing spaces does not word-split
source "$(dirname "$0")/utils.sh"
16+
17+
# Move sanity check outside
18+
d2lbook build outputcheck tabcheck
19+
20+
# Move aws copy commands for cache restore outside
21+
if [ "$DISABLE_CACHE" = "false" ]; then
22+
echo "Retrieving paddle build cache from ${CACHE_DIR}"
23+
# Variables are expanded inside the quoted string so the whole command is passed as a single argument
measure_command_time "aws s3 sync s3://preview.d2l.ai/${CACHE_DIR}/${REPO_NAME}-${TARGET_BRANCH}/_build/eval_paddle/ _build/eval_paddle/ --delete --quiet --exclude 'data/*'"
24+
fi
25+
26+
# Continue the script even if some notebooks in build fail to
27+
# make sure that cache is copied to s3 for the successful notebooks
28+
d2lbook build eval --tab paddle || ((ss=1))
29+
30+
# Move aws copy commands for cache store outside
31+
echo "Upload paddle build cache to s3"
32+
# Variables are expanded inside the quoted string so the whole command is passed as a single argument
measure_command_time "aws s3 sync _build s3://preview.d2l.ai/${CACHE_DIR}/${REPO_NAME}-${TARGET_BRANCH}/_build --acl public-read --quiet --exclude 'eval*/data/*'"
33+
34+
# Exit with a non-zero status if evaluation failed
35+
if [ "$ss" -ne 0 ]; then
36+
exit 1
37+
fi

.github/workflows/build-docker.yml

+13
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ on:
1212
image_mxnet:
1313
type: boolean
1414
description: Build MXNet Image
15+
image_paddle:
16+
type: boolean
17+
description: Build Paddle Image
1518
image_builder:
1619
type: boolean
1720
description: Build D2L Builder Image
@@ -66,6 +69,16 @@ jobs:
6669
# Clean up to reclaim space
6770
echo "y" | docker system prune -a
6871
72+
- name: Build D2L Paddle Image
73+
if: github.event.inputs.image_paddle == 'true'
74+
run: |
75+
chmod +x ./login_ecr.sh; ./login_ecr.sh
76+
docker build -f Dockerfile.d2l-zh-paddle -t d2l-containers:d2l-zh-paddle-latest .
77+
docker tag d2l-containers:d2l-zh-paddle-latest 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-zh-paddle-latest
78+
docker push 650140442593.dkr.ecr.us-west-2.amazonaws.com/d2l-containers:d2l-zh-paddle-latest
79+
# Clean up to reclaim space
80+
echo "y" | docker system prune -a
81+
6982
- name: Build D2L CPU Builder Image
7083
if: github.event.inputs.image_builder == 'true'
7184
run: |

.github/workflows/ci.yml

+24-2
Original file line numberDiff line numberDiff line change
@@ -83,16 +83,38 @@ jobs:
8383
echo "Terminating Submitted AWS Batch Job: ${{ env.Batch_JobID }}"
8484
aws batch terminate-job --job-id "${{ env.Batch_JobID }}" --reason "Job terminated by cancelled workflow"
8585
86+
build_paddle:
87+
name: Build Paddle
88+
if: "github.repository == 'd2l-ai/d2l-zh' && !contains(github.event.head_commit.message, '[skip paddle]') && !contains(github.event.head_commit.message, '[skip frameworks]')"
89+
runs-on: ubuntu-latest
90+
steps:
91+
- name: Checkout repository
92+
uses: actions/checkout@v3
93+
- name: Setup Env Vars
94+
uses: ./.github/actions/setup_env_vars
95+
- name: Evaluate Paddle on AWS Batch
96+
uses: ./.github/actions/submit-job
97+
with:
98+
job-type: ci-gpu-paddle
99+
job-name: D2L-Build-Paddle
100+
command: chmod +x ./.github/workflow_scripts/build_paddle.sh && ./.github/workflow_scripts/build_paddle.sh "${{ env.REPO_NAME }}" "${{ env.TARGET_BRANCH }}" "${{ env.CACHE_DIR }}"
101+
- name: Terminate Batch Job on Cancellation
102+
if: ${{ cancelled() && env.Batch_JobID }}
103+
run: |
104+
echo "Terminating Submitted AWS Batch Job: ${{ env.Batch_JobID }}"
105+
aws batch terminate-job --job-id "${{ env.Batch_JobID }}" --reason "Job terminated by cancelled workflow"
106+
86107
build_and_deploy:
87108
name: Build Website/PDF & Publish
88-
needs: [build_torch, build_tf, build_mxnet]
109+
needs: [build_torch, build_tf, build_mxnet, build_paddle]
89110
if: |
90111
always() &&
91112
github.repository == 'd2l-ai/d2l-zh' &&
92113
!contains(github.event.head_commit.message, '[skip builder]') &&
93114
(needs.build_torch.result == 'success' || needs.build_torch.result == 'skipped') &&
94115
(needs.build_tf.result == 'success' || needs.build_tf.result == 'skipped') &&
95-
(needs.build_mxnet.result == 'success' || needs.build_mxnet.result == 'skipped')
116+
(needs.build_mxnet.result == 'success' || needs.build_mxnet.result == 'skipped') &&
117+
(needs.build_paddle.result == 'success' || needs.build_paddle.result == 'skipped')
96118
runs-on: ubuntu-latest
97119
steps:
98120
- name: Checkout repository

ci/docker/Dockerfile.d2l-zh-paddle

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Use Paddle 2.3.2 (Dec 2022)
2+
FROM nvcr.io/nvidia/paddlepaddle:22.12-py3
3+
4+
RUN adduser --disabled-password --disabled-login ci
5+
WORKDIR /home/ci
6+
7+
# Copy d2l_job script
8+
COPY d2l_job.sh .
9+
# Chain with && so a failing chmod fails the image build instead of being masked by chown's exit status
RUN chmod +x d2l_job.sh && chown ci d2l_job.sh
10+
11+
# Copy git timesync for caching
12+
COPY git-timesync /home/ci/.local/bin/
13+
RUN chmod +x /home/ci/.local/bin/git-timesync
14+
15+
# Allow permissions for pip installations and git-timesync
16+
RUN chown -R ci:ci /home/ci/.local
17+
18+
USER ci
19+
20+
ENV PATH="/home/ci/.local/bin:$PATH"
21+
22+
# Install d2lbook using pip + paddlepaddle dependencies
23+
RUN pip3 install git+https://github.com/d2l-ai/d2l-book opencv-python==4.6.0.66
24+
25+
# Python script to print framework versions
26+
COPY print_versions.py .
27+
28+
CMD ["/bin/bash"]

ci/submit-job.py

+4
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@
3535
'ci-gpu-mxnet': {
3636
'job_definition': 'd2l-ci-zh-gpu-mxnet:1',
3737
'job_queue': 'D2L-CI-GPU'
38+
},
39+
'ci-gpu-paddle': {
40+
'job_definition': 'd2l-ci-zh-gpu-paddle:1',
41+
'job_queue': 'D2L-CI-GPU'
3842
}
3943
}
4044

0 commit comments

Comments
 (0)