Skip to content

Commit

Permalink
Fix the jobs tab in the beta dashboard and fill it with data from bot…
Browse files Browse the repository at this point in the history
…h "submission" jobs and "driver" jobs (ray-project#25902)

## Why are these changes needed?
- Fixes the jobs tab in the new dashboard. Previously it didn't load.
- Combines the old job concept ("driver jobs") and the new job submission concept into a single concept called "jobs". The Jobs tab shows information about both kinds of jobs.

- Updates all job APIs: They now return both submission jobs and driver jobs. They also contain additional data in the response, including "id", "job_id", "submission_id", and "driver". They also accept either job_id or submission_id as input.

- Job ID is the same as the "ray core job id" concept. It is in the form of "0100000" and is the primary id to represent jobs.
- Submission ID is an ID that is generated for each ray job submission. It is in the form of "raysubmit_12345...". It is a secondary id that can be used if a client needs to provide a self-generated id, or if the job id doesn't exist (e.g. if the submission job doesn't create a ray driver).

This PR has 2 deprecations:
- The `submit_job` sdk now accepts a new kwarg `submission_id`. `job_id` is deprecated.
- The `ray job submit` CLI now accepts `--submission-id`. `--job-id` is deprecated.

**This PR has 4 backwards incompatible changes:**
- list_jobs sdk now returns a list instead of a dictionary
- the `ray job list` CLI now prints a list instead of a dictionary
- The `/api/jobs` endpoint returns a list instead of a dictionary
- The `POST api/jobs` endpoint (submit job) now returns a json with `submission_id` field instead of `job_id`.
  • Loading branch information
alanwguo authored Jul 27, 2022
1 parent 30ed942 commit 5d6bc53
Show file tree
Hide file tree
Showing 17 changed files with 541 additions and 183 deletions.
26 changes: 14 additions & 12 deletions dashboard/client/src/pages/job/hook/useJobList.ts
Original file line number Diff line number Diff line change
@@ -1,24 +1,23 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { useCallback, useContext, useEffect, useRef, useState } from "react";
import { GlobalContext } from "../../../App";
import { getJobList } from "../../../service/job";
import { Job } from "../../../type/job";
import { UnifiedJob } from "../../../type/job";

export const useJobList = () => {
const [jobList, setList] = useState<Job[]>([]);
const [jobList, setList] = useState<UnifiedJob[]>([]);
const [page, setPage] = useState({ pageSize: 10, pageNo: 1 });
const [msg, setMsg] = useState("Loading the job list...");
const [isRefreshing, setRefresh] = useState(true);
const { ipLogMap } = useContext(GlobalContext);
const [filter, setFilter] = useState<
{
key: "jobId" | "name" | "language" | "state" | "namespaceId";
key: "job_id" | "status";
val: string;
}[]
>([]);
const refreshRef = useRef(isRefreshing);
const tot = useRef<NodeJS.Timeout>();
const changeFilter = (
key: "jobId" | "name" | "language" | "state" | "namespaceId",
val: string,
) => {
const changeFilter = (key: "job_id" | "status", val: string) => {
const f = filter.find((e) => e.key === key);
if (f) {
f.val = val;
Expand All @@ -37,9 +36,11 @@ export const useJobList = () => {
}
const rsp = await getJobList();

if (rsp?.data?.data?.summary) {
setList(rsp.data.data.summary.sort((a, b) => b.timestamp - a.timestamp));
setMsg(rsp.data.msg || "");
if (rsp) {
setList(
rsp.data.sort((a, b) => (b.start_time ?? 0) - (a.start_time ?? 0)),
);
setMsg("Fetched jobs");
}

tot.current = setTimeout(getJob, 4000);
Expand All @@ -55,7 +56,7 @@ export const useJobList = () => {
}, [getJob]);
return {
jobList: jobList.filter((node) =>
filter.every((f) => node[f.key] && node[f.key].includes(f.val)),
filter.every((f) => node[f.key] && (node[f.key] ?? "").includes(f.val)),
),
msg,
isRefreshing,
Expand All @@ -64,5 +65,6 @@ export const useJobList = () => {
page,
originalJobs: jobList,
setPage: (key: string, val: number) => setPage({ ...page, [key]: val }),
ipLogMap,
};
};
82 changes: 54 additions & 28 deletions dashboard/client/src/pages/job/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import dayjs from "dayjs";
import React from "react";
import { Link } from "react-router-dom";
import Loading from "../../components/Loading";
import { SearchInput, SearchSelect } from "../../components/SearchComponent";
import { SearchInput } from "../../components/SearchComponent";
import TitleCard from "../../components/TitleCard";
import { useJobList } from "./hook/useJobList";

Expand All @@ -25,12 +25,13 @@ const useStyles = makeStyles((theme) => ({
}));

const columns = [
"ID",
"DriverIpAddress",
"DriverPid",
"IsDead",
"Job ID",
"Submission ID",
"Status",
"Logs",
"StartTime",
"EndTime",
"Driver Pid",
];

const JobList = () => {
Expand All @@ -43,6 +44,7 @@ const JobList = () => {
changeFilter,
page,
setPage,
ipLogMap,
} = useJobList();

return (
Expand All @@ -62,13 +64,8 @@ const JobList = () => {
<TitleCard title="Job List">
<TableContainer>
<SearchInput
label="ID"
onChange={(value) => changeFilter("jobId", value)}
/>
<SearchSelect
label="Language"
onChange={(value) => changeFilter("language", value)}
options={["JAVA", "PYTHON"]}
label="Job ID"
onChange={(value) => changeFilter("job_id", value)}
/>
<SearchInput
label="Page Size"
Expand Down Expand Up @@ -100,31 +97,60 @@ const JobList = () => {
page.pageNo * page.pageSize,
)
.map(
({
jobId = "",
driverIpAddress,
isDead,
driverPid,
startTime,
endTime,
}) => (
<TableRow key={jobId}>
(
{
job_id,
submission_id,
driver_info,
type,
status,
start_time,
end_time,
},
index,
) => (
<TableRow key={job_id ?? submission_id ?? index}>
<TableCell align="center">{job_id ?? "-"}</TableCell>
<TableCell align="center">
<Link to={`/job/${jobId}`}>{jobId}</Link>
{submission_id ?? "-"}
</TableCell>
<TableCell align="center">{driverIpAddress}</TableCell>
<TableCell align="center">{driverPid}</TableCell>
<TableCell align="center">{status}</TableCell>
<TableCell align="center">
{isDead ? "true" : "false"}
{/* TODO(aguo): Also show logs for the job id instead
of just the submission's logs */}
{driver_info &&
ipLogMap[driver_info.node_ip_address] ? (
<Link
to={`/log/${encodeURIComponent(
ipLogMap[driver_info.node_ip_address],
)}?fileName=${
type === "DRIVER"
? job_id
: `driver-${submission_id}`
}`}
target="_blank"
>
Log
</Link>
) : (
"-"
)}
</TableCell>
<TableCell align="center">
{dayjs(Number(startTime)).format("YYYY/MM/DD HH:mm:ss")}
{dayjs(Number(start_time)).format(
"YYYY/MM/DD HH:mm:ss",
)}
</TableCell>
<TableCell align="center">
{endTime > 0
? dayjs(Number(endTime)).format("YYYY/MM/DD HH:mm:ss")
{end_time && end_time > 0
? dayjs(Number(end_time)).format(
"YYYY/MM/DD HH:mm:ss",
)
: "-"}
</TableCell>
<TableCell align="center">
{driver_info?.pid ?? "-"}
</TableCell>
</TableRow>
),
)}
Expand Down
2 changes: 1 addition & 1 deletion dashboard/client/src/service/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { JobDetailRsp, JobListRsp } from "../type/job";
import { get } from "./requestHandlers";

export const getJobList = () => {
return get<JobListRsp>("jobs?view=summary");
return get<JobListRsp>("api/jobs/");
};

export const getJobDetail = (id: string) => {
Expand Down
28 changes: 22 additions & 6 deletions dashboard/client/src/type/job.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,26 @@ export type JobDetailRsp = {
result: boolean;
};

export type JobListRsp = {
data: {
summary: Job[];
};
msg: string;
result: boolean;
export type JobListRsp = UnifiedJob[];

/**
 * A single entry of the unified job list, covering both "submission" jobs
 * and "driver" jobs.
 */
export type UnifiedJob = {
/**
 * Ray core job id (of the form "0100000"); the primary id for a job.
 * May be null, e.g. when a submission did not create a Ray driver.
 */
job_id: string | null;
/**
 * Id generated for a Ray job submission (of the form "raysubmit_12345...");
 * a secondary id. NOTE(review): presumably null for driver jobs that were
 * not started through job submission — confirm against the API.
 */
submission_id: string | null;
/**
 * Kind of job. The jobs page compares this against "DRIVER" when building
 * the log link; other possible values are not visible here.
 */
type: string;
/** Job status string; shown in the "Status" column of the jobs page. */
status: string;
/** NOTE(review): presumably the command used to start the job — confirm. */
entrypoint: string;
message: string | null;
error_type: string | null;
/**
 * Start timestamp. The jobs page formats it with dayjs and sorts on it,
 * treating null as 0. NOTE(review): looks like epoch milliseconds — confirm.
 */
start_time: number | null;
/**
 * End timestamp, formatted like start_time. The jobs page shows "-" when
 * it is null or not greater than 0.
 */
end_time: number | null;
metadata: { [key: string]: string } | null;
runtime_env: { [key: string]: string } | null;
/**
 * Information about the job's driver; null when none is available. The
 * jobs page uses it for the log link and the "Driver Pid" column.
 */
driver_info: DriverInfo | null;
};

/** Driver details attached to a job entry in the job list response. */
export type DriverInfo = {
id: string;
/**
 * IP address of the node the driver is associated with. The jobs page looks
 * this up in its ip-to-log map to build the link to the driver's logs.
 */
node_ip_address: string;
node_id: string;
/** Driver process id; shown in the "Driver Pid" column of the jobs page. */
pid: string;
};
25 changes: 22 additions & 3 deletions dashboard/modules/job/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,17 @@ def job_cli_group():
type=str,
default=None,
required=False,
help=("Job ID to specify for the job. " "If not provided, one will be generated."),
help=("DEPRECATED: Use -- submission-id instead."),
)
@click.option(
"--submission-id",
type=str,
default=None,
required=False,
help=(
"Submission ID to specify for the job. "
"If not provided, one will be generated."
),
)
@click.option(
"--runtime-env",
Expand Down Expand Up @@ -127,6 +137,7 @@ def job_cli_group():
def submit(
address: Optional[str],
job_id: Optional[str],
submission_id: Optional[str],
runtime_env: Optional[str],
runtime_env_json: Optional[str],
working_dir: Optional[str],
Expand All @@ -139,11 +150,19 @@ def submit(
ray job submit -- python my_script.py --arg=val
"""

if job_id:
cli_logger.warning(
"--job-id option is deprecated. " "Please use --submission-id instead."
)

submission_id = submission_id or job_id

if ray_constants.RAY_JOB_SUBMIT_HOOK in os.environ:
# Submit all args as **kwargs per the JOB_SUBMIT_HOOK contract.
_load_class(os.environ[ray_constants.RAY_JOB_SUBMIT_HOOK])(
address=address,
job_id=job_id,
job_id=submission_id,
submission_id=submission_id,
runtime_env=runtime_env,
runtime_env_json=runtime_env_json,
working_dir=working_dir,
Expand All @@ -161,7 +180,7 @@ def submit(

job_id = client.submit_job(
entrypoint=list2cmdline(entrypoint),
job_id=job_id,
submission_id=submission_id,
runtime_env=final_runtime_env,
)

Expand Down
16 changes: 10 additions & 6 deletions dashboard/modules/job/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def is_terminal(self) -> bool:
return self.value in {"STOPPED", "SUCCEEDED", "FAILED"}


# TODO(aguo): Convert to pydantic model
@dataclass
class JobInfo:
"""A class for recording information associated with a job and its execution."""
Expand Down Expand Up @@ -180,10 +181,10 @@ def validate_request_type(json_data: Dict[str, Any], request_type: dataclass) ->
class JobSubmitRequest:
# Command to start execution, ex: "python script.py"
entrypoint: str
# Optional job_id to specify for the job. If the job_id is not specified,
# one will be generated. If a job with the same job_id already exists, it
# will be rejected.
job_id: Optional[str] = None
# Optional submission_id to specify for the job. If the submission_id
# is not specified, one will be generated. If a job with the same
# submission_id already exists, it will be rejected.
submission_id: Optional[str] = None
# Dict to setup execution environment.
runtime_env: Optional[Dict[str, Any]] = None
# Metadata to pass in to the JobConfig.
Expand All @@ -193,9 +194,10 @@ def __post_init__(self):
if not isinstance(self.entrypoint, str):
raise TypeError(f"entrypoint must be a string, got {type(self.entrypoint)}")

if self.job_id is not None and not isinstance(self.job_id, str):
if self.submission_id is not None and not isinstance(self.submission_id, str):
raise TypeError(
f"job_id must be a string if provided, got {type(self.job_id)}"
"submission_id must be a string if provided, "
f"got {type(self.submission_id)}"
)

if self.runtime_env is not None:
Expand Down Expand Up @@ -226,7 +228,9 @@ def __post_init__(self):

# Response data for a job submission request.
# NOTE(review): presumably serialized as the JSON body returned by the
# submit-job endpoint — confirm against the request handler.
@dataclass
class JobSubmitResponse:
# DEPRECATED: Use submission_id instead.
job_id: str
# ID of the submission; replaces the deprecated job_id field.
submission_id: str


@dataclass
Expand Down
Loading

0 comments on commit 5d6bc53

Please sign in to comment.