forked from triton-inference-server/core
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscheduler.h
81 lines (70 loc) · 3.75 KB
/
scheduler.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
// Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <cstddef>
#include <cstdint>
#include <functional>
#include <memory>
#include <vector>

#include "infer_request.h"
#include "status.h"
namespace triton { namespace core {
// Scheduler interface.
//
// Abstract base class for request schedulers. In addition to the pure
// virtual scheduling operations, it declares the callback prototypes
// used by the "standard" schedulers, i.e. the schedulers that are
// created based on a model's scheduling_choice settings.
class Scheduler {
 public:
  // Virtual so that a concrete scheduler is destroyed correctly when
  // deleted through a pointer to this interface. The base itself has
  // nothing to release, hence the defaulted definition.
  virtual ~Scheduler() = default;

  // Prototype of the initialization function invoked by the "standard"
  // schedulers. It is called once by the runner that will later
  // execute requests for 'runner_idx'.
  //
  // Returns a non-OK status to report an initialization error that
  // prevents the scheduler from using that runner.
  using StandardInitFunc = std::function<Status(uint32_t runner_idx)>;

  // Prototype of the warmup function invoked by the "standard"
  // schedulers. It is called once by the runner that will later
  // execute requests for 'runner_idx'.
  //
  // Returns a non-OK status to report an error that prevents the
  // scheduler from sending warmup requests to that runner.
  using StandardWarmupFunc = std::function<Status(uint32_t runner_idx)>;

  // Prototype of the run function invoked by the "standard"
  // schedulers.
  //
  //   runner_idx  Identifies which runner should execute 'requests'.
  //   requests    The requests to execute. Ownership is transferred to
  //               the runner, which is responsible for generating the
  //               responses and for releasing the requests.
  using StandardRunFunc = std::function<void(
      uint32_t runner_idx,
      std::vector<std::unique_ptr<InferenceRequest>>&& requests)>;

  // Enqueue a request with the scheduler.
  //
  // If Status::Success is returned then the backend has taken
  // ownership of the request object and so 'request' will be nullptr.
  // If non-success is returned then the caller still retains ownership
  // of 'request'. The parameter is a non-const reference precisely so
  // that ownership can be left with the caller on failure.
  virtual Status Enqueue(std::unique_ptr<InferenceRequest>& request) = 0;

  // Return the number of in-flight inferences tracked by the
  // scheduler.
  virtual size_t InflightInferenceCount() = 0;

  // Instruct the scheduler to stop processing future requests unless
  // they are considered as in-flight.
  virtual void Stop() = 0;
};
}} // namespace triton::core