-
Notifications
You must be signed in to change notification settings - Fork 624
/
c_api.h
859 lines (766 loc) · 32.5 KB
/
c_api.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
// Copyright (c) 2017-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef DALI_C_API_H_
#define DALI_C_API_H_
#include <cuda_runtime_api.h>
#include <inttypes.h>
#include "dali/core/api_helper.h"
// Trick to bypass gcc4.9 old ABI name mangling used by TF
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Handle for DALI C-like API.
 *
 * The handle is a pointer to `struct DALIPipeline`, which is only declared (not defined)
 * in this header, so it is opaque to the user. The functions in this header take a
 * pointer to the handle (`daliPipelineHandle *`).
 *
 * @note Beware, the C API is just a C-like API for handling some mangling issues and
 *       it can throw exceptions.
 */
typedef struct DALIPipeline *daliPipelineHandle;
/**
 * @brief Device type of a memory buffer.
 *
 * Used e.g. as the `device` argument of `daliSetExternalInput*` (device of the supplied
 * memory) and as the `dst_type` argument of `daliOutputCopy` (0 - CPU, 1 - GPU).
 */
typedef enum {
  CPU = 0,  /**< CPU memory */
  GPU = 1   /**< GPU memory */
} device_type_t;
/**
 * @brief Backend of an operator, as returned by `daliGetOperatorBackend`.
 */
typedef enum {
  DALI_BACKEND_CPU   = 0,  /**< CPU backend */
  DALI_BACKEND_GPU   = 1,  /**< GPU backend */
  DALI_BACKEND_MIXED = 2   /**< Mixed backend */
} dali_backend_t;
/**
 * @brief Identifiers of the data types exchanged through this API.
 *
 * Used e.g. as the `data_type` argument of `daliSetExternalInput*` and as the
 * return value of `daliTypeAt`.
 *
 * NOTE(review): the numeric values presumably have to match the type ids used by
 * the C++ part of the library - confirm before changing any of them.
 */
typedef enum {
  DALI_NO_TYPE = -1,

  /* Unsigned integers */
  DALI_UINT8   = 0,
  DALI_UINT16  = 1,
  DALI_UINT32  = 2,
  DALI_UINT64  = 3,

  /* Signed integers */
  DALI_INT8    = 4,
  DALI_INT16   = 5,
  DALI_INT32   = 6,
  DALI_INT64   = 7,

  /* Floating point */
  DALI_FLOAT16 = 8,
  DALI_FLOAT   = 9,
  DALI_FLOAT64 = 10,

  /* Boolean */
  DALI_BOOL    = 11,
} dali_data_type_t;
/**
 * @brief Executor configuration flags (see the `exec_flags` argument of
 *        `daliCreatePipeline3`).
 *
 * The `DALI_EXEC_IS_*` enumerators are single bits; the remaining enumerators are
 * predefined combinations of those bits.
 */
typedef enum {
  /* Single-bit flags */
  DALI_EXEC_IS_PIPELINED = 1 << 0,
  DALI_EXEC_IS_ASYNC     = 1 << 1,
  DALI_EXEC_IS_SEPARATED = 1 << 2,
  DALI_EXEC_IS_DYNAMIC   = 1 << 3,

  /* Predefined combinations */
  DALI_EXEC_SIMPLE          = 0,
  DALI_EXEC_ASYNC_PIPELINED = DALI_EXEC_IS_PIPELINED | DALI_EXEC_IS_ASYNC,
  DALI_EXEC_DYNAMIC         = DALI_EXEC_ASYNC_PIPELINED | DALI_EXEC_IS_DYNAMIC,
} dali_exec_flags_t;
#ifdef __cplusplus
/**
 * @brief Bitwise OR of two executor flag values (C++ only).
 *
 * Both operands are converted to int, combined and converted back, so that the
 * result keeps the dali_exec_flags_t type.
 */
constexpr dali_exec_flags_t operator|(dali_exec_flags_t lhs, dali_exec_flags_t rhs) {
  return static_cast<dali_exec_flags_t>(static_cast<int>(lhs) | static_cast<int>(rhs));
}

/**
 * @brief Bitwise AND of two executor flag values (C++ only).
 *
 * Both operands are converted to int, combined and converted back, so that the
 * result keeps the dali_exec_flags_t type.
 */
constexpr dali_exec_flags_t operator&(dali_exec_flags_t lhs, dali_exec_flags_t rhs) {
  return static_cast<dali_exec_flags_t>(static_cast<int>(lhs) & static_cast<int>(rhs));
}
#endif  // __cplusplus
/**
 * @brief Reader metadata, filled in by `daliGetReaderMetadata`.
 *
 * Need to keep that in sync with ReaderMeta from operator.h
 */
typedef struct {
  /** Raw epoch size. */
  int64_t epoch_size;
  /** Epoch size with the padding at the end. */
  int64_t epoch_size_padded;
  /** Number of shards. */
  int number_of_shards;
  /** Shard id of the given reader. */
  int shard_id;
  /** If the given reader should pad the last batch. */
  int pad_last_batch;
  /** If the given reader should stick to its shard. */
  int stick_to_shard;
} daliReaderMetadata;
/**
 * @brief Executor statistics of a single operator, obtained with
 *        `daliGetExecutorMetadata` and released with `daliFreeExecutorMetadata`.
 *
 * Need to keep that in sync with ExecutorMeta from executor.h
 *
 * NOTE(review): the pointer members presumably point to arrays with `out_num`
 * entries (one per operator output) - confirm against the implementation.
 */
typedef struct {
  /** Operator name; the user needs to free the memory. */
  char *operator_name;
  /** Number of the operator outputs. */
  size_t out_num;
  /** Real size of the operator output; the user needs to free the memory. */
  size_t *real_size;
  /** The biggest size of the tensor in the batch. */
  size_t *max_real_size;
  /** Reserved size of the operator output; the user needs to free the memory. */
  size_t *reserved;
  /** The biggest reserved memory size for the tensor in the batch. */
  size_t *max_reserved;
} daliExecutorMetadata;
/**
 * @brief A single field of the external context: a data pointer with its size.
 *
 * NOTE(review): `size` is presumably the length of `data` in bytes - confirm.
 */
typedef struct daliExternalContextField {
  /** Pointer to the field's data. */
  char *data;
  /** Size of the field's data. */
  size_t size;
} daliExternalContextField;
/**
 * @brief External context stored in / restored from a pipeline checkpoint
 *        (see `daliGetSerializedCheckpoint` and `daliRestoreFromSerializedCheckpoint`).
 *
 * Need to keep that in sync with ExternalContextCheckpoint from checkpoint.h
 */
typedef struct {
  /** Pipeline data field. */
  daliExternalContextField pipeline_data;
  /** Iterator data field. */
  daliExternalContextField iterator_data;
} daliExternalContextCheckpoint;
/**
 * @brief DALI initialization
 *
 * Call this function to initialize DALI backend. It shall be called once per process.
 * Along with this, you'll need to call the daliInitOperatorsLib() function from the
 * `operators.h` file, to initialize the whole DALI.
 * In the unlikely event you'd like to use only Pipeline and Executor (no Operators),
 * you may skip calling daliInitOperatorsLib().
 *
 * @see daliInitOperatorsLib()
 */
DLL_PUBLIC void daliInitialize(void);
/**
* @name Create DALI Pipeline via deserialization.
* @{
*/
/**
 * @brief Create DALI pipeline. Setting max_batch_size,
 * num_threads or device_id here overrides
 * values stored in the serialized pipeline.
 * When separated_execution is equal to 0, prefetch_queue_depth is considered,
 * gpu_prefetch_queue_depth and cpu_prefetch_queue_depth are ignored.
 * When separated_execution is not equal to 0, cpu_prefetch_queue_depth and
 * gpu_prefetch_queue_depth are considered and prefetch_queue_depth is ignored.
 *
 * @param pipe_handle Pipeline handle.
 * @param serialized_pipeline Serialized pipeline.
 * @param length Length of the serialized pipeline string.
 * @param max_batch_size Maximum batch size.
 * @param num_threads Number of CPU threads which this pipeline uses.
 * @param device_id ID of the GPU device which this pipeline uses.
 * @param separated_execution If != 0, this pipeline will have different depths
 *                            of the CPU and GPU prefetching queues.
 * @param prefetch_queue_depth Depth of the prefetching queue.
 *                             If `separated_execution != 0`, this value is ignored.
 * @param cpu_prefetch_queue_depth Depth of the prefetching queue in the CPU stage.
 *                                 If `separated_execution == 0`, this value is ignored.
 * @param gpu_prefetch_queue_depth Depth of the prefetching queue in the GPU stage.
 *                                 If `separated_execution == 0`, this value is ignored.
 * @param enable_memory_stats Enable memory stats.
 */
DLL_PUBLIC void daliCreatePipeline(daliPipelineHandle *pipe_handle, const char *serialized_pipeline,
int length, int max_batch_size, int num_threads, int device_id,
int separated_execution, int prefetch_queue_depth,
int cpu_prefetch_queue_depth, int gpu_prefetch_queue_depth,
int enable_memory_stats);
/**
 * @brief Create a DALI Pipeline, using a pipeline that has been serialized beforehand.
 *
 * @param pipe_handle Pipeline handle.
 * @param serialized_pipeline Serialized pipeline.
 * @param length Length of the serialized pipeline string.
 * @param max_batch_size Maximum batch size.
 * @param num_threads Number of CPU threads which this pipeline uses.
 * @param device_id ID of the GPU device which this pipeline uses.
 * @param pipelined_execution If != 0, this pipeline will execute in Pipeline mode.
 * @param async_execution If != 0, this pipeline will execute asynchronously.
 * @param separated_execution If != 0, this pipeline will have different depths
 *                            of the CPU and GPU prefetching queues.
 * @param prefetch_queue_depth Depth of the prefetching queue.
 *                             If `separated_execution != 0`, this value is ignored.
 * @param cpu_prefetch_queue_depth Depth of the prefetching queue in the CPU stage.
 *                                 If `separated_execution == 0`, this value is ignored.
 * @param gpu_prefetch_queue_depth Depth of the prefetching queue in the GPU stage.
 *                                 If `separated_execution == 0`, this value is ignored.
 * @param enable_memory_stats Enable memory stats.
 */
DLL_PUBLIC void
daliCreatePipeline2(daliPipelineHandle *pipe_handle, const char *serialized_pipeline, int length,
int max_batch_size, int num_threads, int device_id, int pipelined_execution,
int async_execution, int separated_execution, int prefetch_queue_depth,
int cpu_prefetch_queue_depth, int gpu_prefetch_queue_depth,
int enable_memory_stats);
/**
 * @brief Create a DALI Pipeline, using a pipeline that has been serialized beforehand.
 *
 * @param pipe_handle Pipeline handle.
 * @param serialized_pipeline Serialized pipeline.
 * @param length Length of the serialized pipeline string.
 * @param max_batch_size Maximum batch size.
 * @param num_threads Number of CPU threads which this pipeline uses.
 * @param device_id ID of the GPU device which this pipeline uses.
 * @param exec_flags Executor configuration flags (see dali_exec_flags_t).
 * @param prefetch_queue_depth Depth of the prefetching queue.
 * @param cpu_prefetch_queue_depth Depth of the prefetching queue in the CPU stage.
 * @param gpu_prefetch_queue_depth Depth of the prefetching queue in the GPU stage.
 * @param enable_memory_stats Enable memory stats.
 *
 * NOTE(review): this comment used to describe the queue depths in terms of a
 * `separated_execution` parameter, which this function does not have. Presumably the
 * DALI_EXEC_IS_SEPARATED bit of `exec_flags` selects between `prefetch_queue_depth` and
 * the `cpu_`/`gpu_prefetch_queue_depth` pair - confirm against the implementation.
 */
DLL_PUBLIC void
daliCreatePipeline3(daliPipelineHandle *pipe_handle, const char *serialized_pipeline, int length,
int max_batch_size, int num_threads, int device_id,
dali_exec_flags_t exec_flags,
int prefetch_queue_depth,
int cpu_prefetch_queue_depth, int gpu_prefetch_queue_depth,
int enable_memory_stats);
/**
 * @brief Convenient overload. Use it, if the Pipeline should inherit its parameters
 * from the serialized pipeline.
 *
 * @param pipe_handle Pipeline handle.
 * @param serialized_pipeline Serialized pipeline.
 * @param length Length of the serialized pipeline string.
 */
DLL_PUBLIC void daliDeserializeDefault(daliPipelineHandle *pipe_handle,
const char *serialized_pipeline,
int length);
/**
 * @brief Checks, if the pipeline given by the string can be deserialized.
 *
 * It can be assumed that the pipeline, which can be deserialized, is a formally valid
 * DALI pipeline.
 *
 * @param serialized_pipeline String with the serialized pipeline.
 * @param length Length of the string.
 * @return 0, if the pipeline is deserializable. 1 otherwise.
 */
DLL_PUBLIC int daliIsDeserializable(const char* serialized_pipeline, int length);
/** @} */
/**
 * @brief Extra flags for the `flags` argument of the `daliSetExternalInput*` and
 *        `daliOutputCopy*` functions. Every non-default flag occupies a separate bit.
 */
enum {
  /** No extra flags. */
  DALI_ext_default = 0x00,

  /** If memory transfer should be synchronous - applies to GPU memory */
  DALI_ext_force_sync = 0x01,

  /** If provided CPU memory is page-locked */
  DALI_ext_pinned = 0x02,

  /**
   * If provided, a CUDA copy kernel will be used to feed external source instead of
   * cudaMemcpyAsync.
   * Only relevant when the input is either pinned host memory or device memory
   */
  DALI_use_copy_kernel = 0x04,

  /** Override the `no_copy` specified for given External Source and force the data to be copied. */
  DALI_ext_force_copy = 0x08,

  /**
   * Override the `no_copy` specified for given External Source and pass the data directly
   * to the Pipeline.
   */
  DALI_ext_force_no_copy = 0x10,
};
/**
* @name Input batch size information
* @{
*/
/**
 * @brief Get the max batch size of a given pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @return Max batch size.
 */
DLL_PUBLIC int daliGetMaxBatchSize(daliPipelineHandle *pipe_handle);
/**
 * @brief Set the batch size for the upcoming call to `daliSetExternalInput*(...)`.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param name Pointer to a null-terminated byte string with the name of the External Source
 *             to be fed.
 * @param batch_size Batch size of the data.
 */
DLL_PUBLIC void daliSetExternalInputBatchSize(daliPipelineHandle *pipe_handle, const char *name,
int batch_size);
/**
 * @brief Set the data_id for the upcoming call to `daliSetExternalInput*(...)`.
 *
 * The operator_name accepts the name of an input operator. Input operators are the operators
 * that can work with `daliSetExternalInput*(...)` functions, e.g. fn.external_source or
 * fn.inputs.video.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param operator_name The name of the input operator to be fed.
 * @param data_id data_id which will be assigned during upcoming `daliSetExternalInput*(...)` call.
 */
DLL_PUBLIC void
daliSetExternalInputDataId(daliPipelineHandle *pipe_handle, const char *operator_name,
const char *data_id);
/**
 * @brief Returns how many times daliSetExternalInput must be called on a given input
 *        before calling daliPrefetch.
 *
 * @param pipe_handle The handle to the pipeline
 * @param input_name The name of the input in question
 * @return The number of calls to be made
 */
DLL_PUBLIC int
daliInputFeedCount(daliPipelineHandle *pipe_handle, const char *input_name);
/** @} */
/**
* @name Contiguous inputs
* @{
*/
/**
 * @brief Feed the data to ExternalSource as contiguous memory.
 *
 * When calling this function, you need to provide a CUDA stream, which will be used when
 * copying data onto GPU. This function is asynchronous, so it's your responsibility to
 * synchronize on a provided CUDA stream.
 *
 * If GPU memory is provided, it is assumed to reside on the same device that the pipeline is using.
 * See `device_id` parameter of the `daliCreatePipeline`.
 *
 * Keep in mind, that for the special case, where the data exists on the CPU and the
 * ExternalSource's Backend is also a CPU, stream is not needed - feel free to pass
 * the default stream.
 *
 * A convenience, synchronous, overload function is provided,
 * which handles the stream synchronization.
 *
 * If `daliSetExternalInputBatchSize` has been called prior to this function, given batch size
 * is assumed. Otherwise, the function will default to max batch size.
 * @see daliSetExternalInputBatchSize
 * @see daliCreatePipeline
 *
 * @param pipe_handle Pointer to pipeline handle
 * @param name Pointer to a null-terminated byte string with the name of the External Source
 *             to be fed
 * @param device Device of the supplied memory.
 * @param data_ptr Pointer to contiguous buffer containing all samples
 * @param data_type Type of the provided data
 * @param shapes Pointer to an array containing shapes of all samples concatenated one after
 *               another. Should contain batch_size * sample_dim elements.
 * @param sample_dim The dimensionality of a single sample.
 * @param layout_str Optional layout provided as a pointer to null-terminated byte string.
 *                   Can be set to NULL.
 * @param stream CUDA stream to use when copying the data onto GPU. Remember to synchronize on the
 *               provided stream.
 * @param flags Extra flags, check DALI_ext_* and DALI_use_copy_kernel flags
 */
DLL_PUBLIC void
daliSetExternalInputAsync(daliPipelineHandle *pipe_handle, const char *name,
device_type_t device, const void *data_ptr,
dali_data_type_t data_type, const int64_t *shapes,
int sample_dim, const char *layout_str,
cudaStream_t stream, unsigned int flags);
/**
 * @brief Synchronous counterpart of `daliSetExternalInputAsync`.
 *
 * Takes the same arguments as `daliSetExternalInputAsync`, except for the CUDA stream;
 * see that function for the description of the parameters. According to the
 * documentation of the asynchronous variant, this convenience function handles the
 * stream synchronization.
 */
DLL_PUBLIC void
daliSetExternalInput(daliPipelineHandle *pipe_handle, const char *name,
device_type_t device, const void *data_ptr,
dali_data_type_t data_type, const int64_t *shapes,
int sample_dim, const char *layout_str, unsigned int flags);
/** @} */
/**
* @name Sample inputs
* @{
*/
/**
 * @brief Feed the data to ExternalSource as a set of separate buffers.
 *
 * When calling this function, you need to provide a CUDA stream, which will be used when
 * copying data onto GPU. This function is asynchronous, so it's your responsibility to
 * synchronize on a provided CUDA stream.
 *
 * Keep in mind, that for the special case, where the data exists on the CPU and the
 * ExternalSource's Backend is also a CPU, stream is not needed - feel free to pass
 * the default stream.
 *
 * A convenience, synchronous, overload function is provided,
 * which handles the stream synchronization.
 *
 * If `daliSetExternalInputBatchSize` has been called prior to this function, given batch size
 * is assumed. Otherwise, the function will default to max batch size.
 * @see daliSetExternalInputBatchSize
 * @see daliCreatePipeline
 *
 * @param pipe_handle Pointer to pipeline handle
 * @param name Pointer to a null-terminated byte string with the name of the External Source
 *             to be fed
 * @param device Device of the supplied memory.
 * @param data_ptr Pointer to an array containing batch_size pointers to separate Tensors.
 * @param data_type Type of the provided data
 * @param shapes Pointer to an array containing shapes of all samples concatenated one after
 *               another. Should contain batch_size * sample_dim elements.
 * @param sample_dim The dimensionality of a single sample.
 * @param layout_str Optional layout provided as a pointer to null-terminated byte string.
 *                   Can be set to NULL.
 * @param stream CUDA stream to use when copying the data onto GPU. Remember to synchronize on the
 *               provided stream.
 * @param flags Extra flags, check DALI_ext_force_sync, DALI_ext_pinned, DALI_use_copy_kernel
 */
DLL_PUBLIC void
daliSetExternalInputTensorsAsync(daliPipelineHandle *pipe_handle, const char *name,
device_type_t device, const void *const *data_ptr,
dali_data_type_t data_type, const int64_t *shapes,
int64_t sample_dim, const char *layout_str,
cudaStream_t stream, unsigned int flags);
/**
 * @brief Synchronous counterpart of `daliSetExternalInputTensorsAsync`.
 *
 * Takes the same arguments as `daliSetExternalInputTensorsAsync`, except for the CUDA
 * stream; see that function for the description of the parameters. According to the
 * documentation of the asynchronous variant, this convenience function handles the
 * stream synchronization.
 */
DLL_PUBLIC void
daliSetExternalInputTensors(daliPipelineHandle *pipe_handle, const char *name,
device_type_t device, const void *const *data_ptr,
dali_data_type_t data_type, const int64_t *shapes,
int64_t sample_dim, const char *layout_str, unsigned int flags);
/** @} */
/**
 * @brief Get the number of external inputs in the pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @return Number of inputs.
 */
DLL_PUBLIC int daliGetNumExternalInput(daliPipelineHandle *pipe_handle);
/**
 * @brief Get the name of n-th external input in the pipeline in the lexicographic order.
 *
 * Returned pointer is valid until the lifetime of the pipeline object ends.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Index of the external input (in the lexicographic order of the input names).
 * @return Name of the external input.
 */
DLL_PUBLIC const char *daliGetExternalInputName(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Get the data layout required by the external input with a given name.
 *
 * If the layout is not determined, an empty string is returned.
 *
 * Returned pointer is valid until the lifetime of the pipeline object ends.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param name Name of the external input.
 * @return Layout string.
 */
DLL_PUBLIC const char *daliGetExternalInputLayout(daliPipelineHandle *pipe_handle,
const char *name);
/**
 * @brief Get the data type required by the external input with a given name.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param name Name of the external input.
 * @return Data type (a dali_data_type_t value).
 */
DLL_PUBLIC dali_data_type_t daliGetExternalInputType(daliPipelineHandle *pipe_handle,
const char *name);
/**
 * @brief Get the number of dimensions of the data required by the external input
 *        with a given name.
 *
 * If the number of dimensions is not determined, -1 is returned.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param name Name of the external input.
 * @return Number of dimensions.
 */
DLL_PUBLIC int daliGetExternalInputNdim(daliPipelineHandle *pipe_handle, const char *name);
/**
 * @brief Start the execution of the pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliRun(daliPipelineHandle *pipe_handle);
/**
 * @brief Schedule initial runs to fill the buffers.
 *
 * This function should be called once, after a pipeline is created and external inputs
 * (if any) are populated the required number of times.
 * For subsequent runs, daliRun should be used.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliPrefetch(daliPipelineHandle *pipe_handle);
/**
 * @brief Schedule first runs to fill buffers for Executor with UniformQueue policy.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param queue_depth Ignored; must be equal to the pipeline's queue depth
 * @deprecated Use `daliPrefetch` instead
 */
DLL_PUBLIC void daliPrefetchUniform(daliPipelineHandle *pipe_handle, int queue_depth);
/**
 * @brief Schedule first runs to fill buffers for Executor with SeparateQueue policy.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param cpu_queue_depth Ignored; must be equal to the pipeline's CPU queue depth
 * @param gpu_queue_depth Ignored; must be equal to the pipeline's GPU queue depth
 * @deprecated Use `daliPrefetch` instead
 */
DLL_PUBLIC void daliPrefetchSeparate(daliPipelineHandle *pipe_handle,
int cpu_queue_depth, int gpu_queue_depth);
/**
 * @brief Wait until the output of the pipeline is ready.
 *
 * Releases previously returned buffers.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliOutput(daliPipelineHandle *pipe_handle);
/**
 * @brief Wait until the output of the pipeline is ready.
 *
 * Doesn't release previously returned buffers.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliShareOutput(daliPipelineHandle *pipe_handle);
/**
 * @brief Releases buffer returned by last daliOutput call.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliOutputRelease(daliPipelineHandle *pipe_handle);
/**
 * @brief Returns 1 if the output batch stored at position `i` in the pipeline can
 * be represented as dense, uniform tensor. Otherwise 0.
 *
 * This function may only be called after
 * calling Output function.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param i Position of the output in the pipeline.
 */
DLL_PUBLIC int64_t daliOutputHasUniformShape(daliPipelineHandle *pipe_handle, int i);
/**
 * @brief Return the shape of the output tensor stored at position `n` in the pipeline.
 * Valid only if daliOutputHasUniformShape() returns 1.
 *
 * This function may only be called after
 * calling Output function.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the output in the pipeline.
 * @remarks Caller is responsible to 'free' the memory returned
 */
DLL_PUBLIC int64_t *daliShapeAt(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Return the type of the output tensor
 * stored at position `n` in the pipeline.
 *
 * This function may only be called after
 * calling Output function.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the output in the pipeline.
 */
DLL_PUBLIC dali_data_type_t daliTypeAt(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Return the shape of the 'k' output tensor from tensor list
 * stored at position `n` in the pipeline.
 *
 * This function may only be called after
 * calling Output function.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the tensor list (output) in the pipeline.
 * @param k Index of the tensor within the tensor list.
 * @remarks Caller is responsible to 'free' the memory returned
 */
DLL_PUBLIC int64_t *daliShapeAtSample(daliPipelineHandle *pipe_handle, int n, int k);
/**
 * @brief Return the number of tensors in the tensor list
 * stored at position `n` in the pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the tensor list (output) in the pipeline.
 */
DLL_PUBLIC size_t daliNumTensors(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Return the number of all elements in the tensor list
 * stored at position `n` in the pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the tensor list (output) in the pipeline.
 */
DLL_PUBLIC size_t daliNumElements(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Return the size of the tensor list
 * stored at position `n` in the pipeline.
 *
 * NOTE(review): the size is presumably expressed in bytes - confirm.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the tensor list (output) in the pipeline.
 */
DLL_PUBLIC size_t daliTensorSize(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Return maximum number of dimensions from all tensors
 * from the tensor list stored at position `n` in the pipeline.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Position of the tensor list (output) in the pipeline.
 */
DLL_PUBLIC size_t daliMaxDimTensors(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Check, what is the declared number of dimensions in the given output.
 *
 * Declared number of dimensions is a number, which user can optionally provide
 * at the pipeline definition stage.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Index of the output, at which the check is performed.
 */
DLL_PUBLIC size_t daliGetDeclaredOutputNdim(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Check, what is the declared data type in the given output.
 *
 * Declared data type is a type, which user can optionally provide
 * at the pipeline definition stage.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param n Index of the output, at which the check is performed.
 */
DLL_PUBLIC dali_data_type_t daliGetDeclaredOutputDtype(daliPipelineHandle *pipe_handle, int n);
/**
 * @brief Returns the number of DALI pipeline outputs.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC unsigned daliGetNumOutput(daliPipelineHandle *pipe_handle);
/**
 * @brief Returns a string indicating name of the output given by id.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param id Id of the pipeline output.
 * @remark The returned pointer is invalidated after calling `daliDeletePipeline(pipe_handle)`.
 */
DLL_PUBLIC const char *daliGetOutputName(daliPipelineHandle *pipe_handle, int id);
/**
 * @brief Returns device_type_t indicating device backing pipeline output given by id
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param id Id of the pipeline output.
 */
DLL_PUBLIC device_type_t daliGetOutputDevice(daliPipelineHandle *pipe_handle, int id);
/**
* @name Operator traces
* @{
*/
/**
 * @brief Checks, if given operator produced a trace with given name.
 *
 * In case the name of non-existing operator is provided,
 * the behaviour of this function is undefined.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param operator_name Name of the operator to be checked.
 * @param trace_name Name of the trace to be checked.
 * @return 0, if the trace with given name does not exist.
 */
DLL_PUBLIC int daliHasOperatorTrace(daliPipelineHandle *pipe_handle, const char *operator_name,
const char *trace_name);
/**
 * @brief Returns the traces of the given operator in the DALI Pipeline.
 *
 * Operator Traces is a communication mechanism with particular operators in the pipeline. For
 * more information @see operator_trace_map_t.
 *
 * User does not own the returned value. In a situation, when changing of this value is necessary,
 * user shall copy it to their own memory. The lifetime of this value ends, when the
 * daliOutputRelease() is called.
 *
 * User shall check, if the trace with given name exists (@see daliHasOperatorTrace). In case the
 * name of non-existing operator or non-existing trace is provided, the behaviour of this function
 * is undefined.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param operator_name Name of the operator, which trace shall be returned.
 * @param trace_name Name of the requested trace.
 * @return Operator trace.
 */
DLL_PUBLIC const char *
daliGetOperatorTrace(daliPipelineHandle *pipe_handle, const char *operator_name,
const char *trace_name);
/** @} */
/**
 * @brief Copy the output batch stored at position `output_idx` in the pipeline.
 *
 * @remarks If the pipeline output is TensorList then it needs to be dense
 * @param pipe_handle Pointer to pipeline handle
 * @param dst Pointer to the destination buffer where the data will be copied
 * @param output_idx Index of the pipeline output
 * @param dst_type Device type associated with the destination buffer (0 - CPU, 1 - GPU)
 * @param stream CUDA stream to use when copying the data to/from the GPU.
 * @param flags Extra flags, check DALI_ext_force_sync, DALI_use_copy_kernel
 */
DLL_PUBLIC void
daliOutputCopy(daliPipelineHandle *pipe_handle, void *dst, int output_idx, device_type_t dst_type,
cudaStream_t stream, unsigned int flags);
/**
 * @brief Copy the samples in output stored at position `output_idx` in the pipeline
 * to scattered memory locations.
 *
 * @param pipe_handle Pointer to pipeline handle
 * @param dsts Pointers to the destination buffers where each sample will be copied.
 *             A nullptr dst pointer for a sample will discard that sample.
 * @param output_idx Index of the pipeline output
 * @param dst_type Device type associated with the destination buffer (0 - CPU, 1 - GPU)
 * @param stream CUDA stream to use when copying the data to/from the GPU.
 * @param flags Extra flags, check DALI_ext_force_sync, DALI_use_copy_kernel
 */
DLL_PUBLIC void daliOutputCopySamples(daliPipelineHandle *pipe_handle, void **dsts, int output_idx,
device_type_t dst_type, cudaStream_t stream,
unsigned int flags);
/**
 * @brief DEPRECATED API: use daliOutputCopy instead
 *
 * @deprecated Use `daliOutputCopy` instead.
 */
DLL_PUBLIC void
daliCopyTensorNTo(daliPipelineHandle *pipe_handle, void *dst, int n, device_type_t dst_type,
cudaStream_t stream, int non_blocking);
/**
 * @brief DEPRECATED API: use daliOutputCopy instead
 *
 * @deprecated Use `daliOutputCopy` instead.
 */
DLL_PUBLIC void
daliCopyTensorListNTo(daliPipelineHandle *pipe_handle, void *dst, int output_id,
device_type_t dst_type, cudaStream_t stream, int non_blocking);
/**
 * @brief Delete the pipeline object.
 *
 * @param pipe_handle Pointer to pipeline handle.
 */
DLL_PUBLIC void daliDeletePipeline(daliPipelineHandle *pipe_handle);
/**
 * @brief Load plugin library
 *
 * @param lib_path Path of the plugin library to load.
 */
DLL_PUBLIC void daliLoadLibrary(const char *lib_path);
/**
 * @brief The plugin paths will have the following pattern:
 *        {lib_path}/{sub_path}/libdali_{plugin_name}.so
 *
 * NOTE(review): the `{lib_path}` placeholder presumably corresponds to the `plugin_dir`
 * argument - confirm against the implementation.
 *
 * @param plugin_dir Plugin directory.
 */
DLL_PUBLIC void daliLoadPluginDirectory(const char* plugin_dir);
/**
 * @brief Load default plugin library
 *
 * @remarks DALI_PRELOAD_PLUGINS is an environment variable that can be used to control what
 * plugins are loaded. If the variable is set, it is interpreted as a list of paths separated
 * by colon (:), where each element can be a directory or library path.
 * If not set, the "default" path is scanned, which is a subdirectory called plugin under the
 * directory where the DALI library is installed.
 */
DLL_PUBLIC void daliLoadDefaultPlugins(void);
/**
 * @brief Returns the named reader metadata
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param reader_name Name of the reader to query
 * @param meta Pointer to metadata to be filled by the function
 */
DLL_PUBLIC void daliGetReaderMetadata(daliPipelineHandle *pipe_handle, const char *reader_name,
daliReaderMetadata* meta);
/**
 * @brief Returns the backend of the operator with a given \p operator_name
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param operator_name Name of the operator to query
 */
DLL_PUBLIC dali_backend_t daliGetOperatorBackend(daliPipelineHandle *pipe_handle,
const char *operator_name);
/**
 * @brief Obtains the executor statistics
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param operator_meta Pointer to the memory allocated by the function with operator_meta_num
 *                      number of metadata entries. To free returned metadata use
 *                      `daliFreeExecutorMetadata` function
 * @param operator_meta_num Pointer to the variable which will tell how many meta entries
 *                          (operators) have been filled
 */
DLL_PUBLIC void daliGetExecutorMetadata(daliPipelineHandle *pipe_handle,
daliExecutorMetadata **operator_meta,
size_t *operator_meta_num);
/**
 * @brief Frees executor metadata obtained from daliGetExecutorMetadata
 *
 * @param operator_meta Pointer to the memory with metadata allocated by
 *                      `daliGetExecutorMetadata`
 * @param operator_meta_num Number of metadata entries provided by `daliGetExecutorMetadata`
 */
DLL_PUBLIC void daliFreeExecutorMetadata(daliExecutorMetadata *operator_meta,
size_t operator_meta_num);
/**
 * @brief Frees unused memory from memory pools.
 *
 * The function frees memory from all devices and host pinned memory.
 * Memory blocks that are still (even partially) used are not freed.
 */
DLL_PUBLIC void daliReleaseUnusedMemory(void);
/**
 * @brief Preallocates device memory
 *
 * The function ensures that after the call, the amount of memory given in `bytes` can be
 * allocated from the pool (without further requests to the OS).
 *
 * The function works by allocating and then freeing the requested number of bytes.
 * Any outstanding allocations are not taken into account - that is, the peak amount
 * of memory allocated will be the sum of pre-existing allocation and the amount given
 * in `bytes`.
 *
 * @param bytes The amount of memory, in bytes, to preallocate.
 * @param device_id The ordinal number of the device to allocate the memory on. If negative,
 *                  the current device as indicated by cudaGetDevice is used.
 *
 * @return Zero, if the allocation was successful, otherwise nonzero
 */
DLL_PUBLIC int daliPreallocateDeviceMemory(size_t bytes, int device_id);
/**
 * @brief Preallocates host pinned memory
 *
 * The function ensures that after the call, the amount of memory given in `bytes` can be
 * allocated from the pool (without further requests to the OS).
 *
 * The function works by allocating and then freeing the requested number of bytes.
 * Any outstanding allocations are not taken into account - that is, the peak amount
 * of memory allocated will be the sum of pre-existing allocation and the amount given
 * in `bytes`.
 *
 * @param bytes The amount of memory, in bytes, to preallocate.
 *
 * @return Zero, if the allocation was successful, otherwise nonzero
 */
DLL_PUBLIC int daliPreallocatePinnedMemory(size_t bytes);
/**
 * @brief Returns serialized pipeline checkpoint
 *
 * Saves pipeline state together with provided external context.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param external_context External context to include in the checkpoint.
 * @param checkpoint Output pointer to which checkpoint data should be saved.
 *                   The buffer is allocated with daliAlloc, freeing it is caller's
 *                   responsibility.
 * @param n Output argument for checkpoint size in bytes.
 */
DLL_PUBLIC void daliGetSerializedCheckpoint(
daliPipelineHandle *pipe_handle,
const daliExternalContextCheckpoint *external_context,
char **checkpoint, size_t *n);
/**
 * @brief Restores pipeline state from serialized checkpoint
 *
 * Should be called before running the pipeline.
 * The pipeline needs to have checkpointing enabled.
 *
 * @param pipe_handle Pointer to pipeline handle.
 * @param checkpoint Serialized checkpoint to restore from.
 * @param n Size of the checkpoint, in bytes.
 * @param external_context Output buffer to which checkpoint's external context will be saved.
 *                         Populated fields of the external context can be later freed with
 *                         daliDestroyExternalContextCheckpoint. Ignored if null.
 */
DLL_PUBLIC void daliRestoreFromSerializedCheckpoint(
daliPipelineHandle *pipe_handle,
const char *checkpoint, size_t n,
daliExternalContextCheckpoint *external_context);
/**
 * @brief Frees all allocated fields of daliExternalContextCheckpoint
 *
 * @param external_context External context to destroy.
 */
DLL_PUBLIC void daliDestroyExternalContextCheckpoint(
daliExternalContextCheckpoint *external_context);
/**
 * @brief Allocate memory.
 *
 * Memory obtained from this function is released with daliFree.
 *
 * @param n Size, in bytes.
 *
 * @return Pointer to allocated memory or NULL on failure.
 */
DLL_PUBLIC void *daliAlloc(size_t n);
/**
 * @brief Free memory allocated with daliAlloc.
 *
 * @param ptr Pointer to the memory buffer.
 */
DLL_PUBLIC void daliFree(void *ptr);
#ifdef __cplusplus
}
#endif
#endif // DALI_C_API_H_