-
Notifications
You must be signed in to change notification settings - Fork 8.9k
/
abfs.html
1684 lines (1570 loc) · 107 KB
/
abfs.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<!--
| Generated by Apache Maven Doxia at 2025-01-02
| Rendered using Apache Maven Stylus Skin 1.5
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Apache Hadoop Azure support – Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2</title>
<style type="text/css" media="all">
@import url("./css/maven-base.css");
@import url("./css/maven-theme.css");
@import url("./css/site.css");
</style>
<link rel="stylesheet" href="./css/print.css" type="text/css" media="print" />
<meta name="Date-Revision-yyyymmdd" content="20250102" />
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
</head>
<body class="composite">
<div id="banner">
<a href="http://hadoop.apache.org/" id="bannerLeft">
<img src="http://hadoop.apache.org/images/hadoop-logo.jpg" alt="Apache Hadoop" />
</a>
<a href="http://www.apache.org/" id="bannerRight">
<img src="http://www.apache.org/images/asf_logo_wide.png" alt="The Apache Software Foundation" />
</a>
<div class="clear">
<hr/>
</div>
</div>
<div id="breadcrumbs">
<div class="xright"> <a href="http://wiki.apache.org/hadoop" class="externalLink">Wiki</a>
|
<a href="https://gitbox.apache.org/repos/asf/hadoop.git" class="externalLink">git</a>
| Last Published: 2025-01-02
| Version: 3.5.0-SNAPSHOT
</div>
<div class="clear">
<hr/>
</div>
</div>
<div id="leftColumn">
<div id="navcolumn">
<h5>General</h5>
<ul>
<li class="none">
<a href="../index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SingleCluster.html">Single Node Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ClusterSetup.html">Cluster Setup</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CommandsManual.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FileSystemShell.html">FileSystem Shell</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Compatibility.html">Compatibility Specification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DownstreamDev.html">Downstream Developer's Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AdminCompatibilityGuide.html">Admin Compatibility Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/InterfaceClassification.html">Interface Classification</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/filesystem/index.html">FileSystem Specification</a>
</li>
</ul>
<h5>Common</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CLIMiniCluster.html">CLI Mini Cluster</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/FairCallQueue.html">Fair Call Queue</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/NativeLibraries.html">Native Libraries</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Superusers.html">Proxy User</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/RackAwareness.html">Rack Awareness</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/SecureMode.html">Secure Mode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/ServiceLevelAuth.html">Service Level Authorization</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/HttpAuthentication.html">HTTP Authentication</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/CredentialProviderAPI.html">Credential Provider API</a>
</li>
<li class="none">
<a href="../hadoop-kms/index.html">Hadoop KMS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Tracing.html">Tracing</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellGuide.html">Unix Shell Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/registry/index.html">Registry</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/AsyncProfilerServlet.html">Async Profiler</a>
</li>
</ul>
<h5>HDFS</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDesign.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUserGuide.html">User Guide</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html">NameNode HA With QJM</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html">NameNode HA With NFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ObserverNameNode.html">Observer NameNode</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/Federation.html">Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFs.html">ViewFs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ViewFsOverloadScheme.html">ViewFsOverloadScheme</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsSnapshots.html">Snapshots</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsEditsViewer.html">Edits Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsImageViewer.html">Image Viewer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsPermissionsGuide.html">Permissions and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsQuotaAdminGuide.html">Quotas and HDFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/LibHdfs.html">libhdfs (C API)</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/WebHDFS.html">WebHDFS (REST API)</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/index.html">HttpFS</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ShortCircuitLocalReads.html">Short Circuit Local Reads</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/CentralizedCacheManagement.html">Centralized Cache Management</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsNfsGateway.html">NFS Gateway</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsRollingUpgrade.html">Rolling Upgrade</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ExtendedAttributes.html">Extended Attributes</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/TransparentEncryption.html">Transparent Encryption</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsMultihoming.html">Multihoming</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/ArchivalStorage.html">Storage Policies</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/MemoryStorage.html">Memory Storage Support</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/SLGUserGuide.html">Synthetic Load Generator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSErasureCoding.html">Erasure Coding</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HDFSDiskbalancer.html">Disk Balancer</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsUpgradeDomain.html">Upgrade Domain</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsDataNodeAdminGuide.html">DataNode Admin</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/HDFSRouterFederation.html">Router Federation</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/HdfsProvidedStorage.html">Provided Storage</a>
</li>
</ul>
<h5>MapReduce</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html">Tutorial</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduce_Compatibility_Hadoop1_Hadoop2.html">Compatibility with 1.x</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/EncryptedShuffle.html">Encrypted Shuffle</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/PluggableShuffleAndPluggableSort.html">Pluggable Shuffle/Sort</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/DistributedCacheDeploy.html">Distributed Cache Deploy</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/SharedCacheSupport.html">Support for YARN Shared Cache</a>
</li>
</ul>
<h5>MapReduce REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapredAppMasterRest.html">MR Application Master</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html">MR History Server</a>
</li>
</ul>
<h5>YARN</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YARN.html">Architecture</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnCommands.html">Commands Reference</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/CapacityScheduler.html">Capacity Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/FairScheduler.html">Fair Scheduler</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRestart.html">ResourceManager Restart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerHA.html">ResourceManager HA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceModel.html">Resource Model</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeLabel.html">Node Labels</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeAttributes.html">Node Attributes</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebApplicationProxy.html">Web Application Proxy</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html">Timeline Service V.2</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WritingYarnApplications.html">Writing YARN Applications</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnApplicationSecurity.html">YARN Application Security</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManager.html">NodeManager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/DockerContainers.html">Running Applications in Docker Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/RuncContainers.html">Running Applications in runC Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerCgroups.html">Using CGroups</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SecureContainer.html">Secure Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ReservationSystem.html">Reservation System</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/GracefulDecommission.html">Graceful Decommission</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/OpportunisticContainers.html">Opportunistic Containers</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/Federation.html">YARN Federation</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/SharedCache.html">Shared Cache</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingGpus.html">Using GPU</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/UsingFPGA.html">Using FPGA</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/PlacementConstraints.html">Placement Constraints</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/YarnUI2.html">YARN UI2</a>
</li>
</ul>
<h5>YARN REST APIs</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/WebServicesIntro.html">Introduction</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html">Resource Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/NodeManagerRest.html">Node Manager</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServer.html#Timeline_Server_REST_API_v1">Timeline Server</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/TimelineServiceV2.html#Timeline_Service_v.2_REST_API">Timeline Service V.2</a>
</li>
</ul>
<h5>YARN Service</h5>
<ul>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Overview.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/QuickStart.html">QuickStart</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/Concepts.html">Concepts</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/YarnServiceAPI.html">Yarn Service API</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/ServiceDiscovery.html">Service Discovery</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-site/yarn-service/SystemServices.html">System Services</a>
</li>
</ul>
<h5>Hadoop Compatible File Systems</h5>
<ul>
<li class="none">
<a href="../hadoop-aliyun/tools/hadoop-aliyun/index.html">Aliyun OSS</a>
</li>
<li class="none">
<a href="../hadoop-aws/tools/hadoop-aws/index.html">Amazon S3</a>
</li>
<li class="none">
<a href="../hadoop-azure/index.html">Azure Blob Storage</a>
</li>
<li class="none">
<a href="../hadoop-azure-datalake/index.html">Azure Data Lake Storage</a>
</li>
<li class="none">
<a href="../hadoop-cos/cloud-storage/index.html">Tencent COS</a>
</li>
<li class="none">
<a href="../hadoop-huaweicloud/index.html">Huaweicloud OBS</a>
</li>
</ul>
<h5>Auth</h5>
<ul>
<li class="none">
<a href="../hadoop-auth/index.html">Overview</a>
</li>
<li class="none">
<a href="../hadoop-auth/Examples.html">Examples</a>
</li>
<li class="none">
<a href="../hadoop-auth/Configuration.html">Configuration</a>
</li>
<li class="none">
<a href="../hadoop-auth/BuildingIt.html">Building</a>
</li>
</ul>
<h5>Tools</h5>
<ul>
<li class="none">
<a href="../hadoop-streaming/HadoopStreaming.html">Hadoop Streaming</a>
</li>
<li class="none">
<a href="../hadoop-archives/HadoopArchives.html">Hadoop Archives</a>
</li>
<li class="none">
<a href="../hadoop-archive-logs/HadoopArchiveLogs.html">Hadoop Archive Logs</a>
</li>
<li class="none">
<a href="../hadoop-distcp/DistCp.html">DistCp</a>
</li>
<li class="none">
<a href="../hadoop-federation-balance/HDFSFederationBalance.html">HDFS Federation Balance</a>
</li>
<li class="none">
<a href="../hadoop-gridmix/GridMix.html">GridMix</a>
</li>
<li class="none">
<a href="../hadoop-rumen/Rumen.html">Rumen</a>
</li>
<li class="none">
<a href="../hadoop-resourceestimator/ResourceEstimator.html">Resource Estimator Service</a>
</li>
<li class="none">
<a href="../hadoop-sls/SchedulerLoadSimulator.html">Scheduler Load Simulator</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Benchmarking.html">Hadoop Benchmarking</a>
</li>
<li class="none">
<a href="../hadoop-dynamometer/Dynamometer.html">Dynamometer</a>
</li>
</ul>
<h5>Reference</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/release/">Changelog and Release Notes</a>
</li>
<li class="none">
<a href="../api/index.html">Java API docs</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/UnixShellAPI.html">Unix Shell API</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/Metrics.html">Metrics</a>
</li>
</ul>
<h5>Configuration</h5>
<ul>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/core-default.xml">core-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs/hdfs-default.xml">hdfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-hdfs-rbf/hdfs-rbf-default.xml">hdfs-rbf-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-mapreduce-client/hadoop-mapreduce-client-core/mapred-default.xml">mapred-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-yarn/hadoop-yarn-common/yarn-default.xml">yarn-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-kms/kms-default.html">kms-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-hdfs-httpfs/httpfs-default.html">httpfs-default.xml</a>
</li>
<li class="none">
<a href="../hadoop-project-dist/hadoop-common/DeprecatedProperties.html">Deprecated Properties</a>
</li>
</ul>
<a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
<img alt="Built by Maven" src="./images/logos/maven-feather.png"/>
</a>
</div>
</div>
<div id="bodyColumn">
<div id="contentBox">
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<h1>Hadoop Azure Support: ABFS - Azure Data Lake Storage Gen2</h1>
<ul>
<li><a href="#Introduction"> Introduction</a></li>
<li><a href="#Features_of_the_ABFS_connector."> Features of the ABFS connector.</a></li>
<li><a href="#Getting_started">Getting started</a>
<ul>
<li><a href="#Concepts">Concepts</a></li></ul></li>
<li><a href="#Hierarchical_Namespaces_.28and_WASB_Compatibility.29"> Hierarchical Namespaces (and WASB Compatibility)</a>
<ul>
<li><a href="#Creating_an_Azure_Storage_Account"> Creating an Azure Storage Account</a>
<ul>
<li><a href="#Creation_through_the_Azure_Portal">Creation through the Azure Portal</a></li></ul></li>
<li><a href="#Creating_a_new_container"> Creating a new container</a></li>
<li><a href="#Listing_and_examining_containers_of_a_Storage_Account.">Listing and examining containers of a Storage Account.</a></li></ul></li>
<li><a href="#Configuring_ABFS"> Configuring ABFS</a></li>
<li><a href="#Authentication"> Authentication</a>
<ul>
<li><a href="#AAD_Token_fetch_retries"> AAD Token fetch retries</a></li>
<li><a href="#Default:_Shared_Key"> Default: Shared Key</a></li>
<li><a href="#OAuth_2.0_Client_Credentials"> OAuth 2.0 Client Credentials</a></li>
<li><a href="#OAuth_2.0:_Username_and_Password"> OAuth 2.0: Username and Password</a></li>
<li><a href="#OAuth_2.0:_Refresh_Token"> OAuth 2.0: Refresh Token</a></li>
<li><a href="#Azure_Managed_Identity"> Azure Managed Identity</a></li>
<li><a href="#Azure_Workload_Identity"> Azure Workload Identity</a></li>
<li><a href="#Custom_OAuth_2.0_Token_Provider">Custom OAuth 2.0 Token Provider</a></li>
<li><a href="#Delegation_Token_Provider"> Delegation Token Provider</a></li>
<li><a href="#Shared_Access_Signature_.28SAS.29_Token_Provider">Shared Access Signature (SAS) Token Provider</a>
<ul>
<li><a href="#Known_Issues_With_SAS">Known Issues With SAS</a></li>
<li><a href="#Using_User_Delegation_SAS_with_ABFS">Using User Delegation SAS with ABFS</a></li>
<li><a href="#Using_Account.2FService_SAS_with_ABFS">Using Account/Service SAS with ABFS</a></li></ul></li></ul></li>
<li><a href="#Technical_notes"> Technical notes</a>
<ul>
<li><a href="#Proxy_setup"> Proxy setup</a></li>
<li><a href="#Security"> Security</a></li>
<li><a href="#Limitations_of_the_ABFS_connector"> Limitations of the ABFS connector</a></li>
<li><a href="#Consistency_and_Concurrency"> Consistency and Concurrency</a></li>
<li><a href="#Performance_and_Scalability"> Performance and Scalability</a></li>
<li><a href="#Extensibility"> Extensibility</a></li>
<li><a href="#Networking_Layer:">Networking Layer:</a>
<ul>
<li><a href="#ApacheHttpClient_networking_layer_configuration_Options:">ApacheHttpClient networking layer configuration Options:</a></li>
<li><a href="#ApacheHttpClient_classpath_requirements:"> ApacheHttpClient classpath requirements:</a></li></ul></li></ul></li>
<li><a href="#Other_configuration_options"> Other configuration options</a>
<ul>
<li><a href="#Client_Correlation_Options"> Client Correlation Options</a>
<ul>
<li><a href="#a1._Client_CorrelationId_Option"> 1. Client CorrelationId Option</a></li>
<li><a href="#a1._Correlation_IDs_Display_Options"> 1. Correlation IDs Display Options</a></li></ul></li>
<li><a href="#Flush_Options"> Flush Options</a>
<ul>
<li><a href="#a1._Azure_Blob_File_System_Flush_Options"> 1. Azure Blob File System Flush Options</a></li>
<li><a href="#a2._OutputStream_Flush_Options"> 2. OutputStream Flush Options</a></li></ul></li>
<li><a href="#Hundred_Continue_Options"> Hundred Continue Options</a></li>
<li><a href="#Account_level_throttling_Options"> Account level throttling Options</a></li>
<li><a href="#HNS_Check_Options"> HNS Check Options</a></li>
<li><a href="#Access_Options"> Access Options</a></li>
<li><a href="#Operation_Idempotency"> Operation Idempotency</a></li>
<li><a href="#Primary_User_Group_Options"> Primary User Group Options</a></li>
<li><a href="#IO_Options"> IO Options</a></li>
<li><a href="#Security_Options"> Security Options</a></li>
<li><a href="#Encryption_Options"> Encryption Options</a>
<ul>
<li><a href="#Customer-Provided_Global_Key"> Customer-Provided Global Key</a></li>
<li><a href="#Encryption_Context_Provider"> Encryption Context Provider</a></li></ul></li>
<li><a href="#Server_Options"> Server Options</a></li>
<li><a href="#Throttling_Options"> Throttling Options</a></li>
<li><a href="#Rename_Options"> Rename Options</a></li>
<li><a href="#Infinite_Lease_Options"> Infinite Lease Options</a></li>
<li><a href="#Perf_Options"> Perf Options</a>
<ul>
<li><a href="#a1._HTTP_Request_Tracking_Options"> 1. HTTP Request Tracking Options</a></li></ul></li>
<li><a href="#Driver_Metric_Options"> Driver Metric Options</a></li></ul></li>
<li><a href="#Troubleshooting"> Troubleshooting</a>
<ul>
<li><a href="#ClassNotFoundException:_org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem">ClassNotFoundException: org.apache.hadoop.fs.azurebfs.AzureBlobFileSystem</a></li>
<li><a href="#ClassNotFoundException:_com.microsoft.azure.storage.StorageErrorCode">ClassNotFoundException: com.microsoft.azure.storage.StorageErrorCode</a></li>
<li><a href="#Server_failed_to_authenticate_the_request">Server failed to authenticate the request</a></li>
<li><a href="#Configuration_property__something_.dfs.core.windows.net_not_found">Configuration property _something_.dfs.core.windows.net not found</a></li>
<li><a href="#No_such_file_or_directory_when_trying_to_list_a_container">No such file or directory when trying to list a container</a></li>
<li><a href="#a.E2.80.9CHTTP_connection_to_https:.2F.2Flogin.microsoftonline.com.2Fsomething_failed_for_getting_token_from_AzureAD._Http_response:_200_OK.E2.80.9D">“HTTP connection to https://login.microsoftonline.com/something failed for getting token from AzureAD. Http response: 200 OK”</a></li>
<li><a href="#java.io.IOException:_The_ownership_on_the_staging_directory_.2Ftmp.2Fhadoop-yarn.2Fstaging.2Fuser1.2F.staging_is_not_as_expected._It_is_owned_by_.3Cprincipal_id.3E._The_directory_must_be_owned_by_the_submitter_user1_or_user1">java.io.IOException: The ownership on the staging directory /tmp/hadoop-yarn/staging/user1/.staging is not as expected. It is owned by &lt;principal_id&gt;. The directory must be owned by the submitter user1 or user1</a></li></ul></li>
<li><a href="#Known_Issues"> Known Issues</a></li>
<li><a href="#Testing_ABFS"> Testing ABFS</a></li></ul>
<section>
<h2><a name="Introduction"></a><a name="introduction"></a> Introduction</h2>
<p>The <code>hadoop-azure</code> module provides support for the Azure Data Lake Storage Gen2 storage layer through the “abfs” connector.</p>
<p>To make it part of Apache Hadoop’s default classpath, make sure that the <code>HADOOP_OPTIONAL_TOOLS</code> environment variable has <code>hadoop-azure</code> in the list, <i>on every machine in the cluster</i>.</p>
<div class="source">
<div class="source">
<pre>export HADOOP_OPTIONAL_TOOLS=hadoop-azure
</pre></div></div>
<p>You can set this locally in your <code>.profile</code>/<code>.bashrc</code>, but note it won’t propagate to jobs running in-cluster.</p></section><section>
<h2><a name="Features_of_the_ABFS_connector."></a><a name="features"></a> Features of the ABFS connector.</h2>
<ul>
<li>Supports reading and writing data stored in an Azure Blob Storage account.</li>
<li><i>Fully Consistent</i> view of the storage across all clients.</li>
<li>Can read data written through the <code>wasb:</code> connector.</li>
<li>Presents a hierarchical file system view by implementing the standard Hadoop <a href="../api/org/apache/hadoop/fs/FileSystem.html"><code>FileSystem</code></a> interface.</li>
<li>Supports configuration of multiple Azure Blob Storage accounts.</li>
<li>Can act as a source or destination of data in Hadoop MapReduce, Apache Hive, Apache Spark.</li>
<li>Tested at scale on both Linux and Windows by Microsoft themselves.</li>
<li>Can be used as a replacement for HDFS on Hadoop clusters deployed in Azure infrastructure.</li>
</ul>
<p>For details on ABFS, consult the following documents:</p>
<ul>
<li><a class="externalLink" href="https://azure.microsoft.com/en-gb/blog/a-closer-look-at-azure-data-lake-storage-gen2/">A closer look at Azure Data Lake Storage Gen2</a>; MSDN Article from June 28, 2018.</li>
<li><a class="externalLink" href="https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blob-storage-tiers">Storage Tiers</a></li>
</ul></section><section>
<h2><a name="Getting_started"></a>Getting started</h2><section>
<h3><a name="Concepts"></a>Concepts</h3>
<p>The Azure Storage data model presents 3 core concepts:</p>
<ul>
<li><b>Storage Account</b>: All access is done through a storage account.</li>
<li><b>Container</b>: A container is a grouping of multiple blobs. A storage account may have multiple containers. In Hadoop, an entire file system hierarchy is stored in a single container.</li>
<li><b>Blob</b>: A file of any type and size stored with the existing wasb connector</li>
</ul>
<p>The ABFS connector connects to classic containers, or those created with Hierarchical Namespaces.</p></section></section><section>
<h2><a name="Hierarchical_Namespaces_.28and_WASB_Compatibility.29"></a><a name="namespaces"></a> Hierarchical Namespaces (and WASB Compatibility)</h2>
<p>A key aspect of ADLS Gen 2 is its support for <a class="externalLink" href="https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-namespace">hierarchical namespaces</a>. These are effectively directories and offer high performance rename and delete operations — something which makes a significant improvement in the performance of query engines writing data to it, including MapReduce, Spark, Hive, as well as DistCp.</p>
<p>This feature is only available if the container was created with “namespace” support.</p>
<p>You enable namespace support when creating a new Storage Account, by checking the “Hierarchical Namespace” option in the Portal UI, or, when creating through the command line, using the option <code>--hierarchical-namespace true</code>.</p>
<p><i>You cannot enable Hierarchical Namespaces on an existing storage account.</i></p>
<p>Containers in a storage account with Hierarchical Namespaces are not (currently) readable through the <code>wasb:</code> connector.</p>
<p>Some of the <code>az storage</code> command line commands fail too, for example:</p>
<div class="source">
<div class="source">
<pre>$ az storage container list --account-name abfswales1
Blob API is not yet supported for hierarchical namespace accounts. ErrorCode: BlobApiNotYetSupportedForHierarchicalNamespaceAccounts
</pre></div></div>
<section>
<h3><a name="Creating_an_Azure_Storage_Account"></a><a name="creating"></a> Creating an Azure Storage Account</h3>
<p>The best documentation on getting started with Azure Datalake Gen2 with the abfs connector is <a class="externalLink" href="https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-use-hdi-cluster">Using Azure Data Lake Storage Gen2 with Azure HDInsight clusters</a></p>
<p>It includes instructions to create it from <a class="externalLink" href="https://docs.microsoft.com/en-us/cli/azure/install-azure-cli?view=azure-cli-latest">the Azure command line tool</a>, which can be installed on Windows, MacOS (via Homebrew) and Linux (apt or yum).</p>
<p>The <a class="externalLink" href="https://docs.microsoft.com/en-us/cli/azure/storage?view=azure-cli-latest">az storage</a> subcommand handles all storage commands, <a class="externalLink" href="https://docs.microsoft.com/en-us/cli/azure/storage/account?view=azure-cli-latest#az-storage-account-create"><code>az storage account create</code></a> does the creation.</p>
<p>Until the ADLS gen2 API support is finalized, you need to add an extension to the <code>az</code> command.</p>
<div class="source">
<div class="source">
<pre>az extension add --name storage-preview
</pre></div></div>
<p>Check that all is well by verifying that the usage command includes <code>--hierarchical-namespace</code>:</p>
<div class="source">
<div class="source">
<pre>$ az storage account
usage: az storage account create [-h] [--verbose] [--debug]
[--output {json,jsonc,table,tsv,yaml,none}]
[--query JMESPATH] --resource-group
RESOURCE_GROUP_NAME --name ACCOUNT_NAME
[--sku {Standard_LRS,Standard_GRS,Standard_RAGRS,Standard_ZRS,Premium_LRS,Premium_ZRS}]
[--location LOCATION]
[--kind {Storage,StorageV2,BlobStorage,FileStorage,BlockBlobStorage}]
[--tags [TAGS [TAGS ...]]]
[--custom-domain CUSTOM_DOMAIN]
[--encryption-services {blob,file,table,queue} [{blob,file,table,queue} ...]]
[--access-tier {Hot,Cool}]
[--https-only [{true,false}]]
[--file-aad [{true,false}]]
[--hierarchical-namespace [{true,false}]]
[--bypass {None,Logging,Metrics,AzureServices} [{None,Logging,Metrics,AzureServices} ...]]
[--default-action {Allow,Deny}]
[--assign-identity]
[--subscription _SUBSCRIPTION]
</pre></div></div>
<p>You can list locations from <code>az account list-locations</code>, which lists the name to refer to in the <code>--location</code> argument:</p>
<div class="source">
<div class="source">
<pre>$ az account list-locations -o table
DisplayName Latitude Longitude Name
------------------- ---------- ----------- ------------------
East Asia 22.267 114.188 eastasia
Southeast Asia 1.283 103.833 southeastasia
Central US 41.5908 -93.6208 centralus
East US 37.3719 -79.8164 eastus
East US 2 36.6681 -78.3889 eastus2
West US 37.783 -122.417 westus
North Central US 41.8819 -87.6278 northcentralus
South Central US 29.4167 -98.5 southcentralus
North Europe 53.3478 -6.2597 northeurope
West Europe 52.3667 4.9 westeurope
Japan West 34.6939 135.5022 japanwest
Japan East 35.68 139.77 japaneast
Brazil South -23.55 -46.633 brazilsouth
Australia East -33.86 151.2094 australiaeast
Australia Southeast -37.8136 144.9631 australiasoutheast
South India 12.9822 80.1636 southindia
Central India 18.5822 73.9197 centralindia
West India 19.088 72.868 westindia
Canada Central 43.653 -79.383 canadacentral
Canada East 46.817 -71.217 canadaeast
UK South 50.941 -0.799 uksouth
UK West 53.427 -3.084 ukwest
West Central US 40.890 -110.234 westcentralus
West US 2 47.233 -119.852 westus2
Korea Central 37.5665 126.9780 koreacentral
Korea South 35.1796 129.0756 koreasouth
France Central 46.3772 2.3730 francecentral
France South 43.8345 2.1972 francesouth
Australia Central -35.3075 149.1244 australiacentral
Australia Central 2 -35.3075 149.1244 australiacentral2
</pre></div></div>
<p>Once a location has been chosen, create the account</p>
<div class="source">
<div class="source">
<pre>az storage account create --verbose \
--name abfswales1 \
--resource-group devteam2 \
--kind StorageV2 \
--hierarchical-namespace true \
--location ukwest \
--sku Standard_LRS \
--https-only true \
--encryption-services blob \
--access-tier Hot \
--tags owner=engineering \
--assign-identity \
--output jsonc
</pre></div></div>
<p>The output of the command is a JSON file, whose <code>primaryEndpoints</code> entry includes the name of the store endpoint:</p>
<div class="source">
<div class="source">
<pre>{
"primaryEndpoints": {
"blob": "https://abfswales1.blob.core.windows.net/",
"dfs": "https://abfswales1.dfs.core.windows.net/",
"file": "https://abfswales1.file.core.windows.net/",
"queue": "https://abfswales1.queue.core.windows.net/",
"table": "https://abfswales1.table.core.windows.net/",
"web": "https://abfswales1.z35.web.core.windows.net/"
}
}
</pre></div></div>
<p>The <code>abfswales1.dfs.core.windows.net</code> account is the name by which the storage account will be referred to.</p>
<p>Now ask for the connection string to the store, which contains the account key</p>
<div class="source">
<div class="source">
<pre>az storage account show-connection-string --name abfswales1
{
"connectionString": "DefaultEndpointsProtocol=https;EndpointSuffix=core.windows.net;AccountName=abfswales1;AccountKey=ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA=="
}
</pre></div></div>
<p>You then need to add the access key to your <code>core-site.xml</code> or JCEKS file, or use your cluster management tool to set the option <code>fs.azure.account.key.STORAGE-ACCOUNT</code> to this value.</p>
<div class="source">
<div class="source">
<pre><property>
<name>fs.azure.account.key.abfswales1.dfs.core.windows.net</name>
<value>ZGlkIHlvdSByZWFsbHkgdGhpbmsgSSB3YXMgZ29pbmcgdG8gcHV0IGEga2V5IGluIGhlcmU/IA==</value>
</property>
</pre></div></div>
<section>
<h4><a name="Creation_through_the_Azure_Portal"></a>Creation through the Azure Portal</h4>
<p>Creation through the portal is covered in <a class="externalLink" href="https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-quickstart-create-account">Quickstart: Create an Azure Data Lake Storage Gen2 storage account</a></p>
<p>Key Steps</p>
<ol style="list-style-type: decimal">
<li>Create a new Storage Account in a location which suits you.</li>
<li>“Basics” Tab: select “StorageV2”.</li>
<li>“Advanced” Tab: enable “Hierarchical Namespace”.</li>
</ol>
<p>You have now created your storage account. Next, get the key for authentication for using the default “Shared Key” authentication.</p>
<ol style="list-style-type: decimal">
<li>Go to the Azure Portal.</li>
<li>Select “Storage Accounts”</li>
<li>Select the newly created storage account.</li>
<li>In the list of settings, locate “Access Keys” and select that.</li>
<li>Copy one of the access keys to the clipboard, add to the XML option, set in cluster management tools, Hadoop JCEKS file or KMS store.</li>
</ol></section></section><section>
<h3><a name="Creating_a_new_container"></a><a name="new_container"></a> Creating a new container</h3>
<p>An Azure storage account can have multiple containers, each with the container name as the userinfo field of the URI used to reference it.</p>
<p>For example, the container “container1” in the storage account just created will have the URL <code>abfs://container1@abfswales1.dfs.core.windows.net/</code>.</p>
<p>You can create a new container through the ABFS connector, by setting the option <code>fs.azure.createRemoteFileSystemDuringInitialization</code> to <code>true</code>. Note that this is not supported when the authentication type is SAS.</p>
<p>If the container does not exist, an attempt to list it with <code>hadoop fs -ls</code> will fail</p>
<div class="source">
<div class="source">
<pre>$ hadoop fs -ls abfs://container1@abfswales1.dfs.core.windows.net/
ls: `abfs://container1@abfswales1.dfs.core.windows.net/': No such file or directory
</pre></div></div>
<p>Enable remote FS creation and the second attempt succeeds, creating the container as it does so:</p>
<div class="source">
<div class="source">
<pre>$ hadoop fs -D fs.azure.createRemoteFileSystemDuringInitialization=true \
-ls abfs://container1@abfswales1.dfs.core.windows.net/
</pre></div></div>
<p>This is useful for creating containers on the command line, especially before the <code>az storage</code> command supports hierarchical namespaces completely.</p></section><section>
<h3><a name="Listing_and_examining_containers_of_a_Storage_Account."></a>Listing and examining containers of a Storage Account.</h3>
<p>You can use the <a class="externalLink" href="https://azure.microsoft.com/en-us/features/storage-explorer/">Azure Storage Explorer</a></p></section></section><section>
<h2><a name="Configuring_ABFS"></a><a name="configuring"></a> Configuring ABFS</h2>
<p>Any configuration can be specified generally (or as the default when accessing all accounts) or can be tied to a specific account. For example, an OAuth identity can be configured for use regardless of which account is accessed with the property <code>fs.azure.account.oauth2.client.id</code> or you can configure an identity to be used only for a specific storage account with <code>fs.azure.account.oauth2.client.id.<account_name>.dfs.core.windows.net</code>.</p>
<p>This is shown in the Authentication section.</p></section><section>
<h2><a name="Authentication"></a><a name="authentication"></a> Authentication</h2>
<p>Authentication for ABFS is ultimately granted by <a class="externalLink" href="https://docs.microsoft.com/en-us/azure/active-directory/develop/authentication-scenarios">Azure Active Directory</a>.</p>
<p>The concepts covered there are beyond the scope of this document; developers are expected to have read and understood them in order to take advantage of the different authentication mechanisms.</p>
<p>What is covered here, briefly, is how to configure the ABFS client to authenticate in different deployment situations.</p>
<p>The ABFS client can be deployed in different ways, with its authentication needs driven by them.</p>
<ol style="list-style-type: decimal">
<li>With the storage account’s authentication secret in the configuration: “Shared Key”.</li>
<li>Using OAuth 2.0 tokens of one form or another.</li>
<li>Deployed in-Azure with the Azure VMs providing OAuth 2.0 tokens to the application, “Managed Instance”.</li>
<li>Using Shared Access Signature (SAS) tokens provided by a custom implementation of the SASTokenProvider interface.</li>
<li>By directly configuring a fixed Shared Access Signature (SAS) token in the account configuration settings files.</li>
</ol>
<p>Note: SAS Based Authentication should be used only with HNS Enabled accounts.</p>
<p>What can be changed is what secrets/credentials are used to authenticate the caller.</p>
<p>The authentication mechanism is set in <code>fs.azure.account.auth.type</code> (or the account specific variant). The possible values are SharedKey, OAuth, Custom and SAS. For the various OAuth options use the config <code>fs.azure.account.oauth.provider.type</code>. The following implementations are supported: ClientCredsTokenProvider, UserPasswordTokenProvider, MsiTokenProvider, RefreshTokenBasedTokenProvider and WorkloadIdentityTokenProvider. An IllegalArgumentException is thrown if the specified provider type is not one of the supported types.</p>
<p>All secrets can be stored in JCEKS files. These are encrypted and password protected —use them or a compatible Hadoop Key Management Store wherever possible</p><section>
<h3><a name="AAD_Token_fetch_retries"></a><a name="aad-token-fetch-retry-logic"></a> AAD Token fetch retries</h3>
<p>The exponential retry policy used for the AAD token fetch retries can be tuned with the following configurations.</p>
<ul>
<li><code>fs.azure.oauth.token.fetch.retry.max.retries</code>: Sets the maximum number of retries. Default value is 5.</li>
<li><code>fs.azure.oauth.token.fetch.retry.min.backoff.interval</code>: Minimum back-off interval. Added to the retry interval computed from delta backoff. By default this is set as 0. Set the interval in milliseconds.</li>
<li><code>fs.azure.oauth.token.fetch.retry.max.backoff.interval</code>: Maximum back-off interval. Default value is 60000 (sixty seconds). Set the interval in milliseconds.</li>
<li><code>fs.azure.oauth.token.fetch.retry.delta.backoff</code>: Back-off interval between retries. Multiples of this timespan are used for subsequent retry attempts. The default value is 2.</li>
</ul></section><section>
<h3><a name="Default:_Shared_Key"></a><a name="shared-key-auth"></a> Default: Shared Key</h3>
<p>This is the simplest authentication mechanism of account + password.</p>
<p>The account name is inferred from the URL; the password, “key”, is retrieved from the XML/JCEKS configuration files.</p>
<div class="source">
<div class="source">
<pre><property>
<name>fs.azure.account.auth.type.ACCOUNT_NAME.dfs.core.windows.net</name>
<value>SharedKey</value>
<description>
</description>
</property>
<property>
<name>fs.azure.account.key.ACCOUNT_NAME.dfs.core.windows.net</name>
<value>ACCOUNT_KEY</value>
<description>
The secret password. Never share these.
</description>
</property>
</pre></div></div>
<p><i>Note</i>: The source of the account key can be changed through a custom key provider; one exists to execute a shell script to retrieve it.</p>
<p>A custom key provider class can be provided with the config <code>fs.azure.account.keyprovider</code>. If a key provider class is specified, it will be used to get the account key. Otherwise the Simple key provider will be used, which reads the key specified for the config <code>fs.azure.account.key</code>.</p>
<p>To retrieve the key using a shell script, specify the path to the script in the config <code>fs.azure.shellkeyprovider.script</code>. The ShellDecryptionKeyProvider class uses the specified script to retrieve the key.</p></section><section>
<h3><a name="OAuth_2.0_Client_Credentials"></a><a name="oauth-client-credentials"></a> OAuth 2.0 Client Credentials</h3>
<p>OAuth 2.0 credentials of (client id, client secret, endpoint) are provided in the configuration/JCEKS file.</p>
<p>The specifics of this process are covered in <a href="../hadoop-azure-datalake/index.html#Configuring_Credentials_and_FileSystem">hadoop-azure-datalake</a>; the key names are slightly different here.</p>
<div class="source">
<div class="source">
<pre><property>
<name>fs.azure.account.auth.type</name>
<value>OAuth</value>
<description>
Use OAuth authentication
</description>
</property>
<property>
<name>fs.azure.account.oauth.provider.type</name>
<value>org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider</value>
<description>
Use client credentials
</description>
</property>
<property>
<name>fs.azure.account.oauth2.client.endpoint</name>
<value></value>
<description>
URL of OAuth endpoint
</description>
</property>
<property>
<name>fs.azure.account.oauth2.client.id</name>
<value></value>
<description>
Client ID
</description>
</property>
<property>
<name>fs.azure.account.oauth2.client.secret</name>
<value></value>
<description>
Secret
</description>
</property>
</pre></div></div>
</section><section>
<h3><a name="OAuth_2.0:_Username_and_Password"></a><a name="oauth-user-and-passwd"></a> OAuth 2.0: Username and Password</h3>
<p>An OAuth 2.0 endpoint, username and password are provided in the configuration/JCEKS file.</p>
<div class="source">
<div class="source">
<pre><property>
<name>fs.azure.account.auth.type</name>
<value>OAuth</value>
<description>
Use OAuth authentication
</description>
</property>
<property>
<name>fs.azure.account.oauth.provider.type</name>
<value>org.apache.hadoop.fs.azurebfs.oauth2.UserPasswordTokenProvider</value>
<description>
Use user and password
</description>
</property>
<property>
<name>fs.azure.account.oauth2.client.endpoint</name>
<value></value>
<description>
URL of OAuth 2.0 endpoint
</description>
</property>
<property>
<name>fs.azure.account.oauth2.user.name</name>
<value></value>
<description>
username
</description>
</property>
<property>
<name>fs.azure.account.oauth2.user.password</name>
<value></value>
<description>
password for account
</description>
</property>
</pre></div></div>
</section><section>
<h3><a name="OAuth_2.0:_Refresh_Token"></a><a name="oauth-refresh-token"></a> OAuth 2.0: Refresh Token</h3>
<p>With an existing OAuth 2.0 token, make a request of the Active Directory endpoint <code>https://login.microsoftonline.com/Common/oauth2/token</code> for this token to be refreshed.</p>
<div class="source">
<div class="source">
<pre><property>
<name>fs.azure.account.auth.type</name>
<value>OAuth</value>
<description>
Use OAuth 2.0 authentication
</description>
</property>
<property>
<name>fs.azure.account.oauth.provider.type</name>
<value>org.apache.hadoop.fs.azurebfs.oauth2.RefreshTokenBasedTokenProvider</value>
<description>
Use the Refresh Token Provider
</description>
</property>
<property>
<name>fs.azure.account.oauth2.refresh.token</name>
<value></value>
<description>
Refresh token
</description>
</property>
<property>
<name>fs.azure.account.oauth2.refresh.endpoint</name>
<value></value>
<description>
Refresh token endpoint
</description>
</property>
<property>
<name>fs.azure.account.oauth2.client.id</name>
<value></value>
<description>
Optional Client ID
</description>