From ec40e59ce68b384613c1f69f487a0f56385a1b39 Mon Sep 17 00:00:00 2001
From: SparkSnail
Date: Mon, 12 Aug 2019 15:36:45 +0800
Subject: [PATCH] change authFile to local path (#1455)

* change authFile to local path
---
 docs/en_US/TrainingService/PaiMode.md          |  2 +-
 .../training_service/pai/hdfsClientUtility.ts  |  2 +-
 .../training_service/pai/paiTrainingService.ts | 14 +++++++++++++-
 tools/nni_cmd/config_schema.py                 |  3 +--
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/docs/en_US/TrainingService/PaiMode.md b/docs/en_US/TrainingService/PaiMode.md
index 4a3543236d..78e7aa7984 100644
--- a/docs/en_US/TrainingService/PaiMode.md
+++ b/docs/en_US/TrainingService/PaiMode.md
@@ -54,7 +54,7 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMod
 * shmMB
     * Optional key. Set the shmMB configuration of OpenPAI, it set the shared memory for one task in the task role.
 * authFile
-    * Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job).
+    * Optional key. Set the auth file path for a private Docker registry when using PAI mode ([refer to the OpenPAI FAQ](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job)). Prepare the auth file and provide its local path here; NNI will upload the file to HDFS for you.
 
 Once complete to fill NNI experiment config file and save (for example, save as exp_pai.yml), then run the following command
 ```
diff --git a/src/nni_manager/training_service/pai/hdfsClientUtility.ts b/src/nni_manager/training_service/pai/hdfsClientUtility.ts
index f7603afb0c..7c140f8b2f 100644
--- a/src/nni_manager/training_service/pai/hdfsClientUtility.ts
+++ b/src/nni_manager/training_service/pai/hdfsClientUtility.ts
@@ -32,7 +32,7 @@ export namespace HDFSClientUtility {
      * Get NNI experiment root directory
      * @param hdfsUserName HDFS user name
      */
-    function hdfsExpRootDir(hdfsUserName: string): string {
+    export function hdfsExpRootDir(hdfsUserName: string): string {
         // tslint:disable-next-line:prefer-template
         return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
     }
diff --git a/src/nni_manager/training_service/pai/paiTrainingService.ts b/src/nni_manager/training_service/pai/paiTrainingService.ts
index 09e2a42675..91865d906f 100644
--- a/src/nni_manager/training_service/pai/paiTrainingService.ts
+++ b/src/nni_manager/training_service/pai/paiTrainingService.ts
@@ -74,9 +74,11 @@ class PAITrainingService implements TrainingService {
     private paiRestServerPort?: number;
     private nniManagerIpConfig?: NNIManagerIpConfig;
     private copyExpCodeDirPromise?: Promise<void>;
+    private copyAuthFilePromise?: Promise<void>;
     private versionCheck: boolean = true;
     private logCollection: string;
     private isMultiPhase: boolean = false;
+    private authFileHdfsPath: string | undefined = undefined;
 
     constructor() {
         this.log = getLogger();
@@ -292,6 +294,12 @@ class PAITrainingService implements TrainingService {
                     HDFSClientUtility.getHdfsExpCodeDir(this.paiClusterConfig.userName),
                     this.hdfsClient
                 );
+
+                // Upload authFile to HDFS
+                if (this.paiTrialConfig.authFile) {
+                    this.authFileHdfsPath = unixPathJoin(HDFSClientUtility.hdfsExpRootDir(this.paiClusterConfig.userName), 'authFile');
+                    this.copyAuthFilePromise = HDFSClientUtility.copyFileToHdfs(this.paiTrialConfig.authFile, this.authFileHdfsPath, this.hdfsClient);
+                }
 
                 deferred.resolve();
                 break;
@@ -373,6 +381,10 @@ class PAITrainingService implements TrainingService {
             await this.copyExpCodeDirPromise;
         }
 
+        // Make sure authFile is copied from local to HDFS
+        if (this.paiTrialConfig.authFile) {
+            await this.copyAuthFilePromise;
+        }
 
         // Step 1. Prepare PAI job configuration
         const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
@@ -449,7 +461,7 @@ class PAITrainingService implements TrainingService {
             // Add Virutal Cluster
             this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
             //Task auth File
-            this.paiTrialConfig.authFile
+            this.authFileHdfsPath
         );
 
         // Step 2. Upload code files in codeDir onto HDFS
diff --git a/tools/nni_cmd/config_schema.py b/tools/nni_cmd/config_schema.py
index 5118c793b7..f09786664b 100644
--- a/tools/nni_cmd/config_schema.py
+++ b/tools/nni_cmd/config_schema.py
@@ -233,8 +233,7 @@ def setPathCheck(key):
     'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
     'memoryMB': setType('memoryMB', int),
     'image': setType('image', str),
-    Optional('authFile'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
-                              error='ERROR: authFile format error, authFile format is hdfs://xxx.xxx.xxx.xxx:xxx'),
+    Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'),
     Optional('shmMB'): setType('shmMB', int),
     Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
                              error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
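
The `config_schema.py` change is the crux of the patch: `authFile` is no longer validated against an `hdfs://` URL regex but against `os.path.exists`, so it must now be an existing local file, which `PAITrainingService` then uploads to `hdfsExpRootDir(userName)/authFile` on HDFS before job submission. The following is a minimal sketch of how the new rule behaves, using the `schema` package that `config_schema.py` is built on; the `SCHEMA_PATH_ERROR` message below is a hypothetical stand-in for the real constant defined elsewhere in `nni_cmd`.

```python
import os

from schema import And, Optional, Schema, SchemaError

# Hypothetical stand-in for nni_cmd's SCHEMA_PATH_ERROR constant;
# the exact wording in the real codebase may differ.
SCHEMA_PATH_ERROR = 'ERROR: %s path not exist'

# The new rule: authFile must be an existing local path, not an hdfs:// URL.
trial_config_schema = Schema({
    Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'),
})

# A real local file passes validation (this script itself, for demonstration).
trial_config_schema.validate({'authFile': __file__})

# An hdfs:// URL, which the old regex-based rule accepted, now fails.
try:
    trial_config_schema.validate({'authFile': 'hdfs://10.10.10.10:9000/user/nni/authFile'})
except SchemaError as error:
    print(error)  # ERROR: authFile path not exist
```

Checking existence at config-parse time fails fast on a mistyped path, before any PAI job is submitted, and it relieves users of placing the auth file on HDFS themselves, which is exactly the behavior change the PaiMode.md hunk documents.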