Skip to content

Commit

Permalink
change authFile to local path (microsoft#1455)
Browse files Browse the repository at this point in the history
* change authFile to local path
  • Loading branch information
SparkSnail authored and QuanluZhang committed Aug 12, 2019
1 parent 13bec75 commit ec40e59
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/en_US/TrainingService/PaiMode.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Compared with [LocalMode](LocalMode.md) and [RemoteMachineMode](RemoteMachineMod
* shmMB
* Optional key. Sets the shmMB configuration of OpenPAI, i.e. the shared memory for one task in the task role.
* authFile
* Optional key, Set the auth file path for private registry while using PAI mode, [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job).
* Optional key. Sets the auth file path for a private registry while using PAI mode; see [Refer](https://github.com/microsoft/pai/blob/2ea69b45faa018662bc164ed7733f6fdbb4c42b3/docs/faq.md#q-how-to-use-private-docker-registry-job-image-when-submitting-an-openpai-job) for details. You can prepare the authFile and simply provide the local path of this file; NNI will upload this file to HDFS for you.

Once you have filled in the NNI experiment config file and saved it (for example, as exp_pai.yml), run the following command:
```
Expand Down
2 changes: 1 addition & 1 deletion src/nni_manager/training_service/pai/hdfsClientUtility.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ export namespace HDFSClientUtility {
* Get NNI experiment root directory
* @param hdfsUserName HDFS user name
*/
function hdfsExpRootDir(hdfsUserName: string): string {
export function hdfsExpRootDir(hdfsUserName: string): string {
// tslint:disable-next-line:prefer-template
return '/' + unixPathJoin(hdfsUserName, 'nni', 'experiments', getExperimentId());
}
Expand Down
14 changes: 13 additions & 1 deletion src/nni_manager/training_service/pai/paiTrainingService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,11 @@ class PAITrainingService implements TrainingService {
private paiRestServerPort?: number;
private nniManagerIpConfig?: NNIManagerIpConfig;
private copyExpCodeDirPromise?: Promise<void>;
private copyAuthFilePromise?: Promise<void>;
private versionCheck: boolean = true;
private logCollection: string;
private isMultiPhase: boolean = false;
private authFileHdfsPath: string | undefined = undefined;

constructor() {
this.log = getLogger();
Expand Down Expand Up @@ -292,6 +294,12 @@ class PAITrainingService implements TrainingService {
HDFSClientUtility.getHdfsExpCodeDir(this.paiClusterConfig.userName),
this.hdfsClient
);

// Upload authFile to hdfs
if (this.paiTrialConfig.authFile) {
this.authFileHdfsPath = unixPathJoin(HDFSClientUtility.hdfsExpRootDir(this.paiClusterConfig.userName), 'authFile');
this.copyAuthFilePromise = HDFSClientUtility.copyFileToHdfs(this.paiTrialConfig.authFile, this.authFileHdfsPath, this.hdfsClient);
}

deferred.resolve();
break;
Expand Down Expand Up @@ -373,6 +381,10 @@ class PAITrainingService implements TrainingService {
await this.copyExpCodeDirPromise;
}

//Make sure authFile is copied from local to HDFS
if (this.paiTrialConfig.authFile) {
await this.copyAuthFilePromise;
}
// Step 1. Prepare PAI job configuration

const trialLocalTempFolder: string = path.join(getExperimentRootDir(), 'trials-local', trialJobId);
Expand Down Expand Up @@ -449,7 +461,7 @@ class PAITrainingService implements TrainingService {
// Add Virtual Cluster
this.paiTrialConfig.virtualCluster === undefined ? 'default' : this.paiTrialConfig.virtualCluster.toString(),
//Task auth File
this.paiTrialConfig.authFile
this.authFileHdfsPath
);

// Step 2. Upload code files in codeDir onto HDFS
Expand Down
3 changes: 1 addition & 2 deletions tools/nni_cmd/config_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,8 +233,7 @@ def setPathCheck(key):
'cpuNum': setNumberRange('cpuNum', int, 0, 99999),
'memoryMB': setType('memoryMB', int),
'image': setType('image', str),
Optional('authFile'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
error='ERROR: authFile format error, authFile format is hdfs://xxx.xxx.xxx.xxx:xxx'),
Optional('authFile'): And(os.path.exists, error=SCHEMA_PATH_ERROR % 'authFile'),
Optional('shmMB'): setType('shmMB', int),
Optional('dataDir'): And(Regex(r'hdfs://(([0-9]{1,3}.){3}[0-9]{1,3})(:[0-9]{2,5})?(/.*)?'),\
error='ERROR: dataDir format error, dataDir format is hdfs://xxx.xxx.xxx.xxx:xxx'),
Expand Down

0 comments on commit ec40e59

Please sign in to comment.