Skip to content

Commit

Permalink
Updated IAM System
Browse files Browse the repository at this point in the history
  • Loading branch information
TepidJesus committed May 4, 2023
1 parent fae6bf3 commit 0377d36
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 13 deletions.
31 changes: 18 additions & 13 deletions client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
## TODO: Potentially add a separate control queue for each EC2 hashing instance
## TODO: Make it so client doesn't cry if the queues already exist when it tries to create them

#### Tuesday TO DO ####
## TODO: Create IAM Role for EC2 Instance
#### Saturday TO DO ####
## TODO: Add IAM privileges to the IAM role
## TODO: EC2 Instance Creation, IAM Role Assignment
## TODO: Add EC2 auto start and stop based on load

Expand Down Expand Up @@ -287,7 +287,7 @@ def __init__(self, config, mode):
exit()

self.effective_vCPU_limit = self.get_vCPU_limit() * int(self.config["AWS-Settings"]["usage_limit"])
self.instance_config = self.get_instance_config()
self.instance_config = self.get_recomended_instance_config()
elif mode == "server":
self.session = self.get_session("server")

Expand Down Expand Up @@ -359,6 +359,7 @@ def get_session(self, mode):
# Look up an EC2 service quota for this account and return it as an int.
# Uses the boto3 session stored on self.session to build a Service Quotas
# client, prints the raw quota value (debug output), and returns it as an int.
def get_vCPU_limit(self):
quota_client = self.session.client('service-quotas')
# NOTE(review): quota code L-417A185B is presumably the vCPU limit for
# on-demand P (GPU) instances, matching the p2/p3/p4d types chosen
# elsewhere in this file -- confirm against the AWS Service Quotas console.
response = quota_client.get_service_quota(ServiceCode='ec2', QuotaCode='L-417A185B')
print(f"Your current vCPU limit is {response['Quota']['Value']}") ## DEBUG
# int() drops any fractional part of the reported quota value.
return int(response['Quota']['Value'])

def get_instances(self):
Expand Down Expand Up @@ -410,11 +411,14 @@ def locate_queue(self, name):

def create_instance(self):
ec2 = self.session.resource('ec2')
print("Creating instance...") ## DEBUG
try:
instance = ec2.create_instances(ImageId=self.config["image_id"],
instance = ec2.create_instances(ImageId=self.config["AWS-Settings"]["image_id"],
MinCount=1,
MaxCount=1,
InstanceType=self.instance_config[0], ) ##TODO: Make IAM role and assign to instances
InstanceType=self.instance_config[0],
Iam_instance_profile={'Arn': self.get_iam_role()})
self.instances.append(instance)
except ClientError as e:
if e.response['Error']['Code'] == 'InsufficientInstanceCapacity':
print("Error: Failed to create instances. Looks like those pesky ML engineers are using all the GPU instances.")
Expand All @@ -423,8 +427,10 @@ def create_instance(self):
else:
print(f"Only Secured {self.get_num_instances()}. You can continue with this number of instances, but you will experience decreased performance.")
print("You can also try again later or try a different region. (Specify this in the settings menu)")
else:
print(e)

self.instances.append(instance)



def create_bucket(self, bucket_prefix):
Expand Down Expand Up @@ -476,7 +482,7 @@ def close_queues(self):
queue.delete()

def cleanup(self):
#self.close_instances() ## DEBUG
self.close_instances()
self.close_buckets()
self.close_queues()

Expand All @@ -487,13 +493,13 @@ def get_max_instances(self):
return self.instance_config[1]

def get_recomended_instance_config(self):
if self.effective_vCPU_limit % 96 >= 1:
if self.effective_vCPU_limit // 96 >= 1:
return ("p4d.24xlarge", self.effective_vCPU_limit // 96)
elif self.effective_vCPU_limit % 64 >= 1:
elif self.effective_vCPU_limit // 64 >= 1:
return ("p3.16xlarge", self.effective_vCPU_limit // 64)
elif self.effective_vCPU_limit % 32 >= 1:
elif self.effective_vCPU_limit // 32 >= 1:
return ("p3.8xlarge", self.effective_vCPU_limit // 32)
elif self.effective_vCPU_limit % 8 >= 1:
elif self.effective_vCPU_limit // 8 >= 1:
return ("p3.2xlarge", self.effective_vCPU_limit // 8)
elif self.effective_vCPU_limit >= 4:
return ("p2.xlarge", 1)
Expand Down Expand Up @@ -545,8 +551,7 @@ def create_iam_role(self):
PolicyDocument=str(permissions_policy)
)

role_arn = response['Role']['Arn']
return role_arn
return response['Role']['Arn']

def get_iam_role(self):
iam = boto3.client('iam')
Expand Down
2 changes: 2 additions & 0 deletions job_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ def send_job(self, job):
job.required_info = (file_name, self.wordlist_bucket_name)
if self.aws_controller.get_num_instances() < self.aws_controller.get_max_instances():
self.aws_controller.create_instance()
else:
print("Max number of instances reached. Job queued.")
response = self.aws_controller.message_queue(self.outbound_queue, job.to_json(), "Job")
if response == False:
print(f"Error: Failed to send job {job.job_id} to queue. Continuing...")
Expand Down

0 comments on commit 0377d36

Please sign in to comment.