Skip to content

Commit

Permalink
GCP: Fix cartography-cncf#311 and fix bug where we only retrieved the…
Browse files Browse the repository at this point in the history
… first 1000 GCP projects (cartography-cncf#313)

* List services enabled for each project
* Fix bug where we did not retrieve all GCP projects. 
* Fix bug where we buried exceptions in GCP compute.
  • Loading branch information
Alex Chantavy authored Jun 10, 2020
1 parent d339848 commit fb68aac
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 7 deletions.
63 changes: 58 additions & 5 deletions cartography/intel/gcp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import logging
from collections import namedtuple

Expand All @@ -13,7 +14,16 @@
from cartography.util import timeit

logger = logging.getLogger(__name__)
Resources = namedtuple('Resources', 'crm_v1 crm_v2 compute storage container')
Resources = namedtuple('Resources', 'crm_v1 crm_v2 compute storage container serviceusage')

# Short service names mapped to the full service names used in the Google API docs. For the full names, see
# https://developers.google.com/apis-explorer and the `name` field described at
# https://cloud.google.com/service-usage/docs/reference/rest/v1/services#ServiceConfig
Services = namedtuple('Services', ['compute', 'storage', 'gke'])
service_names = Services(
    compute='compute.googleapis.com',
    storage='storage.googleapis.com',
    gke='container.googleapis.com',
)


def _get_crm_resource_v1(credentials):
Expand Down Expand Up @@ -71,6 +81,17 @@ def _get_container_resource(credentials):
return googleapiclient.discovery.build('container', 'v1', credentials=credentials, cache_discovery=False)


def _get_serviceusage_resource(credentials):
    """
    Build the client object used to talk to the Service Usage API (version v1).
    The discovery document cache is disabled for this client (`cache_discovery=False`).
    See: https://cloud.google.com/service-usage/docs/reference/rest
    :param credentials: The GoogleCredentials object
    :return: A serviceusage resource object
    """
    serviceusage = googleapiclient.discovery.build(
        'serviceusage',
        'v1',
        credentials=credentials,
        cache_discovery=False,
    )
    return serviceusage


def _initialize_resources(credentials):
"""
Create namedtuple of all resource objects necessary for GCP data gathering.
Expand All @@ -83,9 +104,36 @@ def _initialize_resources(credentials):
compute=_get_compute_resource(credentials),
storage=_get_storage_resource(credentials),
container=_get_container_resource(credentials),
serviceusage=_get_serviceusage_resource(credentials),
)


def _services_enabled_on_project(serviceusage, project_id):
    """
    Return the set of all Google API services that are enabled on the given project ID.

    The `services.list` call is paginated, so every page is followed with `list_next` until there are none left.
    Reading only the first page would make a service that happens to be listed on a later page look disabled.
    See https://cloud.google.com/service-usage/docs/reference/rest/v1/services/list for data shape.
    :param serviceusage: the serviceusage resource provider. See https://cloud.google.com/service-usage/docs/overview.
    :param project_id: The project ID number to sync. See the `projectId` field in
    https://cloud.google.com/resource-manager/reference/rest/v1/projects
    :return: A set of full service names (the `config.name` of each returned service) enabled on the project.
    An empty set is returned when no services are listed or when the API call fails with an HttpError; in the
    latter case a warning is logged and the failure is not re-raised.
    """
    enabled = set()
    try:
        services_api = serviceusage.services()
        req = services_api.list(parent=f'projects/{project_id}', filter='state:ENABLED')
        while req is not None:
            res = req.execute()
            # A page may omit the 'services' key entirely (e.g. nothing enabled), so default to an empty list.
            enabled.update(svc['config']['name'] for svc in res.get('services', []))
            # list_next() returns None once there are no further pages, which ends the loop.
            req = services_api.list_next(previous_request=req, previous_response=res)
    except googleapiclient.discovery.HttpError as http_error:
        # The error body is expected to be JSON shaped like {"error": {"code": ..., "message": ...}}. If it is not,
        # fall back to the exception's repr so that a skippable failure does not turn into a crash here.
        try:
            details = json.loads(http_error.content.decode('utf-8'))['error']
            code, message = details['code'], details['message']
        except (AttributeError, UnicodeDecodeError, ValueError, KeyError, TypeError):
            code, message = 'unknown', repr(http_error)
        logger.warning(
            "HttpError when trying to get enabled services on project %s. Code: %s, Message: %s. Skipping.",
            project_id,
            code,
            message,
        )
        # Discard any partially collected pages: the project is treated as having no known enabled services.
        return set()
    return enabled


def _sync_single_project(neo4j_session, resources, project_id, gcp_update_tag, common_job_parameters):
"""
Handles graph sync for a single GCP project.
Expand All @@ -97,9 +145,14 @@ def _sync_single_project(neo4j_session, resources, project_id, gcp_update_tag, c
:param common_job_parameters: Other parameters sent to Neo4j
:return: Nothing
"""
compute.sync(neo4j_session, resources.compute, project_id, gcp_update_tag, common_job_parameters)
storage.sync_gcp_buckets(neo4j_session, resources.storage, project_id, gcp_update_tag, common_job_parameters)
gke.sync_gke_clusters(neo4j_session, resources.container, project_id, gcp_update_tag, common_job_parameters)
# Determine the resources available on the project.
enabled_services = _services_enabled_on_project(resources.serviceusage, project_id)
if service_names.compute in enabled_services:
compute.sync(neo4j_session, resources.compute, project_id, gcp_update_tag, common_job_parameters)
if service_names.storage in enabled_services:
storage.sync_gcp_buckets(neo4j_session, resources.storage, project_id, gcp_update_tag, common_job_parameters)
if service_names.gke in enabled_services:
gke.sync_gke_clusters(neo4j_session, resources.container, project_id, gcp_update_tag, common_job_parameters)


def _sync_multiple_projects(neo4j_session, resources, projects, gcp_update_tag, common_job_parameters):
Expand All @@ -115,7 +168,7 @@ def _sync_multiple_projects(neo4j_session, resources, projects, gcp_update_tag,
:param common_job_parameters: Other parameters sent to Neo4j
:return: Nothing
"""
logger.debug("Syncing %d GCP projects.", len(projects))
logger.info("Syncing %d GCP projects.", len(projects))
crm.sync_gcp_projects(neo4j_session, projects, gcp_update_tag, common_job_parameters)

for project in projects:
Expand Down
1 change: 1 addition & 0 deletions cartography/intel/gcp/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ def _get_error_reason(http_error):
else:
reason = data[0]['error']['errors']['reason']
except (UnicodeDecodeError, ValueError, KeyError):
logger.warning(f"HttpError: {data}")
return ''
return reason

Expand Down
9 changes: 7 additions & 2 deletions cartography/intel/gcp/crm.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,14 @@ def get_gcp_projects(crm_v1):
:return: List of GCP projects. See https://cloud.google.com/resource-manager/reference/rest/v2/projects/list.
"""
try:
projects = []
req = crm_v1.projects().list()
res = req.execute()
return res.get('projects', [])
while req is not None:
res = req.execute()
page = res.get('projects', [])
projects.extend(page)
req = crm_v1.projects().list_next(previous_request=req, previous_response=res)
return projects
except HttpError as e:
logger.warning("HttpError occurred in crm.get_gcp_projects(), returning empty list. Details: %r", e)
return []
Expand Down
1 change: 1 addition & 0 deletions cartography/intel/gcp/gke.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ def sync_gke_clusters(neo4j_session, container, project_id, gcp_update_tag, comm
:rtype: NoneType
:return: Nothing
"""
logger.info("Syncing Compute objects for project %s.", project_id)
gke_res = get_gke_clusters(container, project_id)
load_gke_clusters(neo4j_session, gke_res, project_id, gcp_update_tag)
cleanup_gke_clusters(neo4j_session, common_job_parameters)
1 change: 1 addition & 0 deletions cartography/intel/gcp/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,7 @@ def sync_gcp_buckets(neo4j_session, storage, project_id, gcp_update_tag, common_
:rtype: NoneType
:return: Nothing
"""
logger.info("Syncing Storage objects for project %s.", project_id)
storage_res = get_gcp_buckets(storage, project_id)
bucket_list = transform_gcp_buckets(storage_res)
load_gcp_buckets(neo4j_session, bucket_list, gcp_update_tag)
Expand Down

0 comments on commit fb68aac

Please sign in to comment.