Skip to content

Commit

Permalink
Updates to make CartoDB references use carto.com instead. Added licen…
Browse files Browse the repository at this point in the history
…se and metadata to python files in vertnet/service. Set tracker deadline so that it would not throw exceptions for every call to track a query. Made a preliminary attempt to do memory house cleaning to see if it would avoid Out of Memory errors and instance shutdowns.
  • Loading branch information
tucotuco committed Aug 15, 2016
1 parent 9324814 commit 578cef5
Show file tree
Hide file tree
Showing 15 changed files with 269 additions and 80 deletions.
24 changes: 21 additions & 3 deletions vertnet/service/download.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
"""Download service."""
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__author__ = "John Wieczorek"
__contributors__ = "Aaron Steele, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "download.py 2016-08-15T15:54+02:00"

# Removing dependency on Files API due to its deprecation by Google
import cloudstorage as gcs
Expand All @@ -17,9 +34,9 @@
import logging
import uuid
import sys
import gc

DOWNLOAD_VERSION='download.py 2016-08-14T12:06+02:00'

DOWNLOAD_VERSION=__version__
SEARCH_CHUNK_SIZE=1000 # limit on documents in a search result: rows per file
OPTIMUM_CHUNK_SIZE=500 # See api_cnt_performance_analysis.pdf at https://goo.gl/xbLIGz
COMPOSE_FILE_LIMIT=32 # limit on the number of files in a single compose request
Expand Down Expand Up @@ -284,6 +301,7 @@ def post(self):
else:
curs = None

gc.collect()
# Write single chunk to file, GCS does not support append
records, next_cursor, count, query_version = \
vnsearch.query(q, SEARCH_CHUNK_SIZE, curs=curs)
Expand Down
24 changes: 21 additions & 3 deletions vertnet/service/github.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
"""This module provides GitHub interop services."""
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__author__ = "John Wieczorek"
__contributors__ = "Aaron Steele, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "github.py 2016-08-15T15:54+02:00"

from google.appengine.api import mail
from google.appengine.api import urlfetch
Expand All @@ -18,7 +35,8 @@ def load_githubbers():
global GITHUBBERS
if GITHUBBERS is not None:
return GITHUBBERS
cdb_url = "http://vertnet.cartodb.com/api/v1/sql?%s"
# cdb_url = "http://vertnet.cartodb.com/api/v1/sql?%s"
cdb_url = "http://vertnet.carto.com/api/v2/sql?%s"
sql = "SELECT url, split_part(url,'=', 2) as resource, icode, github_reponame as repo, github_orgname as owner FROM resource_staging order by icode, url"
rpc = urlfetch.create_rpc()
url = cdb_url % (urllib.urlencode(dict(q=sql)))
Expand Down Expand Up @@ -125,7 +143,7 @@ def create_issue(self, user, owner, repo, title, body, record, link, data):
html_url = json.loads(response)['html_url']

# FOR TESTING
email = "eightysteele@gmail.com"
email = "gtuco.btuco@gmail.com"

# Email contact
mail.send_mail("VertNet <[email protected]>",
Expand Down
29 changes: 16 additions & 13 deletions vertnet/service/model.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,21 @@
# This file is part of VertNet: https://github.com/VertNet/webapp
#
# VertNet is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# VertNet is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# http://www.apache.org/licenses/LICENSE-2.0
#
# You should have received a copy of the GNU General Public License
# along with VertNet. If not, see: http://www.gnu.org/licenses
"""Datastore models and RPC payload messages."""
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__author__ = "John Wieczorek"
__contributors__ = "Aaron Steele, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "model.py 2016-08-15T15:54+02:00"

from engineauth import models
from google.appengine.ext import ndb
Expand Down
21 changes: 20 additions & 1 deletion vertnet/service/record.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""API handlers for VertNet records."""

# Module metadata. The docstring above must remain the first statement of the
# module (only comments may precede it); otherwise Python does not bind it to
# the module's __doc__ and it becomes a discarded string expression.
# __version__ follows the "<file name> <ISO timestamp>" convention.
__author__ = "John Wieczorek"
__contributors__ = "Aaron Steele, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "record.py 2016-08-15T15:54+02:00"

from vertnet.service.model import RecordIndex, Record, RecordList, RecordPayload
Expand All @@ -11,7 +30,7 @@
import logging
import json

RECORD_VERSION='record.py 2015-08-28T23:42:06+02:00'
RECORD_VERSION=__version__

def record_list(limit, cursor, q, message=False):
"""Return CommentList or triple (comments, next_cursor, more)."""
Expand Down
35 changes: 26 additions & 9 deletions vertnet/service/repochecker.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,28 @@
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Module metadata. __version__ follows the "<file name> <ISO timestamp>"
# convention, so the prefix must name this module (repochecker.py) rather
# than the module the header was copied from.
__author__ = "Javier Otegui"
__contributors__ = "Javier Otegui, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "repochecker.py 2016-08-15T15:54+02:00"

import os
import json
import logging
import urllib
import urllib2

__author__ = '@jotegui'


# Get API key from file
def apikey(serv):
"""Return credentials file as a JSON object."""
Expand All @@ -18,7 +34,8 @@ def apikey(serv):
gh_key=apikey('gh')

ghb_url = 'https://api.github.com'
cdb_url = "https://vertnet.cartodb.com/api/v2/sql"
#cdb_url = "https://vertnet.cartodb.com/api/v2/sql"
cdb_url = "https://vertnet.carto.com/api/v2/sql"
testing = False
headers = {
'User-Agent': 'VertNet', # Authenticate as VertNet
Expand All @@ -27,7 +44,7 @@ def apikey(serv):
}

def get_all_repos():
"""Extract a list of all github_orgnames and github_reponames from CartoDB."""
"""Extract a list of all github_orgnames and github_reponames from Carto."""
query = "select github_orgname, github_reponame from resource_staging where ipt is true and networks like '%VertNet%';"
vals = {
'api_key': cdb_key,
Expand All @@ -39,11 +56,11 @@ def get_all_repos():
try:
res = urllib2.urlopen(req)
except:
logging.error("Something went wrong querying CartoDB")
logging.error("Something went wrong querying Carto")
return None

all_repos = json.loads(res.read())['rows']
logging.info("Got {0} repos currently in CartoDB".format(len(all_repos)))
logging.info("Got {0} repos currently in Carto".format(len(all_repos)))
return all_repos


Expand All @@ -70,7 +87,7 @@ def list_org(org):


def check_failed_repos():
"""Check repository name consistency between CartoDB and GitHub."""
"""Check repository name consistency between Carto and GitHub."""
failed_repos = []
all_repos = get_all_repos()

Expand Down Expand Up @@ -104,7 +121,7 @@ def main(environ, start_response):
logging.info("Response started")


logging.info("Checking consistency of repository names between CartoDB and GitHub.")
logging.info("Checking consistency of repository names between Carto and GitHub.")
failed_repos = check_failed_repos()

res = {
Expand Down
48 changes: 33 additions & 15 deletions vertnet/service/search.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,38 @@
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__author__ = "John Wieczorek"
__contributors__ = "Aaron Steele, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "search.py 2016-08-15T16:43+02:00"

from google.appengine.api import namespace_manager
from google.appengine.api import search
from google.appengine.api.search import SortOptions, SortExpression
from vertnet.service import util as vnutil
from datetime import datetime
import time
import re
import htmlentitydefs
import os
import json
import logging

# In an attempt to overcome timeouts in searches that produce the following exception
# message:
#
from google.appengine.api import urlfetch
urlfetch.set_default_fetch_deadline(60)

SEARCH_VERSION='search.py 2016-08-07T19:37+02:00'
# In an attempt to overcome timeouts in searches
urlfetch.set_default_fetch_deadline(20)

SEARCH_VERSION=__version__
IS_DEV = os.environ.get('SERVER_SOFTWARE', '').startswith('Development')

def _get_rec(doc):
Expand All @@ -38,9 +54,9 @@ def query(q, limit, index_name='dwc', sort=None, curs=search.Cursor()):
if not curs:
curs = search.Cursor()

namespace = namespace_manager.get_namespace()
if q.startswith('id:'):
did = q.split(':')[1].strip()
namespace = namespace_manager.get_namespace()
results = search.Index(name=index_name, namespace=namespace).get_range(
start_id=did, limit=1)
if results:
Expand Down Expand Up @@ -89,13 +105,15 @@ def query(q, limit, index_name='dwc', sort=None, curs=search.Cursor()):
while retry_count < max_retries:
try:
query = search.Query(query_string=q, options=options)
namespace = namespace_manager.get_namespace()
logging.info('Trying Query: %s\nOptions: %s\nVersion: %s' % (q, options, SEARCH_VERSION))
# results = search.Index(name=index_name, namespace=namespace).search(query)
# Trying with an explicitly set deadline of 20s to overcome failed queries on
start_time = time.time()
results = search.Index(name=index_name, namespace=namespace).search(query)
elapsed_time = time.time() - start_time
# Try with an explicitly set deadline to overcome failed queries on
# multiple "booleans" such as haslength, hasmass, hasmedia, isfossil, etc.
results = search.Index(name=index_name, namespace=namespace).search(query, deadline=50)
# results = search.Index(name=index_name, namespace=namespace).search(query, deadline=50)
if results:
logging.info('Found %s records in %.1fs' % (results.number_found, elapsed_time))
recs = map(_get_rec, results)
# logging.info('Query: %s results from search.Index() for namespace=%s \
# index_name=%s query=%s\nVersion: %s' % (q, results.number_found,
Expand Down Expand Up @@ -151,10 +169,10 @@ def query_rec_counter(q, limit, index_name='dwc', sort=None, curs=search.Cursor(
try:
query = search.Query(query_string=q, options=options)
namespace = namespace_manager.get_namespace()
# results = search.Index(name=index_name, namespace=namespace).search(query)
# Trying with an explicitly set deadline of 20s to overcome failed queries on
results = search.Index(name=index_name, namespace=namespace).search(query)
# Try with an explicitly set deadline to overcome failed queries on
# multiple "booleans" such as haslength, hasmass, hasmedia, isfossil, etc.
results = search.Index(name=index_name, namespace=namespace).search(query, deadline=50)
# results = search.Index(name=index_name, namespace=namespace).search(query, deadline=50)
if results:
recs = len(results.results)
return recs, results.cursor, SEARCH_VERSION
Expand Down
24 changes: 22 additions & 2 deletions vertnet/service/stats.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
#!/usr/bin/env python

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Service to generate stats for the stats page."""

# Module metadata. The docstring above must remain the first statement of the
# module (only comments may precede it); otherwise Python does not bind it to
# the module's __doc__. __version__ follows the "<file name> <ISO timestamp>"
# convention, so the prefix must name this module (stats.py) rather than the
# module the header was copied from.
__author__ = "Javier Otegui"
__contributors__ = "Javier Otegui, John Wieczorek"
__copyright__ = "Copyright 2016 vertnet.org"
__version__ = "stats.py 2016-08-15T15:54+02:00"

import os
Expand All @@ -15,9 +34,10 @@ def main(environ, start_response):

path = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'cdbkey.txt')
api_key = open(path, "r").read().rstrip()
logging.info("CARTODB KEY %s" % api_key)
logging.info("CARTO KEY %s" % api_key)

url = "https://vertnet.cartodb.com/api/v2/sql"
# url = "https://vertnet.cartodb.com/api/v2/sql"
url = "https://vertnet.carto.com/api/v2/sql"
q = 'select * from daily_portal_stats order by created_at desc limit 1'

params = {'api_key':api_key, 'q':q}
Expand Down
Loading

0 comments on commit 578cef5

Please sign in to comment.