Skip to content

Commit

Permalink
allow create organization while harvest from "publisher"
Browse files Browse the repository at this point in the history
  • Loading branch information
avdata99 committed Dec 23, 2019
1 parent 404b7e7 commit f7bdf13
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 19 deletions.
33 changes: 16 additions & 17 deletions ckanext/datajson/harvester_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

from sqlalchemy.exc import IntegrityError

from ckanext.datajson.helpers import reverse_accrual_periodicity_dict, get_data_processor_json
from ckanext.datajson.helpers import reverse_accrual_periodicity_dict, \
get_data_processor_json, \
publisher_to_org

import logging
log = logging.getLogger(__name__)
Expand Down Expand Up @@ -86,20 +88,7 @@ def load_config(self, harvest_source):

cfg = harvest_source.config or '{}'
source_config = json.loads(cfg)

try:
ret["filters"].update(source_config["filters"])
except TypeError:
pass
except KeyError:
pass

try:
ret["defaults"].update(source_config["defaults"])
except TypeError:
pass
except KeyError:
pass
ret.update(source_config)

return ret

Expand Down Expand Up @@ -464,6 +453,7 @@ def import_stage(self, harvest_object):

# get the config
config = self.load_config(harvest_object.source)
log.info('Config used: {}'.format(config))
# Get default values.
dataset_defaults = config["defaults"]

Expand Down Expand Up @@ -541,8 +531,17 @@ def import_stage(self, harvest_object):
if org_from == 'harvest_source':
owner_org = source_dataset.owner_org
elif org_from == 'publisher':
# TODO https://github.com/datopian/ckanext-datajson/issues/4
owner_org = source_dataset.owner_org
# If the dataset has a publisher, use it as the organization; otherwise fall back to the harvest source's organization.
# TODO analyze if config "remote_orgs" could be useful here
publisher = dataset.get('publisher', {})
publisher_name = publisher.get('name', None)
if publisher_name is not None:
log.info('Publisher found: {}'.format(publisher))
org = publisher_to_org(publisher_name, self.context())
owner_org = org['id']
else:
log.error('No publisher, default to harvest source org')
owner_org = source_dataset.owner_org

group_name = config.get('default_groups', '')

Expand Down
20 changes: 20 additions & 0 deletions ckanext/datajson/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
from pylons import config
from ckan import plugins as p
from ckan.lib import helpers as h
from ckan.logic import NotFound, get_action, check_access
from ckan.lib.munge import munge_title_to_name
import re
import simplejson as json

Expand Down Expand Up @@ -207,6 +209,24 @@ def get_extra(package, key, default=None):
return packageExtraCache.get(package, key, default)


def publisher_to_org(publisher_name, context):
    """Return the organization for a publisher, creating it if it is missing.

    The organization name is derived from ``publisher_name`` with
    ``munge_title_to_name``, with underscores replaced by hyphens.

    :param publisher_name: publisher title taken from the harvested dataset.
    :param context: CKAN context passed to ``check_access`` and to the
        ``organization_show`` / ``organization_create`` actions.
    :returns: the result of ``organization_show`` when the organization
        exists, otherwise the result of ``organization_create``.
    :raises: any exception other than ``NotFound`` raised by the access
        checks or actions is propagated unchanged.
    """
    name = munge_title_to_name(publisher_name).replace('_', '-')

    try:
        # The access check is kept inside the ``try``: the auth function may
        # itself look the organization up and raise NotFound when it does not
        # exist, which must lead to the creation path instead of aborting.
        # NOTE(review): this depends on the CKAN version in use - confirm.
        check_access('organization_show', context, {'id': name})
        org = get_action('organization_show')(context, {'id': name})
    except NotFound:
        # A missing organization is the expected "create" path, not a failure,
        # so it is reported at info level.
        log.info('Publisher as ORG not found. Creating')
        org_base = {'title': publisher_name, 'name': name}
        check_access('organization_create', context, org_base)
        org = get_action('organization_create')(context, org_base)

    log.info('Pub: {} to org: {}'.format(publisher_name, org))
    return org


class PackageExtraCache:
def __init__(self):
self.pid = None
Expand Down
6 changes: 5 additions & 1 deletion ckanext/datajson/parse_datajson.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ def parse_datajson_entry(datajson, package, defaults, schema_version):
log.info('Parsing datajson entry: {}'.format(package))

# set the default values
for k, v in defaults:
for k, v in defaults.items():
if k not in package.keys() or package[k] is None:
package[k] = v

Expand Down Expand Up @@ -79,6 +79,10 @@ def parse_datajson_entry(datajson, package, defaults, schema_version):
package["contact_email"] = \
package.get("contact_email").replace("mailto:", "", 1)

if package.get("author_email"):
package["author_email"] = \
package.get("author_email").replace("mailto:", "", 1)

# 4. extras-publisher and extras-publisher_hierarchy
if schema_version == '1.1':
publisher = find_extra(package, "publisher", {})
Expand Down
49 changes: 49 additions & 0 deletions test-docker-ckan.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
[DEFAULT]
debug = true
# Uncomment and replace with the address which should receive any error reports
#email_to = you@yourdomain.com
smtp_server = localhost
error_email_from = paste@localhost

[app:main]
use = config:../../src/ckan/test-core.ini
ckan.site_title = My Test CKAN Site
ckan.site_description = A test site for testing my CKAN extension
ckan.plugins = harvest datajson datajson_harvest

# Logging configuration
[loggers]
keys = root, ckan, sqlalchemy

[handlers]
keys = console

[formatters]
keys = generic

[logger_root]
level = WARN
handlers = console

[logger_ckan]
qualname = ckan
handlers =
level = INFO

[logger_sqlalchemy]
handlers =
qualname = sqlalchemy.engine
level = WARN

[handler_console]
class = StreamHandler
args = (sys.stdout,)
level = NOTSET
formatter = generic

[formatter_generic]
format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s
2 changes: 1 addition & 1 deletion test.ini
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ smtp_server = localhost
error_email_from = paste@localhost

[app:main]
use = config:../../ckan/test-core.ini
use = config:../../src/ckan/test-core.ini
ckan.site_title = My Test CKAN Site
ckan.site_description = A test site for testing my CKAN extension
ckan.plugins = harvest datajson datajson_harvest
Expand Down

0 comments on commit f7bdf13

Please sign in to comment.