Skip to content

Commit

Permalink
Extend dataframe to profile documents
Browse files Browse the repository at this point in the history
  • Loading branch information
ale-de-vries committed Nov 25, 2017
1 parent 1407a41 commit 790ec45
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 55 deletions.
5 changes: 3 additions & 2 deletions elsapy/elsprofile.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* https://dev.elsevier.com
* https://api.elsevier.com"""

import requests, json, urllib
import requests, json, urllib, pandas as pd
from abc import ABCMeta, abstractmethod
from . import log_util
from .elsentity import ElsEntity
Expand Down Expand Up @@ -58,6 +58,8 @@ def read_docs(self, payloadType, els_client = None):
self._doc_list = None
raise e
logger.info("Documents loaded for " + self.uri)
self.docsframe = pd.DataFrame(self._doc_list)
logger.info("Documents loaded into dataframe for " + self.uri)
return True
except (requests.HTTPError, requests.RequestException) as e:
logger.warning(e.args)
Expand All @@ -68,7 +70,6 @@ def write_docs(self):
with the url-encoded URI as the filename and returns True. Else,
returns False."""
if self.doc_list:
dataPath = self.client.local_dir
dump_file = open('data/'
+ urllib.parse.quote_plus(self.uri+'?view=documents')
+ '.json', mode='w'
Expand Down
15 changes: 11 additions & 4 deletions elsapy/elssearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,20 @@ class ElsSearch():
through api.elsevier.com. Returns True if successful; else, False."""

# static / class variables
__base_url = u'https://api.elsevier.com/content/search/'
__int_resp_fields = [
_base_url = u'https://api.elsevier.com/content/search/'
_int_resp_fields = [
'document-count',
'citedby-count',
]
_date_resp_fields = [
'prism:coverDate',
]

def __init__(self, query, index):
"""Initializes a search object with a query and target index."""
self.query = query
self.index = index
self._uri = self.__base_url + self.index + '?query=' + url_encode(
self._uri = self._base_url + self.index + '?query=' + url_encode(
self.query)

# properties
Expand Down Expand Up @@ -93,10 +96,14 @@ def execute(self, els_client = None, get_all = False):
if 'link' in self.results_df.columns:
self.results_df['link'] = self.results_df.link.apply(
lambda x: dict([(e['@ref'], e['@href']) for e in x]))
for int_field in self.__int_resp_fields:
for int_field in self._int_resp_fields:
if int_field in self.results_df.columns:
self.results_df[int_field] = self.results_df[int_field].apply(
int)
for date_field in self._date_resp_fields:
if date_field in self.results_df.columns:
self.results_df[date_field] = self.results_df[date_field].apply(
pd.Timestamp)

def hasAllResults(self):
"""Returns true if the search object has retrieved all results for the
Expand Down
98 changes: 49 additions & 49 deletions exampleProg.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,25 +15,25 @@
client = ElsClient(config['apikey'])
client.inst_token = config['insttoken']

### Author example
## Initialize author with uri
#my_auth = ElsAuthor(
# uri = 'https://api.elsevier.com/content/author/author_id/7004367821')
## Read author data, then write to disk
#if my_auth.read(client):
# print ("my_auth.full_name: ", my_auth.full_name)
# my_auth.write()
#else:
# print ("Read author failed.")
#
### Affiliation example
## Initialize affiliation with ID as string
#my_aff = ElsAffil(affil_id = '60101411')
#if my_aff.read(client):
# print ("my_aff.name: ", my_aff.name)
# my_aff.write()
#else:
# print ("Read affiliation failed.")
## Author example
# Initialize author with uri
my_auth = ElsAuthor(
uri = 'https://api.elsevier.com/content/author/author_id/7004367821')
# Read author data, then write to disk
if my_auth.read(client):
print ("my_auth.full_name: ", my_auth.full_name)
my_auth.write()
else:
print ("Read author failed.")

## Affiliation example
# Initialize affiliation with ID as string
my_aff = ElsAffil(affil_id = '60101411')
if my_aff.read(client):
print ("my_aff.name: ", my_aff.name)
my_aff.write()
else:
print ("Read affiliation failed.")
#
### Scopus (Abstract) document example
## Initialize document with ID as integer
Expand Down Expand Up @@ -64,36 +64,36 @@
### Load list of documents from the API into affiliation and author objects.
## Since a document list is retrieved for 25 entries at a time, this is
## a potentially lengthy operation - hence the prompt.
#print ("Load documents (Y/N)?")
#s = input('--> ')
#
#if (s == "y" or s == "Y"):
print ("Load documents (Y/N)?")
s = input('--> ')

if (s == "y" or s == "Y"):

## Read all documents for example author, then write to disk
if my_auth.read_docs(client):
print ("my_auth.doc_list has " + str(len(my_auth.doc_list)) + " items.")
my_auth.write_docs()
else:
print ("Read docs for author failed.")

## Read all documents for example affiliation, then write to disk
if my_aff.read_docs(client):
print ("my_aff.doc_list has " + str(len(my_aff.doc_list)) + " items.")
my_aff.write_docs()
else:
print ("Read docs for affiliation failed.")
#
# ## Read all documents for example author, then write to disk
# if my_auth.read_docs(client):
# print ("my_auth.doc_list has " + str(len(my_auth.doc_list)) + " items.")
# my_auth.write_docs()
# else:
# print ("Read docs for author failed.")
## Initialize author search object and execute search
#auth_srch = ElsSearch('authlast(keuskamp)','author')
#auth_srch.execute(client)
#print ("auth_srch has", len(auth_srch.results), "results.")
#
# ## Read all documents for example affiliation, then write to disk
# if my_aff.read_docs(client):
# print ("my_aff.doc_list has " + str(len(my_aff.doc_list)) + " items.")
# my_aff.write_docs()
# else:
# print ("Read docs for affiliation failed.")
### Initialize affiliation search object and execute search
#aff_srch = ElsSearch('affil(amsterdam)','affiliation')
#aff_srch.execute(client)
#print ("aff_srch has", len(aff_srch.results), "results.")
#
## Initialize author search object and execute search
auth_srch = ElsSearch('authlast(keuskamp)','author')
auth_srch.execute(client)
print ("auth_srch has", len(auth_srch.results), "results.")

## Initialize affiliation search object and execute search
aff_srch = ElsSearch('affil(amsterdam)','affiliation')
aff_srch.execute(client)
print ("aff_srch has", len(aff_srch.results), "results.")

## Initialize doc search object and execute search, retrieving all results
doc_srch = ElsSearch('star trek vs star wars','scopus')
doc_srch.execute(client, get_all = True)
print ("doc_srch has", len(doc_srch.results), "results.")
### Initialize doc search object and execute search, retrieving all results
#doc_srch = ElsSearch('star trek vs star wars','scopus')
#doc_srch.execute(client, get_all = True)
#print ("doc_srch has", len(doc_srch.results), "results.")

0 comments on commit 790ec45

Please sign in to comment.