Skip to content

Commit

Permalink
Update to include support for basic Pandas types.
Browse files Browse the repository at this point in the history
  • Loading branch information
cadmiumkitty committed May 9, 2018
1 parent 7ed0e48 commit cdbb1c0
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 32 deletions.
6 changes: 3 additions & 3 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ RdfPandas
Introduction
============

RdfPandas is a module providing RDF support for Pandas. It consists of
two simple functions for graph conversion, one is to create DataFrame from
RDFLib Graph data, and another one to create Graph data from DataFrame.
RdfPandas is a module providing RDF support for Pandas. Initially, it consists
of a single function that converts a Pandas DataFrame into RDFLib Graph
data.

The graph data can then be serialized using RDFLib serialize method on the
graph.
Expand Down
2 changes: 1 addition & 1 deletion rdfpandas/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .graph import to_graph, from_graph
from .graph import to_graph
35 changes: 13 additions & 22 deletions rdfpandas/graph.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# -*- coding: utf-8 -*-
import pandas as pd
import rdflib
import logging


def to_graph(df: pd.DataFrame) -> rdflib.Graph:
"""
Expand All @@ -22,26 +24,15 @@ def to_graph(df: pd.DataFrame) -> rdflib.Graph:

g = rdflib.Graph()

return g

def from_graph(g: rdflib.Graph) -> pd.DataFrame:
"""
Takes RDFLib Graph and returns Pandas DataFrame using subjects as row
indices and predicates as column indices. Object types are inferred from
the object types.
Parameters
----------
g : rdflib.Graph
Graph to be converted into Pandas DataFrame
Returns
-------
pandas.DataFrame
DataFrame created from Graph.
# Emit one triple per DataFrame cell: the row label becomes the subject URI,
# the column label the predicate URI, and the cell value the literal object.
for index, series in df.iterrows():
    # Series.items() is used because Series.iteritems() was removed in
    # pandas 2.0; both yield (label, value) pairs.
    for column, value in series.items():
        # isinstance() is required here: comparing type(value) with the
        # string 'bytes' is never true, which would leave byte strings
        # undecoded. Byte strings are decoded as UTF-8 so the literal
        # carries text rather than raw bytes.
        if isinstance(value, bytes):
            value = value.decode('utf-8')
        g.add((rdflib.URIRef(index),
               rdflib.URIRef(column),
               rdflib.Literal(value)))

"""

df = pd.DataFrame()

return df
return g
85 changes: 79 additions & 6 deletions tests/test_data_frame_to_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
from .context import rdfpandas

import pandas as pd
import numpy as np
import rdflib
import rdflib.compare

import unittest
import logging


class DataFrameToGraphConversionTestCase(unittest.TestCase):
Expand All @@ -18,14 +20,85 @@ def test_should_convert_empty_data_frame_to_emty_graph(self):
g_expected = rdflib.Graph()
g_result = rdfpandas.to_graph(df)
self.assertEquals(rdflib.compare.isomorphic(g_expected, g_result), True)

def test_should_convert_data_frame_to_graph_with_fully_qualified_indices(self):
"""Should return Graph with a single String literal.
Assume that we rely on URIs for indices in the first release,
that String is the only datatype supported, and that language handling
of literals is not required.
"""

idx1= pd.Index(data=['http://github.com/cadmiumkitty/rdfpandas/one'])

ds01 = pd.Series(data=['Bytes'], index=[idx1], dtype = np.string_, name = 'http://github.com/cadmiumkitty/rdfpandas/stringu')

ds02 = pd.Series(data=['String'], index=[idx1], dtype = np.unicode_, name = 'http://github.com/cadmiumkitty/rdfpandas/unicodeu')

ds03 = pd.Series(data=[0], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_1')
ds04 = pd.Series(data=[-9223372036854775808], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_2')
ds05 = pd.Series(data=[9223372036854775807], index=[idx1], dtype = np.int64, name = 'http://github.com/cadmiumkitty/rdfpandas/int64_3')

ds06 = pd.Series(data=[0], index=[idx1], dtype = np.uint64, name = 'http://github.com/cadmiumkitty/rdfpandas/uint64_1')
ds07 = pd.Series(data=[18446744073709551615], index=[idx1], dtype = np.uint64, name = 'http://github.com/cadmiumkitty/rdfpandas/uint64_2')

def test_should_convert_empty_graph_to_empty_data_frame(self):
"""Should return empty DataFrame for empty Graph"""
g = rdflib.Graph()
df_expected = pd.DataFrame()
df_result = rdfpandas.from_graph(g)
self.assertEquals(df_expected.equals(df_result), True)
ds08 = pd.Series(data=[0.0], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_1')
ds09 = pd.Series(data=[-1.7976931348623157e+308], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_2')
ds10 = pd.Series(data=[1.7976931348623157e+308], index=[idx1], dtype = np.float64, name = 'http://github.com/cadmiumkitty/rdfpandas/float64_3')

ds11 = pd.Series(data=[True], index=[idx1], dtype = np.bool_, name = 'http://github.com/cadmiumkitty/rdfpandas/true')
ds12 = pd.Series(data=[False], index=[idx1], dtype = np.bool_, name = 'http://github.com/cadmiumkitty/rdfpandas/false')

df = pd.DataFrame([ds01, ds02, ds03, ds04, ds05, ds06, ds07, ds08, ds09, ds10, ds11, ds12]).T

logging.debug('DF: %s', df)

g_expected = rdflib.Graph()
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/stringu'),
rdflib.Literal('Bytes')))

g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/unicodeu'),
rdflib.Literal('String')))

g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_1'),
rdflib.Literal(0)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_2'),
rdflib.Literal(-9223372036854775808)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/int64_3'),
rdflib.Literal(9223372036854775807)))

g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/uint64_1'),
rdflib.Literal(0)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/uint64_2'),
rdflib.Literal(18446744073709551615)))

g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_1'),
rdflib.Literal(0.0)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_2'),
rdflib.Literal(-1.7976931348623157e+308)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/float64_3'),
rdflib.Literal(1.7976931348623157e+308)))

g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/true'),
rdflib.Literal(True)))
g_expected.add((rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/one'),
rdflib.URIRef('http://github.com/cadmiumkitty/rdfpandas/false'),
rdflib.Literal(False)))

g_result = rdfpandas.to_graph(df)

self.assertEquals(rdflib.compare.isomorphic(g_expected, g_result), True)


if __name__ == '__main__':
unittest.main()

0 comments on commit cdbb1c0

Please sign in to comment.