graph_sequence.py


import keras
import numpy as np
import more_itertools
import json
from neo4j.v1 import GraphDatabase, Driver

class GraphSequence(keras.utils.Sequence):

	def __init__(self, args, batch_size=32, test=False):
		self.batch_size = batch_size
		
		self.query = """
			MATCH p=
				(person:PERSON) 
					-[:WROTE]-> 
				(review:REVIEW {dataset_name:{dataset_name}, test:{test}}) 
					-[:OF]-> 
				(product:PRODUCT)
			RETURN person.style_preference + product.style as x, review.score as y
		"""

		self.query_params = {
			"dataset_name": "article_0",
			"test": test
		}

		with open('./settings.json') as f:
			self.settings = json.load(f)[args.database]

		driver = GraphDatabase.driver(
			self.settings["neo4j_url"], 
			auth=(self.settings["neo4j_user"], self.settings["neo4j_password"]))

		with driver.session() as session:
			data = session.run(self.query, **self.query_params).data()
			data = [ (np.array(i["x"]), i["y"]) for i in data]
			
			# Split the data up into "batches"
			data = more_itertools.chunked(data, self.batch_size)

			# Format our batches in the way Keras expects them:
			# An array of tuples (x_batch, y_batch)

			# An x_batch is a numpy array of shape (batch_size, 12), 
			# containing the concatenated style and style_preference vectors. 

			# A y_batch is a numpy array of shape (batch_size,1) containing the review scores.

			self.data = [ (np.array([j[0] for j in i]), np.array([j[1] for j in i])) for i in data]


	def __len__(self):
		return len(self.data)

	def __getitem__(self, idx):
		return self.data[idx]