diff --git a/Gemfile b/Gemfile index 556b2c6d..32499f2f 100644 --- a/Gemfile +++ b/Gemfile @@ -51,7 +51,7 @@ gem "image_processing", "~> 1.2" group :development, :test do # See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem gem "debug", platforms: %i[mri] - gem 'factory_bot_rails' + gem "factory_bot_rails" gem "timecop", "~> 0.9.10" end @@ -66,13 +66,14 @@ group :development do # gem "spring" gem "ruby-lsp", "~> 0.14.6" + gem "ruby-lsp-rails", "~> 0.3.3" end group :test do # Use system testing [https://guides.rubyonrails.org/testing.html#system-testing] gem "capybara" gem "faker" - gem 'fuubar' + gem "fuubar" gem "rspec-uuid" gem "selenium-webdriver" end @@ -110,8 +111,13 @@ gem "motor-admin", "~> 0.4.26" gem "baran", "~> 0.1.12" # error monitoring -gem 'sentry-rails' -gem 'sentry-ruby' -gem 'sentry-sidekiq' +gem "sentry-rails" +gem "sentry-ruby" +gem "sentry-sidekiq" gem "ruby-openai", "~> 7.1" + +# graph DB +gem "activegraph", "11.5.0.beta.2" +gem "async", "2.11.0" +gem "neo4j-ruby-driver", "~> 4.4" diff --git a/Gemfile.lock b/Gemfile.lock index 12e4b55b..180280b5 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -53,6 +53,12 @@ GEM erubi (~> 1.11) rails-dom-testing (~> 2.2) rails-html-sanitizer (~> 1.6) + activegraph (11.5.0.beta.2) + activemodel (>= 7) + i18n (!= 1.8.8) + neo4j-ruby-driver (>= 4.4.1, < 5) + orm_adapter (>= 0.5.0) + sorted_set activejob (7.1.3.2) activesupport (= 7.1.3.2) globalid (>= 0.3.6) @@ -88,6 +94,13 @@ GEM ar_lazy_preload (1.1.2) rails (>= 5.2) ast (2.4.2) + async (2.11.0) + console (~> 1.25, >= 1.25.2) + fiber-annotation + io-event (~> 1.5, >= 1.5.1) + timers (~> 4.1) + async-io (1.43.2) + async audited (5.6.0) activerecord (>= 5.2, < 7.2) activesupport (>= 5.2, < 7.2) @@ -112,6 +125,10 @@ GEM coderay (1.1.3) concurrent-ruby (1.2.3) connection_pool (2.4.1) + console (1.27.0) + fiber-annotation + fiber-local (~> 1.1) + json crass (1.0.6) date (3.3.4) debug (1.9.1) @@ -144,6 +161,10 @@ GEM 
faraday-net_http (3.1.0) net-http ffi (1.16.3) + fiber-annotation (0.2.0) + fiber-local (1.1.0) + fiber-storage + fiber-storage (1.0.0) font-awesome-rails (4.7.0.8) railties (>= 3.2, < 8.0) fugit (1.10.1) @@ -168,13 +189,14 @@ GEM activesupport (>= 6.0.0) railties (>= 6.0.0) io-console (0.7.2) + io-event (1.6.5) irb (1.12.0) rdoc reline (>= 0.4.2) jbuilder (2.11.5) actionview (>= 5.0.0) activesupport (>= 5.0.0) - json (2.7.1) + json (2.7.2) kaminari (1.2.2) activesupport (>= 4.1.0) kaminari-actionview (= 1.2.2) @@ -213,6 +235,11 @@ GEM multi_xml (0.6.0) multipart-post (2.4.1) mutex_m (0.2.0) + neo4j-ruby-driver (4.4.5) + activesupport (>= 5.2.0) + async-io + connection_pool + zeitwerk (>= 2.1.10) nested_form (0.3.2) net-http (0.4.1) uri @@ -312,6 +339,7 @@ GEM zeitwerk (~> 2.6) rainbow (3.1.1) rake (13.1.0) + rbtree (0.4.6) rdoc (6.6.2) psych (>= 4.0.0) redis (5.1.0) @@ -377,6 +405,12 @@ GEM language_server-protocol (~> 3.17.0) prism (>= 0.22.0, < 0.25) sorbet-runtime (>= 0.5.10782) + ruby-lsp-rails (0.3.3) + actionpack (>= 6.0) + activerecord (>= 6.0) + railties (>= 6.0) + ruby-lsp (>= 0.14.2, < 0.15.0) + sorbet-runtime (>= 0.5.9897) ruby-openai (7.1.0) event_stream_parser (>= 0.3.0, < 2.0.0) faraday (>= 1) @@ -408,6 +442,7 @@ GEM sentry-sidekiq (5.18.1) sentry-ruby (~> 5.18.1) sidekiq (>= 3.0) + set (1.1.0) sidekiq (7.2.2) concurrent-ruby (< 2) connection_pool (>= 2.3.0) @@ -418,6 +453,9 @@ GEM globalid (>= 1.0.1) sidekiq (>= 6) sorbet-runtime (0.5.11294) + sorted_set (1.0.3) + rbtree + set (~> 1.0) sprockets (4.2.1) concurrent-ruby (~> 1.0) rack (>= 2.2.4, < 4) @@ -444,6 +482,7 @@ GEM tilt (2.3.0) timecop (0.9.10) timeout (0.4.1) + timers (4.3.5) ttfunk (1.8.0) bigdecimal (~> 3.1) turbo-rails (2.0.5) @@ -480,7 +519,9 @@ PLATFORMS DEPENDENCIES aasm (~> 5.5) + activegraph (= 11.5.0.beta.2) activestorage (~> 7.1) + async (= 2.11.0) baran (~> 0.1.12) bootsnap capybara @@ -495,6 +536,7 @@ DEPENDENCIES importmap-rails jbuilder motor-admin (~> 0.4.26) + 
neo4j-ruby-driver (~> 4.4) pagy (~> 8.0) pdf-reader (~> 2.12) pg @@ -509,6 +551,7 @@ DEPENDENCIES rubocop-rails (~> 2.24) rubocop-rspec (~> 2.28) ruby-lsp (~> 0.14.6) + ruby-lsp-rails (~> 0.3.3) ruby-openai (~> 7.1) sassc-rails selenium-webdriver diff --git a/app/controllers/collections_controller.rb b/app/controllers/collections_controller.rb index 0675b2d6..8f57be8c 100644 --- a/app/controllers/collections_controller.rb +++ b/app/controllers/collections_controller.rb @@ -53,6 +53,7 @@ def destroy @collection.destroy! DestroyChromaCollectionJob.perform_async(@collection.slug) + CleanGraphJob.perform_async(@collection.id) respond_to do |format| format.html { redirect_to collections_url } diff --git a/app/services/graph/clean_graph.rb b/app/services/graph/clean_graph.rb new file mode 100644 index 00000000..91f1133a --- /dev/null +++ b/app/services/graph/clean_graph.rb @@ -0,0 +1,13 @@ +module Graph + class CleanGraph + def initialize(collection_id = nil) + @collection_id = collection_id + end + + def execute + Nodes::Entity.find_each do |entity| + entity.destroy if entity.collection == @collection_id + end + end + end +end diff --git a/app/services/graph/graph_collection_entities.rb b/app/services/graph/graph_collection_entities.rb index a00288ac..94d28ca9 100644 --- a/app/services/graph/graph_collection_entities.rb +++ b/app/services/graph/graph_collection_entities.rb @@ -16,7 +16,7 @@ def graph_node(entity) node = node(entity) entity.relationships_from.each do |relationship| - other_node = node(relationship.to) + other_node = node(relationship.to_entity) relationship(node, other_node, relationship) end end diff --git a/app/sidekiq/clean_graph_job.rb b/app/sidekiq/clean_graph_job.rb new file mode 100644 index 00000000..fc9761a5 --- /dev/null +++ b/app/sidekiq/clean_graph_job.rb @@ -0,0 +1,11 @@ +# Sidekiq job that runs Graph::CleanGraph for the Collection ID passed as its first argument +class CleanGraphJob + include Sidekiq::Job + + def perform(*args) + collection_id = args.first + + # need to pass the ID here, since the Collection may be gone from the
database + Graph::CleanGraph.new(collection_id).execute + end +end diff --git a/config/application.rb b/config/application.rb index 0c60f3f6..c400221e 100644 --- a/config/application.rb +++ b/config/application.rb @@ -39,5 +39,10 @@ class Application < Rails::Application config.run_sidekiq = ENV.fetch("RUN_SIDEKIQ", "false") == "true" Sidekiq.logger.class.include ActiveSupport::LoggerSilence + + require 'active_graph/railtie' + config.neo4j.driver.url = ENV.fetch('NEO4J_URL') { 'neo4j://localhost:7687' } + config.neo4j.driver.username = ENV.fetch('NEO4J_USERNAME') { 'neo4j' } + config.neo4j.driver.password = ENV.fetch('NEO4J_PASSWORD') { 'password' } end end diff --git a/config/neo4j.yml b/config/neo4j.yml new file mode 100644 index 00000000..76e23b94 --- /dev/null +++ b/config/neo4j.yml @@ -0,0 +1,17 @@ +development: + url: neo4j://localhost:7687 + username: neo4j + password: password + +test: + url: neo4j://localhost:7688 + username: neo4j + password: password + +production: + url: + - neo4j://core1:7687 + - neo4j://core2:7687 + - neo4j://core3:7687 + username: neo4j + password: password diff --git a/db/neo4j/migrate/20240723170834_force_create_nodes_entity_uuid_constraint.rb b/db/neo4j/migrate/20240723170834_force_create_nodes_entity_uuid_constraint.rb new file mode 100644 index 00000000..ffb13f2c --- /dev/null +++ b/db/neo4j/migrate/20240723170834_force_create_nodes_entity_uuid_constraint.rb @@ -0,0 +1,9 @@ +class ForceCreateNodesEntityUuidConstraint < ActiveGraph::Migrations::Base + def up + add_constraint :"Nodes::Entity", :uuid, force: true + end + + def down + drop_constraint :"Nodes::Entity", :uuid + end +end diff --git a/db/neo4j/migrate/20240723171655_force_create_nodesentity_uuid_constraint.rb b/db/neo4j/migrate/20240723171655_force_create_nodesentity_uuid_constraint.rb new file mode 100644 index 00000000..b130db40 --- /dev/null +++ b/db/neo4j/migrate/20240723171655_force_create_nodesentity_uuid_constraint.rb @@ -0,0 +1,9 @@ +class
ForceCreateNodesentityUuidConstraint < ActiveGraph::Migrations::Base + def up + add_constraint :"Nodes::entity", :uuid, force: true + end + + def down + drop_constraint :"Nodes::entity", :uuid + end +end diff --git a/db/neo4j/migrate/20240723174748_force_create_entity_uuid_constraint.rb b/db/neo4j/migrate/20240723174748_force_create_entity_uuid_constraint.rb new file mode 100644 index 00000000..0b7e3bec --- /dev/null +++ b/db/neo4j/migrate/20240723174748_force_create_entity_uuid_constraint.rb @@ -0,0 +1,9 @@ +class ForceCreateEntityUuidConstraint < ActiveGraph::Migrations::Base + def up + add_constraint :Entity, :uuid, force: true + end + + def down + drop_constraint :Entity, :uuid + end +end diff --git a/db/neo4j/schema.yml b/db/neo4j/schema.yml new file mode 100644 index 00000000..5942ca96 --- /dev/null +++ b/db/neo4j/schema.yml @@ -0,0 +1,26 @@ +# This file is auto-generated from the current state of the database. Instead +# of editing this file, please use the migrations feature of Node to +# incrementally modify your database, and then regenerate this schema definition. +# +# Note that this schema.yml definition is the authoritative source for your +# database schema. If you need to create the application database on another +# system, you should be using neo4j:schema:load, not running all the migrations +# from scratch. The latter is a flawed and unsustainable approach (the more migrations +# you'll amass, the slower it'll run and the greater likelihood for issues). +# +# It's strongly recommended that you check this file into your version control system. 
+ +--- +:constraints: +- CREATE CONSTRAINT `constraint_af0384` FOR (n:`Nodes::Entity`) REQUIRE (n.`uuid`) + IS UNIQUE +- CREATE CONSTRAINT `constraint_c11d06c2` FOR (n:`Nodes::entity`) REQUIRE (n.`uuid`) + IS UNIQUE +- CREATE CONSTRAINT `constraint_db82530b` FOR (n:`Entity`) REQUIRE (n.`uuid`) IS UNIQUE +- CREATE CONSTRAINT `constraint_dbcee0a4` FOR (n:`ActiveGraph::Migrations::SchemaMigration`) + REQUIRE (n.`migration_id`) IS UNIQUE +:indexes: [] +:versions: +- '20240723170834' +- '20240723171655' +- '20240723174748' diff --git a/docs/knowledge_graph.md b/docs/knowledge_graph.md index a8d5d960..2979492c 100644 --- a/docs/knowledge_graph.md +++ b/docs/knowledge_graph.md @@ -87,3 +87,29 @@ digraph ingest_pipeline { rank=same { GraphCollectionJob GraphCollectionEntities } } ``` + +## Querying Neo4j + +To graph all nodes in the database: + +```neo4j +match (n) return (n) +``` + +To graph nodes from one Archyve Collection, called "Greek Mythology": + +```neo4j +match (n:`Nodes::Entity` {collection_name: "Greek Mythology"}) return (n) +``` + +To remove all nodes from the database: + +```neo4j +match (n) detach delete (n) +``` + +To remove nodes from just one Collection: + +```neo4j +match (n:`Nodes::Entity` {collection_name: "Greek Mythology"}) detach delete (n) +```