web: integrate new search engine with search box

FINNGEN · Nov 15, 2024 · d15db3b · d15db3b
2 parents cc15450 + d11b615
commit d15db3b
Show file tree

Hide file tree

Showing 10 changed files with 975 additions and 269 deletions.
diff --git a/web/lib/risteys/fg_endpoint.ex b/web/lib/risteys/fg_endpoint.ex
@@ -3,23 +3,26 @@ defmodule Risteys.FGEndpoint do
   The FGEndpoint context.
   """
   import Ecto.Query, warn: false
-  alias Risteys.Repo
-  alias Risteys.Icd10
-  alias Risteys.YearDistribution
+
   alias Risteys.AgeDistribution
-  alias Risteys.MortalityParams
-  alias Risteys.MortalityBaseline
-  alias Risteys.MortalityCounts
   alias Risteys.CoxHR
-  alias Risteys.Genomics
   alias Risteys.DrugStats
-  alias Risteys.FGEndpoint.GeneticCorrelation
+  alias Risteys.FGEndpoint
+  alias Risteys.FGEndpoint.CaseOverlapsFR
   alias Risteys.FGEndpoint.Correlation
   alias Risteys.FGEndpoint.Definition
   alias Risteys.FGEndpoint.DefinitionICD10
   alias Risteys.FGEndpoint.ExplainerStep
+  alias Risteys.FGEndpoint.GeneticCorrelation
   alias Risteys.FGEndpoint.StatsCumulativeIncidence
-  alias Risteys.FGEndpoint.CaseOverlapsFR
+  alias Risteys.Genomics
+  alias Risteys.Icd10
+  alias Risteys.Icd9
+  alias Risteys.MortalityBaseline
+  alias Risteys.MortalityCounts
+  alias Risteys.MortalityParams
+  alias Risteys.Repo
+  alias Risteys.YearDistribution
 
   # -- Endpoint --
   def list_endpoint_names() do
@@ -168,6 +171,102 @@ defmodule Risteys.FGEndpoint do
     Repo.all(query)
   end
 
+  @doc """
+  Finds the FinnGen endpoints for which at least one searchable attribute contains one of
+  the given keywords.
+
+  The searchable attributes are the endpoint name and long name, and the code and
+  description of the ICD-10 and ICD-9 records attached to the endpoint. Matching is a
+  case-insensitive substring match (SQL `ILIKE`), and the per-attribute / per-keyword
+  conditions are OR-ed together.
+
+  Keywords are matched literally: the LIKE metacharacters (percent sign, underscore and
+  backslash) found in a keyword are escaped before the pattern is built, so user input
+  cannot act as a wildcard.
+
+  When `user_keywords` is empty no condition is added and every row of the base query is
+  returned.
+
+  Returns a list with one map per endpoint, with the keys `:dbid`, `:name`, `:longname`,
+  `:n_gws_hits`, `:n_cases`, `:icd10_codes`, `:icd10_descriptions`, `:icd9_codes` and
+  `:icd9_descriptions`. The `*_codes` values are arrays of distinct codes; the
+  `*_descriptions` values are single lowercased strings made of the space-joined
+  descriptions.
+  """
+  @spec find_matching_records([String.t()]) :: [map()]
+  def find_matching_records(user_keywords) do
+    searchable_attributes = [
+      :endpoint_name,
+      :endpoint_longname,
+      :icd10_code,
+      :icd10_description,
+      :icd9_code,
+      :icd9_description
+    ]
+
+    # One row per (endpoint, ICD-10, ICD-9) combination, flattened so that every searchable
+    # attribute is a plain column of the subquery built from it below.
+    # NOTE: the `where` on `key_figures.dataset` discards the rows where the left join found
+    # no key figures, so only endpoints having "FG" key figures are searched.
+    init_query =
+      from endpoint in Definition,
+        left_join: key_figures in Risteys.KeyFigures,
+        on: endpoint.id == key_figures.fg_endpoint_id,
+        left_join: assoc_icd10 in FGEndpoint.DefinitionICD10,
+        on: endpoint.id == assoc_icd10.fg_endpoint_id,
+        left_join: icd10 in Icd10,
+        on: assoc_icd10.icd10_id == icd10.id,
+        left_join: assoc_icd9 in FGEndpoint.DefinitionICD9,
+        on: endpoint.id == assoc_icd9.fg_endpoint_id,
+        left_join: icd9 in Icd9,
+        on: assoc_icd9.icd9_id == icd9.id,
+        where: key_figures.dataset == "FG",
+        select: %{
+          endpoint_dbid: endpoint.id,
+          endpoint_name: endpoint.name,
+          endpoint_longname: endpoint.longname,
+          icd10_code: icd10.code,
+          icd10_description: icd10.description,
+          icd9_code: icd9.code,
+          icd9_description: icd9.description,
+          stats_n_gws_hits: endpoint.gws_hits,
+          stats_n_cases: key_figures.nindivs_all
+        }
+
+    # Cartesian product of attributes and keywords: one "contains" pattern for each pair.
+    filters =
+      for field_name <- searchable_attributes, keyword <- user_keywords do
+        {field_name, "%" <> escape_like_pattern(keyword) <> "%"}
+      end
+
+    # Chain every filter with OR on top of the flattened subquery.
+    running_query =
+      Enum.reduce(
+        filters,
+        subquery(init_query),
+        fn {field_name, where_value}, query ->
+          from qq in query, or_where: field(qq, ^field_name) |> ilike(^where_value)
+        end
+      )
+
+    # Collapse the matching rows back to one row per endpoint, aggregating the ICD data.
+    Repo.all(
+      from subq in running_query,
+        group_by: [
+          subq.endpoint_dbid,
+          subq.endpoint_name,
+          subq.endpoint_longname,
+          subq.stats_n_gws_hits,
+          subq.stats_n_cases
+        ],
+        select: %{
+          dbid: subq.endpoint_dbid,
+          name: subq.endpoint_name,
+          longname: subq.endpoint_longname,
+          n_gws_hits: subq.stats_n_gws_hits,
+          n_cases: subq.stats_n_cases,
+          icd10_codes: fragment("array_remove(array_agg(DISTINCT ?), NULL)", subq.icd10_code),
+          icd10_descriptions:
+            fragment(
+              "lower(array_to_string(array_remove(array_agg(?), NULL), ' '))",
+              subq.icd10_description
+            ),
+          icd9_codes: fragment("array_remove(array_agg(DISTINCT ?), NULL)", subq.icd9_code),
+          icd9_descriptions:
+            fragment(
+              "lower(array_to_string(array_remove(array_agg(?), NULL), ' '))",
+              subq.icd9_description
+            )
+        }
+    )
+
+    # NOTE(Vincent 2024-11-06)  At some point I wanted to do as much processing as possible by the
+    # database, but the code became quite complex and the output data structure was kind of messy
+    # to work with.
+    # Here is the code that attempted to have dynamic fields indicating matches:
+    #
+    #    query =
+    #      Enum.reduce(filters, from(_ in query, select: %{}), fn {attr, lookup}, query ->
+    #        key = Atom.to_string(attr)
+    #
+    #        from qq in query, select_merge: %{{^key, ^lookup} => field(qq, ^attr) |> ilike(^lookup)}
+    #      end)
+    #
+    #    return_attributes = [:name, :longname, :gws_hits]
+    #
+    #    query = from qq in query, select_merge: map(qq, ^return_attributes)
+  end
+
+  # Escapes the LIKE/ILIKE metacharacters of a user keyword so it is matched literally.
+  # Backslash is the default LIKE escape character in PostgreSQL, so it is escaped as well.
+  # The replacement is done in a single pass, hence inserted backslashes are never re-escaped.
+  defp escape_like_pattern(keyword) do
+    String.replace(keyword, ["\\", "%", "_"], &("\\" <> &1))
+  end
+
   # -- Endpoint Explainer --
   def get_explainer_steps(endpoint) do
     steps = [

diff --git a/web/lib/risteys/key_figures.ex b/web/lib/risteys/key_figures.ex
@@ -40,13 +40,31 @@ defmodule Risteys.KeyFigures do
     |> validate_exclusion(:nindivs_female, 1..4)
     |> validate_number(:nindivs_male, greater_than_or_equal_to: 0)
     |> validate_exclusion(:nindivs_male, 1..4)
-    |> validate_number(:median_age_all, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 120.0)
-    |> validate_number(:median_age_female, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 120.0)
-    |> validate_number(:median_age_male, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 120.0)
-    |> validate_number(:prevalence_all, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 100.0)
-    |> validate_number(:prevalence_female, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 100.0)
-    |> validate_number(:prevalence_male, greater_than_or_equal_to: 0.0, less_than_or_equal_to: 100.0)
+    |> validate_number(:median_age_all,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 120.0
+    )
+    |> validate_number(:median_age_female,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 120.0
+    )
+    |> validate_number(:median_age_male,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 120.0
+    )
+    |> validate_number(:prevalence_all,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 100.0
+    )
+    |> validate_number(:prevalence_female,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 100.0
+    )
+    |> validate_number(:prevalence_male,
+      greater_than_or_equal_to: 0.0,
+      less_than_or_equal_to: 100.0
+    )
     |> validate_inclusion(:dataset, ["FG", "FR", "FR_index"])
-    |> unique_constraint(:fg_endpoint_id, name: :key_figures_fg_endpoint_id_dataset)
+    |> unique_constraint([:fg_endpoint_id, :dataset], name: :key_figures_fg_endpoint_id_dataset)
   end
 end
diff --git a/web/lib/risteys/lab_test_stats.ex b/web/lib/risteys/lab_test_stats.ex
@@ -1816,4 +1816,74 @@ defmodule Risteys.LabTestStats do
     end)
     |> Enum.reject(&is_nil/1)
   end
+
+  @doc """
+  Finds the OMOP concepts for which at least one searchable attribute contains one of the
+  given keywords.
+
+  The searchable attributes are the OMOP concept ID, the OMOP concept name and the test
+  names of the QC table rows attached to the concept. Matching is a case-insensitive
+  substring match (SQL `ILIKE`), and the per-attribute / per-keyword conditions are OR-ed
+  together.
+
+  Keywords are matched literally: the LIKE metacharacters (percent sign, underscore and
+  backslash) found in a keyword are escaped before the pattern is built, so user input
+  cannot act as a wildcard.
+
+  When `user_keywords` is empty no condition is added and every row of the base query is
+  returned.
+
+  Returns a list with one map per group of OMOP concept columns, with the keys
+  `:omop_concept_dbid`, `:omop_concept_id`, `:omop_concept_name`, `:omop_concept_npeople`
+  (female count plus male count, a missing count being taken as 0),
+  `:omop_concept_percent_people_with_two_plus_records` and `:list_test_names` (the matching
+  test names joined with spaces, ordered by decreasing number of people).
+  """
+  @spec find_matching_records([String.t()]) :: [map()]
+  def find_matching_records(user_keywords) do
+    # Each entry is {named binding, column} so the filters can target any joined table.
+    searchable_attributes = [
+      {:omop, :concept_id},
+      {:qc_table, :test_name},
+      {:omop, :concept_name}
+    ]
+
+    init_query =
+      from omop_concept in OMOP.Concept,
+        as: :omop,
+        left_join: qc_table in QCTable,
+        as: :qc_table,
+        on: omop_concept.id == qc_table.omop_concept_dbid,
+        left_join: npeople in NPeople,
+        as: :npeople,
+        on: omop_concept.id == npeople.omop_concept_dbid,
+        left_join: people_with_two_plus_records in PeopleWithTwoPlusRecords,
+        as: :people_with_two_plus_records,
+        on: omop_concept.id == people_with_two_plus_records.omop_concept_dbid
+
+    # Cartesian product of attributes and keywords: one "contains" pattern for each pair.
+    filters =
+      for {as_table, field_name} <- searchable_attributes, keyword <- user_keywords do
+        {{as_table, field_name}, "%" <> escape_like_pattern(keyword) <> "%"}
+      end
+
+    # Chain every filter with OR, resolving the table through its named binding.
+    running_query =
+      Enum.reduce(filters, init_query, fn {{as_table, field_name}, where_value}, query ->
+        from qq in query, or_where: field(as(^as_table), ^field_name) |> ilike(^where_value)
+      end)
+
+    running_query =
+      from qq in running_query,
+        select: %{
+          omop_concept_dbid: as(:omop).id,
+          omop_concept_id: as(:omop).concept_id,
+          omop_concept_name: as(:omop).concept_name,
+          omop_concept_npeople:
+            coalesce(as(:npeople).female_count, 0) + coalesce(as(:npeople).male_count, 0),
+          omop_concept_percent_people_with_two_plus_records:
+            as(:people_with_two_plus_records).percent_people,
+          test_name: as(:qc_table).test_name,
+          test_npeople: as(:qc_table).npeople
+        }
+
+    # Collapse the matching rows to one row per OMOP concept, aggregating the test names.
+    Repo.all(
+      from subq in subquery(running_query),
+        group_by: [
+          subq.omop_concept_dbid,
+          subq.omop_concept_id,
+          subq.omop_concept_name,
+          subq.omop_concept_npeople,
+          subq.omop_concept_percent_people_with_two_plus_records
+        ],
+        select: %{
+          omop_concept_dbid: subq.omop_concept_dbid,
+          omop_concept_id: subq.omop_concept_id,
+          omop_concept_name: subq.omop_concept_name,
+          omop_concept_npeople: subq.omop_concept_npeople,
+          omop_concept_percent_people_with_two_plus_records:
+            subq.omop_concept_percent_people_with_two_plus_records,
+          list_test_names:
+            fragment(
+              "array_to_string(array_agg(? ORDER BY ? DESC), ' ')",
+              subq.test_name,
+              subq.test_npeople
+            )
+        }
+    )
+  end
+
+  # Escapes the LIKE/ILIKE metacharacters of a user keyword so it is matched literally.
+  # Backslash is the default LIKE escape character in PostgreSQL, so it is escaped as well.
+  # The replacement is done in a single pass, hence inserted backslashes are never re-escaped.
+  defp escape_like_pattern(keyword) do
+    String.replace(keyword, ["\\", "%", "_"], &("\\" <> &1))
+  end
 end