Skip to content

Commit

Permalink
Remove sqlite model
Browse files Browse the repository at this point in the history
  • Loading branch information
rexim committed Mar 26, 2023
1 parent 80b90c3 commit cea76a0
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 225 deletions.
50 changes: 0 additions & 50 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,5 @@ edition = "2021"
[dependencies]
serde = { version = "1.0.152", features = ["derive"] }
serde_json = "1.0.91"
sqlite = "0.30.3"
sqlite3-sys = "0.14.0"
tiny_http = "0.12.0"
xml-rs = "0.8.4"
16 changes: 1 addition & 15 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -140,27 +140,13 @@ fn entry() -> Result<(), ()> {
let mut args = env::args();
let program = args.next().expect("path to program is provided");

let mut subcommand = None;
let mut use_sqlite_mode = false;

while let Some(arg) = args.next() {
match arg.as_str() {
"--sqlite" => use_sqlite_mode = true,
_ => {
subcommand = Some(arg);
break
}
}
}

let subcommand = subcommand.ok_or_else(|| {
let subcommand = args.next().ok_or_else(|| {
usage(&program);
eprintln!("ERROR: no subcommand is provided");
})?;

match subcommand.as_str() {
"serve" => {
assert!(!use_sqlite_mode);
let dir_path = args.next().ok_or_else(|| {
usage(&program);
eprintln!("ERROR: no directory is provided for {subcommand} subcommand");
Expand Down
161 changes: 3 additions & 158 deletions src/model.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,159 +5,6 @@ use std::result::Result;
use super::lexer::Lexer;
use std::time::SystemTime;

/// Common interface of the search-index backends in this file.
///
/// Every method reports failure as `Err(())`, so the error value itself
/// carries no details.
pub trait Model {
/// Searches the index for `query` (given as a slice of `char`s) and returns
/// `(path, f32)` pairs.
// NOTE(review): the `f32` is presumably a relevance score/rank — confirm against the implementations.
fn search_query(&self, query: &[char]) -> Result<Vec<(PathBuf, f32)>, ()>;
/// Returns `Ok(true)` when the document at `path`, whose modification time is
/// `last_modified`, has to be indexed (again).
fn requires_reindexing(&mut self, path: &Path, last_modified: SystemTime) -> Result<bool, ()>;
/// Adds the document at `path`, with the given `last_modified` time and the
/// text `content`, to the index.
fn add_document(&mut self, path: PathBuf, last_modified: SystemTime, content: &[char]) -> Result<(), ()>;
}

/// [`Model`] implementation that keeps the index in a SQLite database.
pub struct SqliteModel {
// Open database connection; every statement of this model is run through it.
connection: sqlite::Connection,
}

impl SqliteModel {
    /// Runs `statement` on the underlying connection.
    ///
    /// On failure the statement and the database error are printed to stderr
    /// and `Err(())` is returned.
    fn execute(&self, statement: &str) -> Result<(), ()> {
        match self.connection.execute(statement) {
            Ok(_) => Ok(()),
            Err(err) => {
                eprintln!("ERROR: could not execute query {statement}: {err}");
                Err(())
            }
        }
    }

    /// Opens a transaction by executing `BEGIN;`.
    pub fn begin(&self) -> Result<(), ()> {
        self.execute("BEGIN;")
    }

    /// Commits the current transaction by executing `COMMIT;`.
    pub fn commit(&self) -> Result<(), ()> {
        self.execute("COMMIT;")
    }

    /// Opens the SQLite database at `path` and makes sure the `Documents`,
    /// `TermFreq` and `DocFreq` tables exist.
    ///
    /// Any failure (opening the file or creating a table) is logged to stderr
    /// and reported as `Err(())`.
    pub fn open(path: &Path) -> Result<Self, ()> {
        // Schema statements, executed in this order. The SQL text is kept
        // flush-left so the statements sent to SQLite (and echoed in error
        // messages) stay exactly as they were.
        const SCHEMA: [&str; 3] = [
            "
CREATE TABLE IF NOT EXISTS Documents (
id INTEGER NOT NULL PRIMARY KEY,
path TEXT,
term_count INTEGER,
UNIQUE(path)
);
",
            "
CREATE TABLE IF NOT EXISTS TermFreq (
term TEXT,
doc_id INTEGER,
freq INTEGER,
UNIQUE(term, doc_id),
FOREIGN KEY(doc_id) REFERENCES Documents(id)
);
",
            "
CREATE TABLE IF NOT EXISTS DocFreq (
term TEXT,
freq INTEGER,
UNIQUE(term)
);
",
        ];

        let connection = match sqlite::open(path) {
            Ok(connection) => connection,
            Err(err) => {
                eprintln!("ERROR: could not open sqlite database {path}: {err}", path = path.display());
                return Err(());
            }
        };
        let model = Self { connection };

        for statement in SCHEMA {
            model.execute(statement)?;
        }

        Ok(model)
    }
}

impl Model for SqliteModel {
    /// Searching is not implemented for the SQLite backend: calling this
    /// panics through `todo!()`.
    fn search_query(&self, _query: &[char]) -> Result<Vec<(PathBuf, f32)>, ()> {
        todo!()
    }

    /// Always answers `Ok(true)`; the path and modification time are ignored.
    fn requires_reindexing(&mut self, _path: &Path, _last_modified: SystemTime) -> Result<bool, ()> {
        Ok(true)
    }

    /// Tokenizes `content` and stores the document in the database.
    ///
    /// Writes one row into `Documents` (path and total term count), one row
    /// per distinct term into `TermFreq`, and bumps the per-term counter in
    /// `DocFreq` by one for every distinct term of the document.
    /// `_last_modified` is not stored.
    ///
    /// # Errors
    ///
    /// Returns `Err(())` after logging to stderr when any statement cannot be
    /// prepared, bound or executed.
    // NOTE(review): `Documents.path` is declared UNIQUE and this is a plain
    // INSERT, so adding a path that is already stored fails — confirm callers
    // never re-add a document.
    fn add_document(&mut self, path: PathBuf, _last_modified: SystemTime, content: &[char]) -> Result<(), ()> {
        // Lex the content exactly once, collecting both the per-term
        // frequencies and the total number of terms in the same pass.
        let mut tf = TermFreq::new();
        let mut term_count: usize = 0;
        for term in Lexer::new(content) {
            *tf.entry(term).or_insert(0) += 1;
            term_count += 1;
        }

        let doc_id = {
            let query = "INSERT INTO Documents (path, term_count) VALUES (:path, :count)";
            let log_err = |err| {
                eprintln!("ERROR: Could not execute query {query}: {err}");
            };
            let mut stmt = self.connection.prepare(query).map_err(log_err)?;
            // TODO: using path.display() is probably bad in here
            // Find a better way to represent the path in the database
            stmt.bind_iter::<_, (_, sqlite::Value)>([
                (":path", path.display().to_string().as_str().into()),
                (":count", (term_count as i64).into()),
            ]).map_err(log_err)?;
            stmt.next().map_err(log_err)?;
            // SAFETY: `self.connection` is an open connection owned by `self`,
            // so the raw handle returned by `as_raw()` is valid for this call.
            unsafe {
                sqlite3_sys::sqlite3_last_insert_rowid(self.connection.as_raw())
            }
        };

        for (term, freq) in &tf {
            // TermFreq: how often `term` occurs in this document.
            {
                let query = "INSERT INTO TermFreq(doc_id, term, freq) VALUES (:doc_id, :term, :freq)";
                let log_err = |err| {
                    eprintln!("ERROR: Could not execute query {query}: {err}");
                };
                let mut stmt = self.connection.prepare(query).map_err(log_err)?;
                stmt.bind_iter::<_, (_, sqlite::Value)>([
                    (":doc_id", doc_id.into()),
                    (":term", term.as_str().into()),
                    (":freq", (*freq as i64).into()),
                ]).map_err(log_err)?;
                stmt.next().map_err(log_err)?;
            }

            // DocFreq: number of documents that contain `term`.
            {
                // Current counter value, or 0 when the term has no row yet.
                let freq = {
                    let query = "SELECT freq FROM DocFreq WHERE term = :term";
                    let log_err = |err| {
                        eprintln!("ERROR: Could not execute query {query}: {err}");
                    };
                    let mut stmt = self.connection.prepare(query).map_err(log_err)?;
                    stmt.bind_iter::<_, (_, sqlite::Value)>([
                        (":term", term.as_str().into()),
                    ]).map_err(log_err)?;
                    match stmt.next().map_err(log_err)? {
                        sqlite::State::Row => stmt.read::<i64, _>("freq").map_err(log_err)?,
                        sqlite::State::Done => 0
                    }
                };

                // TODO: find a better way to auto increment the frequency
                let query = "INSERT OR REPLACE INTO DocFreq(term, freq) VALUES (:term, :freq)";
                let log_err = |err| {
                    eprintln!("ERROR: Could not execute query {query}: {err}");
                };
                let mut stmt = self.connection.prepare(query).map_err(log_err)?;
                stmt.bind_iter::<_, (_, sqlite::Value)>([
                    (":term", term.as_str().into()),
                    (":freq", (freq + 1).into()),
                ]).map_err(log_err)?;
                stmt.next().map_err(log_err)?;
            }
        }

        Ok(())
    }
}

type DocFreq = HashMap<String, usize>;
type TermFreq = HashMap<String, usize>;
#[derive(Deserialize, Serialize)]
Expand Down Expand Up @@ -185,17 +32,15 @@ impl InMemoryModel {
}
}
}
}

impl Model for InMemoryModel {
fn requires_reindexing(&mut self, file_path: &Path, last_modified: SystemTime) -> Result<bool, ()> {
pub fn requires_reindexing(&mut self, file_path: &Path, last_modified: SystemTime) -> Result<bool, ()> {
if let Some(doc) = self.docs.get(file_path) {
return Ok(doc.last_modified < last_modified);
}
return Ok(true);
}

fn search_query(&self, query: &[char]) -> Result<Vec<(PathBuf, f32)>, ()> {
pub fn search_query(&self, query: &[char]) -> Result<Vec<(PathBuf, f32)>, ()> {
let mut result = Vec::new();
let tokens = Lexer::new(&query).collect::<Vec<_>>();
for (path, doc) in &self.docs {
Expand All @@ -210,7 +55,7 @@ impl Model for InMemoryModel {
Ok(result)
}

fn add_document(&mut self, file_path: PathBuf, last_modified: SystemTime, content: &[char]) -> Result<(), ()> {
pub fn add_document(&mut self, file_path: PathBuf, last_modified: SystemTime, content: &[char]) -> Result<(), ()> {
self.remove_document(&file_path);

let mut tf = TermFreq::new();
Expand Down

0 comments on commit cea76a0

Please sign in to comment.