diff --git a/Cargo.lock b/Cargo.lock index b2a17a1..a51cf0c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,21 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "arc-swap" version = "1.7.1" @@ -95,6 +110,20 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "wasm-bindgen", + "windows-targets 0.52.0", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -160,6 +189,7 @@ dependencies = [ name = "doclytics" version = "1.1.4-rc.9" dependencies = [ + "chrono", "lazy_static", "ollama-rs", "reqwest", @@ -419,6 +449,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "idna" version = "0.5.0" @@ -571,6 +624,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.16.0" @@ -1443,6 +1505,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index b7ea7b5..051397f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,4 +19,5 @@ slog-json = "2.4" slog-scope = "4.4" slog-stdlog = "4.1" lazy_static = "1.4" +chrono = "0.4.38" diff --git a/README.md b/README.md index 0a94112..47766c4 100644 --- a/README.md +++ b/README.md @@ -45,17 +45,22 @@ With these prerequisites met, you are now ready to proceed with the installation The application requires setting environment variables for its configuration. 
Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description: -| Environment Variable | Required | Default Value | Description | -|--------------------------|---------|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------| -| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | -| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | -| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless | -| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | -| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | -| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | -| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | -| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.
Should contain the custom fields in paperless that you want doclytics. | -| `LOG_LEVEL` | No | INFO | Log level | +| Environment Variable | Required | Default Value | Description | +|--------------------------|---------|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. | +| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. | +| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless | +| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. | +| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. | +| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. | +| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. | +| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.
Should contain the custom fields in paperless that you want doclytics to fill. | +| `LOG_LEVEL` | No | INFO | Log level | +| `MODE` | No | 0 | :warning: **Experimental**: Mode of operation.
0 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 1 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. | + + + + Make sure to set the required environment variables (`PAPERLESS_TOKEN` and `PAPERLESS_BASE_URL`) before running the application. Optional variables have default values and will use those defaults if not explicitly set. diff --git a/src/main.rs b/src/main.rs index 3a2089f..97575a6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -63,6 +63,21 @@ struct Field { data_type: String, } +#[derive(Clone, Copy)] +enum Mode { + Create, + NoCreate, +} +impl Mode { + fn from_int(value: i32) -> Self { + match value { + 1 => Mode::Create, + 0 => Mode::NoCreate, + _ => Mode::NoCreate, + } + } +} + // Initialize the HTTP client with Paperless API token and base URL fn init_paperless_client(token: &str) -> Client { @@ -98,6 +113,9 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u explanation, no introtext, the answer should start and end with curly brackets \ delimiting the json object ".to_string() ); + let mode_env = env::var("MODE").unwrap_or_else(|_| "0".to_string()); + let mode_int = mode_env.parse::().unwrap_or(0); + let mode = Mode::from_int(mode_int); let fields = query_custom_fields(client, base_url).await?; match get_data_from_paperless(&client, &base_url, filter).await { Ok(data) => { @@ -117,7 +135,7 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u slog_scope::debug!("Extracted JSON Object: {}", json_str); match serde_json::from_str(&json_str) { - Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?, + Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?, Err(e) => { slog_scope::error!("Error parsing llm response json {}", e.to_string()); 
slog_scope::debug!("JSON String was: {}", &json_str); diff --git a/src/paperless.rs b/src/paperless.rs index 540badb..24f265c 100644 --- a/src/paperless.rs +++ b/src/paperless.rs @@ -3,7 +3,8 @@ use std::fmt; use reqwest::Client; use serde::de::StdError; use serde_json::Value; -use crate::{CustomField, Document, Field, Response}; +use crate::{CustomField, Document, Field, Mode, Response}; +use serde::{Deserialize, Serialize}; pub async fn get_data_from_paperless( client: &Client, @@ -105,6 +106,7 @@ pub async fn update_document_fields( fields: &Vec, metadata: &HashMap>, base_url: &str, + mode: Mode, ) -> Result<(), Box> { let mut custom_fields = Vec::new(); @@ -131,15 +133,32 @@ pub async fn update_document_fields( } if let Some(field) = fields.iter().find(|&f| f.name == *key) { - let custom_field = CustomField { - field: field.id.clone(), - value: value.as_ref().cloned(), - }; + let custom_field = convert_field_to_custom_field(value, field); custom_fields.push(custom_field); } + else { + if matches!(mode, Mode::Create) { + slog_scope::info!("Creating field: {}", key); + let create_field = CreateField { + name: key.clone(), + data_type: "Text".to_string(), + default_value: None, + }; + match create_custom_field(client, &create_field, base_url).await + { + Ok(new_field) => { + let custom_field = convert_field_to_custom_field(value, &new_field); + custom_fields.push(custom_field) + }, + Err(e) => { + slog_scope::error!("Error: {} creating custom field: {}, skipping...",e, key) + } + } + } + } } // Check if tagged_field_id has a value and then proceed. 
- + let mut payload = serde_json::Map::new(); payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields)); @@ -170,4 +189,56 @@ pub async fn update_document_fields( Err(e.into()) } } -} \ No newline at end of file +} + +fn convert_field_to_custom_field(value: &Option, field: &Field) -> CustomField { + let custom_field = CustomField { + field: field.id.clone(), + value: value.as_ref().cloned(), + }; + custom_field +} + +#[derive(Serialize, Deserialize, Debug)] +pub struct CreateField { + name: String, + default_value: Option, + data_type: String, +} + +pub async fn create_custom_field( + client: &Client, + field: &CreateField, + base_url: &str, +) -> Result> { + // Define the URL for creating a custom field + let url = format!("{}/api/custom_fields/", base_url); + + + // Send the request to create the custom field + let res = client.post(&url).json(&field).send().await?; + let response_result = res.error_for_status(); + match response_result { + Ok(data) => { + let body = data.text().await?; + slog_scope::trace!("{}", body); + let field: Result, _> = serde_json::from_str(&body); + match field { + Ok(field) => { + Ok(field.results[0].clone()) // TODO: improve + }, + Err(e) => { + slog_scope::debug!("Creating field response: {}", body); + slog_scope::error!("Error parsing response from new field: {}", e); + Err(e.into()) + } + } + } + Err(e) => { + slog_scope::error!("Error creating custom field: {}", e); + Err(e.into()) + } + } +} + +