Skip to content

Commit

Permalink
Merge pull request #54 from B-urb/feature/automatically-create-custom…
Browse files Browse the repository at this point in the history
…fields

Feature/automatically create customfields
  • Loading branch information
B-urb authored Jun 4, 2024
2 parents 97c508b + 36ae055 commit 7af1dbc
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 19 deletions.
71 changes: 71 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,5 @@ slog-json = "2.4"
slog-scope = "4.4"
slog-stdlog = "4.1"
lazy_static = "1.4"
chrono = "0.4.38"

27 changes: 16 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,17 +45,22 @@ With these prerequisites met, you are now ready to proceed with the installation

The application requires setting environment variables for its configuration. Below is a table describing each environment variable, indicating whether it is required or optional, its default value (if any), and a brief description:

| Environment Variable | Required | Default Value | Description |
|--------------------------|---------|----------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------|
| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. |
| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. |
| `PAPERLESS_FILTER` | NO | "NOT tagged=true" | Filter string that filters the documents to be fetched from paperless |
| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. |
| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. |
| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. |
| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. |
| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.<br/> Should contain the custom fields in paperless that you want doclytics. |
| `LOG_LEVEL` | No | INFO | Log level |
| Environment Variable | Required | Default Value | Description |
|--------------------------|---------|----------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `PAPERLESS_TOKEN` | Yes | None | The authentication token for accessing the Paperless API. |
| `PAPERLESS_BASE_URL` | Yes | None | The base URL for the Paperless API. |
| `PAPERLESS_FILTER` | No | "NOT tagged=true" | Filter string that selects the documents to be fetched from Paperless. |
| `OLLAMA_HOST` | No | "localhost" | The hostname where the Ollama service is running. |
| `OLLAMA_PORT` | No | "11434" | The port on which the Ollama service is accessible. |
| `OLLAMA_SECURE_ENDPOINT` | No | "false" | Whether to use HTTPS (`true`) or HTTP (`false`) for Ollama. |
| `OLLAMA_MODEL` | No | "llama2:13b" | The specific Ollama model to be used for processing. |
| `BASE_PROMPT` | No | see [Example Prompt](example/example.prompt) | Prompt given to the model, for requesting metadata.<br/> Should contain the custom fields in Paperless that you want doclytics to fill. |
| `LOG_LEVEL` | No | INFO | Log level |
| `MODE` | No | 0 | :warning: **Experimental**: Mode of operation. <br/> 0 = NoCreate (Doclytics does not create custom fields automatically in Paperless), 1 = Create (Doclytics automatically creates custom fields that do not exist in Paperless). All fields will be created as type "Text" at the moment. In stable support, the type will be inferred. |






Make sure to set the required environment variables (`PAPERLESS_TOKEN` and `PAPERLESS_BASE_URL`) before running the application. Optional variables have default values and will use those defaults if not explicitly set.
Expand Down
20 changes: 19 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ struct Field {
data_type: String,
}

/// Controls what happens when the model returns a key that has no matching
/// custom field in Paperless.
#[derive(Clone, Copy)]
enum Mode {
    /// Missing custom fields are created automatically.
    Create,
    /// Missing custom fields are left alone (the default).
    NoCreate,
}

impl Mode {
    /// Translates the numeric `MODE` setting into a [`Mode`].
    ///
    /// Only `1` selects [`Mode::Create`]; `0` and every other value
    /// (including negative numbers) select [`Mode::NoCreate`].
    fn from_int(value: i32) -> Self {
        if value == 1 {
            Mode::Create
        } else {
            Mode::NoCreate
        }
    }
}


// Initialize the HTTP client with Paperless API token and base URL
fn init_paperless_client(token: &str) -> Client {
Expand Down Expand Up @@ -98,6 +113,9 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u
explanation, no introtext, the answer should start and end with curly brackets \
delimiting the json object ".to_string()
);
let mode_env = env::var("MODE").unwrap_or_else(|_| "0".to_string());
let mode_int = mode_env.parse::<i32>().unwrap_or(0);
let mode = Mode::from_int(mode_int);
let fields = query_custom_fields(client, base_url).await?;
match get_data_from_paperless(&client, &base_url, filter).await {
Ok(data) => {
Expand All @@ -117,7 +135,7 @@ async fn process_documents(client: &Client, ollama: &Ollama, model: &str, base_u
slog_scope::debug!("Extracted JSON Object: {}", json_str);

match serde_json::from_str(&json_str) {
Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url).await?,
Ok(json) => update_document_fields(client, document.id, &fields, &json, base_url, mode).await?,
Err(e) => {
slog_scope::error!("Error parsing llm response json {}", e.to_string());
slog_scope::debug!("JSON String was: {}", &json_str);
Expand Down
85 changes: 78 additions & 7 deletions src/paperless.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ use std::fmt;
use reqwest::Client;
use serde::de::StdError;
use serde_json::Value;
use crate::{CustomField, Document, Field, Response};
use crate::{CustomField, Document, Field, Mode, Response};
use serde::{Deserialize, Serialize};

pub async fn get_data_from_paperless(
client: &Client,
Expand Down Expand Up @@ -105,6 +106,7 @@ pub async fn update_document_fields(
fields: &Vec<Field>,
metadata: &HashMap<String, Option<Value>>,
base_url: &str,
mode: Mode,
) -> Result<(), Box<dyn std::error::Error>> {
let mut custom_fields = Vec::new();

Expand All @@ -131,15 +133,32 @@ pub async fn update_document_fields(
}

if let Some(field) = fields.iter().find(|&f| f.name == *key) {
let custom_field = CustomField {
field: field.id.clone(),
value: value.as_ref().cloned(),
};
let custom_field = convert_field_to_custom_field(value, field);
custom_fields.push(custom_field);
}
else {
if matches!(mode, Mode::Create) {
slog_scope::info!("Creating field: {}", key);
let create_field = CreateField {
name: key.clone(),
data_type: "Text".to_string(),
default_value: None,
};
match create_custom_field(client, &create_field, base_url).await
{
Ok(new_field) => {
let custom_field = convert_field_to_custom_field(value, &new_field);
custom_fields.push(custom_field)
},
Err(e) => {
slog_scope::error!("Error: {} creating custom field: {}, skipping...",e, key)
}
}
}
}
}
// Check if tagged_field_id has a value and then proceed.

let mut payload = serde_json::Map::new();

payload.insert("custom_fields".to_string(), serde_json::json!(custom_fields));
Expand Down Expand Up @@ -170,4 +189,56 @@ pub async fn update_document_fields(
Err(e.into())
}
}
}
}

/// Pairs an extracted metadata value with the Paperless field it belongs to.
///
/// The resulting `CustomField` carries a copy of `field.id` and a clone of
/// `value` (which stays `None` when no value was extracted).
fn convert_field_to_custom_field(value: &Option<Value>, field: &Field) -> CustomField {
    CustomField {
        field: field.id.clone(),
        value: value.clone(),
    }
}

/// Request payload describing a custom field that should be created.
///
/// `create_custom_field` serializes this struct as the JSON body of its
/// POST request to `{base_url}/api/custom_fields/`.
#[derive(Serialize, Deserialize, Debug)]
pub struct CreateField {
/// Name of the custom field to create.
name: String,
/// Default value sent along with the request; the caller visible in this
/// file always passes `None`.
default_value: Option<String>,
/// Data type sent for the new field; the caller visible in this file
/// always passes "Text".
// NOTE(review): confirm that "Text" is a data type the Paperless API accepts.
data_type: String,
}

pub async fn create_custom_field(
client: &Client,
field: &CreateField,
base_url: &str,
) -> Result<Field, Box<dyn std::error::Error>> {
// Define the URL for creating a custom field
let url = format!("{}/api/custom_fields/", base_url);


// Send the request to create the custom field
let res = client.post(&url).json(&field).send().await?;
let response_result = res.error_for_status();
match response_result {
Ok(data) => {
let body = data.text().await?;
slog_scope::trace!("{}", body);
let field: Result<Response<Field>, _> = serde_json::from_str(&body);
match field {
Ok(field) => {
Ok(field.results[0].clone()) // TODO: improve
},
Err(e) => {
slog_scope::debug!("Creating field response: {}", body);
slog_scope::error!("Error parsing response from new field: {}", e);
Err(e.into())
}
}
}
Err(e) => {
slog_scope::error!("Error creating custom field: {}", e);
Err(e.into())
}
}
}


0 comments on commit 7af1dbc

Please sign in to comment.