concentrate

yisding · Mar 11, 2023 · 50404bd · 50404bd
1 parent 8033666
commit 50404bd
Show file tree

Hide file tree

Showing 2 changed files with 76 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1,2 +1,22 @@
-# llmon
+# LLMON
 LLMON (pronounced limón) is a structured data format optimized for large language models
+
+# LLMONconcentrate
+
+LLMONconcentrate converts the structured input into a format that's more concise, yet still understandable by LLMs.
+
+## LLMONslice
+
+LLMONslices are subtrees of a larger data tree that can be fed into a large language model. The basic idea is that each slice should be of small enough size to be consumed in a single LLM prompt (or bite).
+
+## LLMONpeel
+
+LLMONpeels are the outer layer of the structured data, and can be used to autogenerate queries against the data.
+
+## LLMONjuice
+
+LLMONjuice is a plain text description of the JSON data, which is easier for large language models to consume.
+
+## LLMONbrothers
+
+LLMONbrothers are pairs consisting of a piece of data and its LLMONjuice. The LLMONjuice is used for embedding similarity matching, and the original LLMON is used for question answering with LLMs.
diff --git a/concentrate.ts b/concentrate.ts
@@ -0,0 +1,55 @@
+/**
+ * Concentrate JSON to use fewer tokens.
+ *
+ * Arguments:
+ *  -f <file>   (required) the JSON file to concentrate
+ *  --yaml      output YAML
+ *  --toml      output TOML
+ */
+
+import { readFile } from "node:fs/promises";
+import TOML from "@iarna/toml";
+import YAML from "yaml";
+import yargs from "yargs";
+
+/**
+ * CLI entry point.
+ *
+ * Reads the JSON file named by -f/--file, parses it, and prints it to stdout
+ * re-serialized as YAML (--yaml), TOML (--toml), or compact JSON (default).
+ * When both --yaml and --toml are passed, YAML takes precedence.
+ *
+ * Rejects if the file cannot be read, is not valid JSON, or cannot be
+ * serialized to the requested format.
+ */
+async function main() {
+  const options = yargs
+    // Only advertise flags that are actually declared below.
+    .usage("Usage: -f <file> [--yaml] [--toml]")
+    .option("f", {
+      alias: "file",
+      describe: "JSON file to concentrate",
+      type: "string",
+      demandOption: true,
+    })
+    .option("yaml", {
+      describe: "Output yaml",
+      type: "boolean",
+      demandOption: false,
+    })
+    .option("toml", {
+      describe: "Output toml",
+      type: "boolean",
+      demandOption: false,
+    }).argv;
+
+  const { file, yaml, toml } = options;
+
+  const input = await readFile(file, { encoding: "utf8" });
+  const data = JSON.parse(input);
+
+  if (yaml) {
+    console.log(YAML.stringify(data));
+    return;
+  }
+  if (toml) {
+    console.log(TOML.stringify(data));
+    return;
+  }
+  // Compact (no whitespace) JSON is the default output.
+  console.log(JSON.stringify(data));
+}
+
+// Report failures (missing file, malformed JSON, unserializable data) as a
+// short message and a non-zero exit code instead of an unhandled rejection.
+main().catch((err) => {
+  console.error(err instanceof Error ? err.message : err);
+  process.exitCode = 1;
+});