-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
2 changed files
with
76 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,22 @@ | ||
# llmon | ||
# LLMON | ||
LLMON (pronounced limón) is a structured data format optimized for large language models. | ||
|
||
# LLMONconcentrate | ||
|
||
LLMONconcentrate converts structured input into a more concise format that is still understandable by LLMs. | ||
|
||
## LLMONslice | ||
|
||
LLMONslices are subtrees of a larger data tree that can be fed into a large language model. The basic idea is that each slice should be small enough to be consumed in a single LLM prompt (or bite). | ||
|
||
## LLMONpeel | ||
|
||
LLMONpeels are the outer layer of the structured data, and can be used to autogenerate queries against the data. | ||
|
||
## LLMONjuice | ||
|
||
LLMONjuice is a plain text description of the JSON data, which is easier for large language models to consume. | ||
|
||
## LLMONbrothers | ||
|
||
LLMONbrothers are pairs of data and its LLMONjuice. The LLMONjuice is used for embedding similarity matching, and the original LLMON is used for question answering with LLMs. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
/** | ||
* Concentrate JSON to use fewer tokens | ||
* | ||
* arguments: | ||
* -f <file> | ||
* -f <file> is the JSON file to concentrate | ||
* -f <file> is required | ||
* | ||
* --yaml | ||
* --yaml outputs yaml | ||
* | ||
* --toml | ||
* --toml outputs toml | ||
*/ | ||
|
||
import { readFile } from "node:fs/promises"; | ||
import TOML from "@iarna/toml"; | ||
import YAML from "yaml"; | ||
import yargs from "yargs"; | ||
|
||
/**
 * CLI entry point.
 *
 * Reads the JSON file named by -f/--file, parses it, and prints the data to
 * stdout re-serialized as YAML (--yaml), TOML (--toml), or compact JSON when
 * neither flag is given. If both flags are passed, --yaml takes precedence.
 *
 * @returns {Promise<void>} Resolves once the converted data has been printed.
 * @throws {Error} If the file cannot be read, is not valid JSON, or cannot be
 *   serialized by the selected output format.
 */
async function main() {
  // yargs is imported as an ES module here; its documented entry point is to
  // call the import with the argument list (minus the node binary and script
  // path) before chaining builder methods, rather than using it as a singleton.
  const options = yargs(process.argv.slice(2))
    .usage("Usage: $0 -f <file> [--yaml | --toml]")
    .option("f", {
      alias: "file",
      describe: "JSON file to concentrate",
      type: "string",
      demandOption: true,
    })
    .option("yaml", {
      describe: "Output yaml",
      type: "boolean",
      demandOption: false,
    })
    .option("toml", {
      describe: "Output toml",
      type: "boolean",
      demandOption: false,
    }).argv;

  const { file, yaml, toml } = options;

  const input = await readFile(file, { encoding: "utf8" });
  const data = JSON.parse(input);

  if (yaml) {
    console.log(YAML.stringify(data));
    return;
  }
  if (toml) {
    console.log(TOML.stringify(data));
    return;
  }
  // Default output: JSON without indentation, i.e. insignificant whitespace removed.
  console.log(JSON.stringify(data));
}

// Report failures (missing file, malformed JSON, unserializable data) on stderr
// and signal them through the exit code instead of leaving the promise floating.
main().catch((err) => {
  console.error(err instanceof Error ? err.message : err);
  process.exitCode = 1;
});