Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add filter for query in ts templates #172

Merged
merged 17 commits into from
Jul 22, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/curvy-penguins-work.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"create-llama": patch
---

add filter for query in ts templates
7 changes: 7 additions & 0 deletions helpers/typescript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ export const installTSTemplate = async ({
nextConfigJson.output = "export";
nextConfigJson.images = { unoptimized: true };
console.log("\nUsing static site generation\n");

// If there is a backend, overwrite next.config.mjs with next.config.simple.mjs
await fs.copyFile(
path.join(root, "next.config.simple.mjs"),
path.join(root, "next.config.mjs"),
);
} else {
if (vectorDb === "milvus") {
nextConfigJson.experimental.serverComponentsExternalPackages =
Expand All @@ -64,6 +70,7 @@ export const installTSTemplate = async ({
);
}
}
await fs.rm(path.join(root, "next.config.simple.mjs"));
await fs.writeFile(
nextConfigJsonFile,
JSON.stringify(nextConfigJson, null, 2) + os.EOL,
Expand Down
38 changes: 36 additions & 2 deletions templates/components/engines/typescript/agent/chat.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
import { BaseToolWithCall, OpenAIAgent, QueryEngineTool } from "llamaindex";
import {
BaseToolWithCall,
MetadataFilters,
OpenAIAgent,
QueryEngineTool,
} from "llamaindex";
import fs from "node:fs/promises";
import path from "node:path";
import { getDataSource } from "./index";
Expand All @@ -14,7 +19,7 @@ export async function createChatEngine(documentIds?: string[]) {
tools.push(
new QueryEngineTool({
queryEngine: index.asQueryEngine({
preFilters: undefined, // TODO: Add filters once LITS supports it (getQueryFilters)
preFilters: generateFilters(documentIds || []),
}),
metadata: {
name: "data_query_engine",
Expand All @@ -41,3 +46,32 @@ export async function createChatEngine(documentIds?: string[]) {
systemPrompt: process.env.SYSTEM_PROMPT,
});
}

/**
 * Builds the metadata pre-filters passed to the query engine.
 *
 * Documents whose `private` metadata is not "true" always match. When
 * `documentIds` is non-empty, documents whose `doc_id` metadata is contained
 * in that list match as well (the two filters are combined with "or").
 *
 * @param documentIds - IDs of documents to include in addition to the
 *   non-private ones; may be empty.
 * @returns The filters to use as `preFilters`.
 */
function generateFilters(documentIds: string[]): MetadataFilters | undefined {
  // Matches every document that is not flagged as private.
  const publicDocumentsFilter = {
    key: "private",
    value: "true",
    operator: "!=",
  } as const;

  // No documents selected: restrict the query to non-private documents only.
  if (!documentIds.length) {
    return { filters: [publicDocumentsFilter] };
  }

  // Documents selected: non-private documents OR any of the selected ones.
  return {
    filters: [
      publicDocumentsFilter,
      {
        key: "doc_id",
        value: documentIds,
        operator: "in",
      },
    ],
    condition: "or",
  };
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export async function runPipeline(documents: Document[], filename: string) {
for (const document of documents) {
document.metadata = {
...document.metadata,
doc_id: document.id_,
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
file_name: filename,
private: "true", // to separate from other public documents
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ export function retrieveDocumentIds(annotations?: JSONValue[]): string[] {
) {
const files = data.files as DocumentFile[];
for (const file of files) {
if (Array.isArray(file.content)) {
if (Array.isArray(file.content.value)) {
// it's an array, so it's an array of doc IDs
for (const id of file.content) {
for (const id of file.content.value) {
ids.push(id);
}
}
Expand Down
12 changes: 6 additions & 6 deletions templates/components/llamaindex/typescript/streaming/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,18 @@ export function createCallbackManager(stream: StreamData) {
const callbackManager = new CallbackManager();

callbackManager.on("retrieve-end", async (data) => {
const { nodes, query } = data.detail.payload;
const { nodes, query } = data.detail;
await appendSourceData(stream, nodes);
appendEventData(stream, `Retrieving context for query: '${query}'`);
appendEventData(
stream,
`Retrieved ${nodes.length} sources to use as context for the query`,
);
LLamaCloudFileService.downloadFiles(nodes); // don't await to avoid blocking chat streaming
});

callbackManager.on("llm-tool-call", (event) => {
const { name, input } = event.detail.payload.toolCall;
const { name, input } = event.detail.toolCall;
const inputString = Object.entries(input)
.map(([key, value]) => `${key}: ${value}`)
.join(", ");
Expand All @@ -98,7 +99,7 @@ export function createCallbackManager(stream: StreamData) {
});

callbackManager.on("llm-tool-result", (event) => {
const { toolCall, toolResult } = event.detail.payload;
const { toolCall, toolResult } = event.detail;
appendToolData(stream, toolCall, toolResult);
});

Expand All @@ -118,9 +119,8 @@ async function getNodeUrl(metadata: Metadata) {
const pipelineId = metadata["pipeline_id"];
if (pipelineId && !isLocalFile) {
// file is from LlamaCloud and was not ingested locally
// TODO trigger but don't await file download and just use convention to generate the URL (see Python code)
// return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${pipelineId}\$${fileName}`;
return await LLamaCloudFileService.getFileUrl(fileName, pipelineId);
const name = LLamaCloudFileService.toDownloadedName(pipelineId, fileName);
return `${process.env.FILESERVER_URL_PREFIX}/output/llamacloud/${name}`;
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
}
const isPrivate = metadata["private"] === "true";
const folder = isPrivate ? "output/uploaded" : "data";
Expand Down
135 changes: 75 additions & 60 deletions templates/components/llamaindex/typescript/streaming/service.ts
Original file line number Diff line number Diff line change
@@ -1,86 +1,66 @@
import { Metadata, NodeWithScore } from "llamaindex";
import fs from "node:fs";
import https from "node:https";
import path from "node:path";

const LLAMA_CLOUD_OUTPUT_DIR = "output/llamacloud";
const LLAMA_CLOUD_BASE_URL = "https://cloud.llamaindex.ai/api/v1";
const FILE_DELIMITER = "$"; // delimiter between pipelineId and filename

export interface LlamaCloudFile {
interface LlamaCloudFile {
name: string;
file_id: string;
project_id: string;
}

export class LLamaCloudFileService {
static async getFiles(pipelineId: string): Promise<LlamaCloudFile[]> {
const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = await response.json();
return data;
public static async downloadFiles(nodes: NodeWithScore<Metadata>[]) {
console.log("Downloading files from LlamaCloud...");
const files = this.nodesToDownloadFiles(nodes);
for (const file of files) {
await this.downloadFile(file.pipelineId, file.fileName);
}
thucpn marked this conversation as resolved.
Show resolved Hide resolved
}

static async getFileDetail(
projectId: string,
fileId: string,
): Promise<{ url: string }> {
const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
const headers = {
Accept: "application/json",
Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
};
const response = await fetch(url, { method: "GET", headers });
const data = (await response.json()) as { url: string };
return data;
public static toDownloadedName(pipelineId: string, fileName: string) {
return `${pipelineId}${FILE_DELIMITER}${fileName}`;
}

static async getFileUrl(
name: string,
pipelineId: string,
): Promise<string | null> {
try {
const files = await this.getFiles(pipelineId);
for (const file of files) {
if (file.name === name) {
const fileId = file.file_id;
const projectId = file.project_id;
const fileDetail = await this.getFileDetail(projectId, fileId);
const localFileUrl = this.downloadFile(fileDetail.url, fileId, name);
return localFileUrl;
}
private static nodesToDownloadFiles(nodes: NodeWithScore<Metadata>[]) {
const downloadFiles: Array<{
pipelineId: string;
fileName: string;
}> = [];
for (const node of nodes) {
const isLocalFile = node.node.metadata["is_local_file"] === "true";
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
if (isLocalFile) continue;
const pipelineId = node.node.metadata["pipeline_id"];
const fileName = node.node.metadata["file_name"];
if (!pipelineId || !fileName) continue;
const isDuplicate = downloadFiles.some(
(f) => f.pipelineId === pipelineId && f.fileName === fileName,
);
if (!isDuplicate) {
downloadFiles.push({ pipelineId, fileName });
}
return null;
} catch (error) {
console.error("Error fetching file from LlamaCloud:", error);
return null;
}
return downloadFiles;
}

static downloadFile(url: string, fileId: string, filename: string) {
const FILE_DELIMITER = "$"; // delimiter between fileId and filename
const downloadedFileName = `${fileId}${FILE_DELIMITER}${filename}`;
const downloadedFilePath = path.join(
LLAMA_CLOUD_OUTPUT_DIR,
downloadedFileName,
);
const urlPrefix = `${process.env.FILESERVER_URL_PREFIX}/${LLAMA_CLOUD_OUTPUT_DIR}`;
const fileUrl = `${urlPrefix}/${downloadedFileName}`;

private static async downloadFile(pipelineId: string, fileName: string) {
try {
const downloadedName = this.toDownloadedName(pipelineId, fileName);
const downloadedPath = path.join(LLAMA_CLOUD_OUTPUT_DIR, downloadedName);

// Check if file already exists
if (fs.existsSync(downloadedFilePath)) return fileUrl;
if (fs.existsSync(downloadedPath)) return;

// Create directory if it doesn't exist
if (!fs.existsSync(LLAMA_CLOUD_OUTPUT_DIR)) {
fs.mkdirSync(LLAMA_CLOUD_OUTPUT_DIR, { recursive: true });
}
const urlToDownload = await this.getFileUrlByName(pipelineId, fileName);
if (!urlToDownload) throw new Error("File not found in LlamaCloud");

const file = fs.createWriteStream(downloadedFilePath);
const file = fs.createWriteStream(downloadedPath);
https
.get(url, (response) => {
.get(urlToDownload, (response) => {
response.pipe(file);
file.on("finish", () => {
file.close(() => {
Expand All @@ -89,15 +69,50 @@ export class LLamaCloudFileService {
});
})
.on("error", (err) => {
fs.unlink(downloadedFilePath, () => {
fs.unlink(downloadedPath, () => {
console.error("Error downloading file:", err);
throw err;
});
});

return fileUrl;
} catch (error) {
throw new Error(`Error downloading file from LlamaCloud: ${error}`);
}
}

/**
 * Resolves the content URL of the file called `name` in the given pipeline.
 *
 * @param pipelineId - ID of the pipeline whose file list is searched.
 * @param name - File name to match exactly against each listed file's `name`.
 * @returns The URL obtained for the first matching file, or `null` when no
 *   listed file has that name.
 */
private static async getFileUrlByName(
  pipelineId: string,
  name: string,
): Promise<string | null> {
  const pipelineFiles = await this.getAllFiles(pipelineId);
  const match = pipelineFiles.find((candidate) => candidate.name === name);
  return match
    ? await this.getFileUrlById(match.project_id, match.file_id)
    : null;
}
thucpn marked this conversation as resolved.
Show resolved Hide resolved

/**
 * Requests the content URL of a single file from the LlamaCloud API.
 *
 * @param projectId - Project ID, sent as the `project_id` query parameter.
 * @param fileId - File ID, used in the request path.
 * @returns The `url` field of the JSON response body.
 * @throws Error when the API answers with a non-2xx status, so an error
 *   payload is never mistaken for a successful response.
 */
private static async getFileUrlById(
  projectId: string,
  fileId: string,
): Promise<string> {
  const url = `${LLAMA_CLOUD_BASE_URL}/files/${fileId}/content?project_id=${projectId}`;
  const headers = {
    Accept: "application/json",
    Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
  };
  const response = await fetch(url, { method: "GET", headers });
  if (!response.ok) {
    throw new Error(
      `Failed to get content URL for file ${fileId}: ${response.status} ${response.statusText}`,
    );
  }
  const data = (await response.json()) as { url: string };
  return data.url;
}

/**
 * Lists the files of a LlamaCloud pipeline.
 *
 * @param pipelineId - ID of the pipeline whose files are requested.
 * @returns The file records from the JSON response body.
 * @throws Error when the API answers with a non-2xx status, or when the
 *   response body is not an array (callers search the result with `find`).
 */
private static async getAllFiles(
  pipelineId: string,
): Promise<LlamaCloudFile[]> {
  const url = `${LLAMA_CLOUD_BASE_URL}/pipelines/${pipelineId}/files`;
  const headers = {
    Accept: "application/json",
    Authorization: `Bearer ${process.env.LLAMA_CLOUD_API_KEY}`,
  };
  const response = await fetch(url, { method: "GET", headers });
  if (!response.ok) {
    throw new Error(
      `Failed to list files for pipeline ${pipelineId}: ${response.status} ${response.statusText}`,
    );
  }
  const data: unknown = await response.json();
  if (!Array.isArray(data)) {
    throw new Error(
      `Unexpected response when listing files for pipeline ${pipelineId}`,
    );
  }
  return data as LlamaCloudFile[];
}
}
2 changes: 1 addition & 1 deletion templates/types/streaming/express/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"dotenv": "^16.3.1",
"duck-duck-scrape": "^2.2.5",
"express": "^4.18.2",
"llamaindex": "0.4.14",
"llamaindex": "0.5.6",
"pdf2json": "3.0.5",
"ajv": "^8.12.0",
"@e2b/code-interpreter": "^0.0.5",
Expand Down
3 changes: 2 additions & 1 deletion templates/types/streaming/nextjs/next.config.mjs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
import withLlamaIndex from "llamaindex/next";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

export default nextConfig;
export default withLlamaIndex(nextConfig);
marcusschiesser marked this conversation as resolved.
Show resolved Hide resolved
8 changes: 8 additions & 0 deletions templates/types/streaming/nextjs/next.config.simple.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/** @type {import('next').NextConfig} */
import fs from "fs";
import webpack from "./webpack.config.mjs";

const nextConfig = JSON.parse(fs.readFileSync("./next.config.json", "utf-8"));
nextConfig.webpack = webpack;

export default nextConfig;
2 changes: 1 addition & 1 deletion templates/types/streaming/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"duck-duck-scrape": "^2.2.5",
"formdata-node": "^6.0.3",
"got": "^14.4.1",
"llamaindex": "0.4.14",
"llamaindex": "0.5.6",
"lucide-react": "^0.294.0",
"next": "^14.2.4",
"react": "^18.2.0",
Expand Down
Loading