Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update langchain to 0.0.102 #19

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ yarn-error.log*
next-env.d.ts

ingested_data/
langchain.readthedocs.io/
docs.langchain.com/
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Data ingestion happens in two steps.
First, you should run

```bash
sh download.sh
yarn download
```

This will download our data source (in this case, the LangChain docs).
Expand Down
2 changes: 1 addition & 1 deletion data/args.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"space":"ip","numDimensions":1536}
{"space":"cosine","numDimensions":1536}
2 changes: 1 addition & 1 deletion data/docstore.json

Large diffs are not rendered by default.

Binary file modified data/hnswlib.index
Binary file not shown.
2 changes: 1 addition & 1 deletion download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
# Error if any command fails
set -e
echo Downloading docs...
wget -q -r -A.html https://langchain.readthedocs.io/en/latest/
wget -q -r -A.html https://docs.langchain.com/docs/
8 changes: 4 additions & 4 deletions ingest.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from "langchain/embeddings";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import * as fs from "fs";
import { Document } from "langchain/document";
import { BaseDocumentLoader } from "langchain/document_loaders";
import { BaseDocumentLoader } from "langchain/document_loaders/base";
import path from "path";
import { load } from "cheerio";

Expand Down Expand Up @@ -58,7 +58,7 @@ class ReadTheDocsLoader extends BaseDocumentLoader {
}
}

const directoryPath = "langchain.readthedocs.io";
const directoryPath = "docs.langchain.com";
const loader = new ReadTheDocsLoader(directoryPath);

export const run = async () => {
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"build": "next build",
"start": "next start",
"lint": "next lint",
"download": "sh ingest/download.sh",
"download": "sh download.sh",
"ingest": "tsx -r dotenv/config ingest.ts"
},
"dependencies": {
Expand All @@ -21,8 +21,8 @@
"dotenv": "^16.0.3",
"eslint": "8.34.0",
"eslint-config-next": "13.1.6",
"hnswlib-node": "^1.2.0",
"langchain": "0.0.15",
"hnswlib-node": "^1.4.2",
"langchain": "^0.0.102",
"next": "13.1.6",
"openai": "^3.1.0",
"react": "18.2.0",
Expand Down
65 changes: 38 additions & 27 deletions pages/api/chat-stream.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next'
import type { NextApiRequest, NextApiResponse } from "next";
import type { Server as HttpServer } from "http";
import type { Server as HttpsServer } from "https";
import { WebSocketServer } from 'ws';
import { WebSocketServer } from "ws";
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from 'langchain/embeddings';
import { makeChain } from "./util";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { formatHistory, makeChain } from "./util";

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
if ((res.socket as any).server.wss) {
res.end();
return;
Expand All @@ -16,52 +19,60 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const server = (res.socket as any).server as HttpsServer | HttpServer;
const wss = new WebSocketServer({ noServer: true });
(res.socket as any).server.wss = wss;
server.on('upgrade', (req, socket, head) => {
if (!req.url?.includes('/_next/webpack-hmr')) {

server.on("upgrade", (req, socket, head) => {
if (!req.url?.includes("/_next/webpack-hmr")) {
wss.handleUpgrade(req, socket, head, (ws) => {
wss.emit('connection', ws, req);
wss.emit("connection", ws, req);
});
}
});

wss.on('connection', (ws) => {
const sendResponse = ({ sender, message, type }: { sender: string, message: string, type: string }) => {
wss.on("connection", (ws) => {
const sendResponse = ({
sender,
message,
type,
}: {
sender: string;
message: string;
type: string;
}) => {
ws.send(JSON.stringify({ sender, message, type }));
};

const onNewToken = (token: string) => {
sendResponse({ sender: 'bot', message: token, type: 'stream' });
}
const onNewToken = async (token: string) => {
sendResponse({ sender: "bot", message: token, type: "stream" });
};

const chainPromise = HNSWLib.load("data", new OpenAIEmbeddings()).then((vs) => makeChain(vs, onNewToken));
const chainPromise = HNSWLib.load("data", new OpenAIEmbeddings()).then(
(vs) => makeChain(vs, onNewToken)
);
const chatHistory: [string, string][] = [];
const encoder = new TextEncoder();


ws.on('message', async (data) => {
ws.on("message", async (data) => {
try {
const question = data.toString();
sendResponse({ sender: 'you', message: question, type: 'stream' });
sendResponse({ sender: "you", message: question, type: "stream" });

sendResponse({ sender: 'bot', message: "", type: 'start' });
sendResponse({ sender: "bot", message: "", type: "start" });
const chain = await chainPromise;

const result = await chain.call({
question,
chat_history: chatHistory,
question,
chat_history: formatHistory(chatHistory),
});
chatHistory.push([question, result.answer]);

sendResponse({ sender: 'bot', message: "", type: 'end' });
sendResponse({ sender: "bot", message: "", type: "end" });
} catch (e) {
sendResponse({
sender: 'bot',
message: "Sorry, something went wrong. Try again.",
type: 'error'
sender: "bot",
message: "Sorry, something went wrong. Try again.",
type: "error",
});
}
})
});
});

res.end();
Expand Down
10 changes: 5 additions & 5 deletions pages/api/chat.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from "next";
import path from "path";
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from "langchain/embeddings";
import { makeChain } from "./util";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { formatHistory, makeChain } from "./util";

export default async function handler(
req: NextApiRequest,
Expand All @@ -27,14 +27,14 @@ export default async function handler(
};

sendData(JSON.stringify({ data: "" }));
const chain = makeChain(vectorstore, (token: string) => {
const chain = makeChain(vectorstore, async (token: string) => {
sendData(JSON.stringify({ data: token }));
});

try {
await chain.call({
question: body.question,
chat_history: body.history,
chat_history: formatHistory(body.history),
});
} catch (err) {
console.error(err);
Expand Down
63 changes: 42 additions & 21 deletions pages/api/util.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
import { OpenAI } from "langchain/llms";
import { LLMChain, ChatVectorDBQAChain, loadQAChain } from "langchain/chains";
import { OpenAI } from "langchain/llms/openai";
import { ChatOpenAI } from "langchain/chat_models/openai";
import {
LLMChain,
ConversationalRetrievalQAChain,
loadQAStuffChain,
} from "langchain/chains";
import { HNSWLib } from "langchain/vectorstores";
import { PromptTemplate } from "langchain/prompts";
import {
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
PromptTemplate,
SystemMessagePromptTemplate,
} from "langchain/prompts";
import { CallbackManager } from "langchain/callbacks";
import { AIChatMessage, HumanChatMessage } from "langchain/schema";

const CONDENSE_PROMPT = PromptTemplate.fromTemplate(`Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:`);
const CONDENSE_PROMPT = ChatPromptTemplate.fromPromptMessages([
SystemMessagePromptTemplate.fromTemplate(
`Given the following conversation between a user and an assistant, rephrase the last question from the user to be a standalone question.`
),
new MessagesPlaceholder("chat_history"),
HumanMessagePromptTemplate.fromTemplate(`Last question: {question}`),
]);

const QA_PROMPT = PromptTemplate.fromTemplate(
`You are an AI assistant for the open source library LangChain. The documentation is located at https://langchain.readthedocs.io.
`You are an AI assistant for the open source library LangChain. The documentation is located at https://docs.langchain.com/docs/.
You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
If the question includes a request for code, provide a code block directly from the documentation.
Expand All @@ -21,28 +35,35 @@ Question: {question}
=========
{context}
=========
Answer in Markdown:`);
Answer in Markdown:`
);

export const makeChain = (vectorstore: HNSWLib, onTokenStream?: (token: string) => void) => {
export const makeChain = (
vectorstore: HNSWLib,
onTokenStream?: (token: string) => Promise<void>
) => {
const questionGenerator = new LLMChain({
// Using ChatOpenAI here gives `TypeError: message._getType is not a function`, so we use regular OpenAI.
llm: new OpenAI({ temperature: 0 }),
prompt: CONDENSE_PROMPT,
});
const docChain = loadQAChain(
new OpenAI({
const docChain = loadQAStuffChain(
new ChatOpenAI({
temperature: 0,
streaming: Boolean(onTokenStream),
callbackManager: {
handleNewToken: onTokenStream,
}
callbackManager: CallbackManager.fromHandlers({
handleLLMNewToken: onTokenStream,
}),
}),
{ prompt: QA_PROMPT },
{ prompt: QA_PROMPT }
);

return new ChatVectorDBQAChain({
vectorstore,
return new ConversationalRetrievalQAChain({
retriever: vectorstore.asRetriever(),
combineDocumentsChain: docChain,
questionGeneratorChain: questionGenerator,
});
}
};

export const formatHistory = (history: [string, string][]) =>
history.flatMap(([q, a]) => [new HumanChatMessage(q), new AIChatMessage(a)]);
2 changes: 1 addition & 1 deletion pages/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ export default function Home() {
</div>
<div className = {styles.navlinks}>
<a
href="https://langchain.readthedocs.io/en/latest/"
href="https://docs.langchain.com/docs/"
target="_blank"
rel="noreferrer"
>
Expand Down
Loading