Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update langchain to 0.0.102 #19

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@ yarn-error.log*
next-env.d.ts

ingested_data/
langchain.readthedocs.io/
docs.langchain.com/
2 changes: 2 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{
}
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Data ingestion happens in two steps.
First, you should run

```bash
sh download.sh
yarn download
```

This will download our data source (in this case, the LangChain docs).
Expand Down
2 changes: 1 addition & 1 deletion data/args.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"space":"ip","numDimensions":1536}
{"space":"cosine","numDimensions":1536}
2 changes: 1 addition & 1 deletion data/docstore.json

Large diffs are not rendered by default.

Binary file modified data/hnswlib.index
Binary file not shown.
2 changes: 1 addition & 1 deletion download.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
# Error if any command fails
set -e
echo Downloading docs...
wget -q -r -A.html https://langchain.readthedocs.io/en/latest/
wget -q -r -A.html https://docs.langchain.com/docs/
8 changes: 4 additions & 4 deletions ingest.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from "langchain/embeddings";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import * as fs from "fs";
import { Document } from "langchain/document";
import { BaseDocumentLoader } from "langchain/document_loaders";
import { BaseDocumentLoader } from "langchain/document_loaders/base";
import path from "path";
import { load } from "cheerio";

Expand Down Expand Up @@ -58,7 +58,7 @@ class ReadTheDocsLoader extends BaseDocumentLoader {
}
}

const directoryPath = "langchain.readthedocs.io";
const directoryPath = "docs.langchain.com";
const loader = new ReadTheDocsLoader(directoryPath);

export const run = async () => {
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"build": "next build",
"start": "next start",
"lint": "next lint",
"download": "sh ingest/download.sh",
"download": "sh download.sh",
"ingest": "tsx -r dotenv/config ingest.ts"
},
"dependencies": {
Expand All @@ -21,8 +21,8 @@
"dotenv": "^16.0.3",
"eslint": "8.34.0",
"eslint-config-next": "13.1.6",
"hnswlib-node": "^1.2.0",
"langchain": "0.0.15",
"hnswlib-node": "^1.4.2",
"langchain": "^0.0.102",
"next": "13.1.6",
"openai": "^3.1.0",
"react": "18.2.0",
Expand Down
65 changes: 38 additions & 27 deletions pages/api/chat-stream.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from 'next'
import type { NextApiRequest, NextApiResponse } from "next";
import type { Server as HttpServer } from "http";
import type { Server as HttpsServer } from "https";
import { WebSocketServer } from 'ws';
import { WebSocketServer } from "ws";
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from 'langchain/embeddings';
import { makeChain } from "./util";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { formatHistory, makeChain } from "./util";

export default async function handler(req: NextApiRequest, res: NextApiResponse) {
export default async function handler(
req: NextApiRequest,
res: NextApiResponse
) {
if ((res.socket as any).server.wss) {
res.end();
return;
Expand All @@ -16,52 +19,60 @@ export default async function handler(req: NextApiRequest, res: NextApiResponse)
const server = (res.socket as any).server as HttpsServer | HttpServer;
const wss = new WebSocketServer({ noServer: true });
(res.socket as any).server.wss = wss;
server.on('upgrade', (req, socket, head) => {
if (!req.url?.includes('/_next/webpack-hmr')) {

server.on("upgrade", (req, socket, head) => {
if (!req.url?.includes("/_next/webpack-hmr")) {
wss.handleUpgrade(req, socket, head, (ws) => {
wss.emit('connection', ws, req);
wss.emit("connection", ws, req);
});
}
});

wss.on('connection', (ws) => {
const sendResponse = ({ sender, message, type }: { sender: string, message: string, type: string }) => {
wss.on("connection", (ws) => {
const sendResponse = ({
sender,
message,
type,
}: {
sender: string;
message: string;
type: string;
}) => {
ws.send(JSON.stringify({ sender, message, type }));
};

const onNewToken = (token: string) => {
sendResponse({ sender: 'bot', message: token, type: 'stream' });
}
const onNewToken = async (token: string) => {
sendResponse({ sender: "bot", message: token, type: "stream" });
};

const chainPromise = HNSWLib.load("data", new OpenAIEmbeddings()).then((vs) => makeChain(vs, onNewToken));
const chainPromise = HNSWLib.load("data", new OpenAIEmbeddings()).then(
(vs) => makeChain(vs, onNewToken)
);
const chatHistory: [string, string][] = [];
const encoder = new TextEncoder();


ws.on('message', async (data) => {
ws.on("message", async (data) => {
try {
const question = data.toString();
sendResponse({ sender: 'you', message: question, type: 'stream' });
sendResponse({ sender: "you", message: question, type: "stream" });

sendResponse({ sender: 'bot', message: "", type: 'start' });
sendResponse({ sender: "bot", message: "", type: "start" });
const chain = await chainPromise;

const result = await chain.call({
question,
chat_history: chatHistory,
question,
chat_history: formatHistory(chatHistory),
});
chatHistory.push([question, result.answer]);

sendResponse({ sender: 'bot', message: "", type: 'end' });
sendResponse({ sender: "bot", message: "", type: "end" });
} catch (e) {
sendResponse({
sender: 'bot',
message: "Sorry, something went wrong. Try again.",
type: 'error'
sender: "bot",
message: "Sorry, something went wrong. Try again.",
type: "error",
});
}
})
});
});

res.end();
Expand Down
10 changes: 5 additions & 5 deletions pages/api/chat.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
// Next.js API route support: https://nextjs.org/docs/api-routes/introduction
import type { NextApiRequest, NextApiResponse } from "next";
import path from "path";
import { HNSWLib } from "langchain/vectorstores";
import { OpenAIEmbeddings } from "langchain/embeddings";
import { makeChain } from "./util";
import { HNSWLib } from "langchain/vectorstores/hnswlib";
import { OpenAIEmbeddings } from "langchain/embeddings/openai";
import { formatHistory, makeChain } from "./util";

export default async function handler(
req: NextApiRequest,
Expand All @@ -27,14 +27,14 @@ export default async function handler(
};

sendData(JSON.stringify({ data: "" }));
const chain = makeChain(vectorstore, (token: string) => {
const chain = makeChain(vectorstore, async (token: string) => {
sendData(JSON.stringify({ data: token }));
});

try {
await chain.call({
question: body.question,
chat_history: body.history,
chat_history: formatHistory(body.history),
});
} catch (err) {
console.error(err);
Expand Down
63 changes: 42 additions & 21 deletions pages/api/util.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
import { OpenAI } from "langchain/llms";
import { LLMChain, ChatVectorDBQAChain, loadQAChain } from "langchain/chains";
import { OpenAI } from "langchain/llms/openai";
import { ChatOpenAI } from "langchain/chat_models/openai";
import {
LLMChain,
ConversationalRetrievalQAChain,
loadQAStuffChain,
} from "langchain/chains";
import { HNSWLib } from "langchain/vectorstores";
import { PromptTemplate } from "langchain/prompts";
import {
ChatPromptTemplate,
HumanMessagePromptTemplate,
MessagesPlaceholder,
PromptTemplate,
SystemMessagePromptTemplate,
} from "langchain/prompts";
import { CallbackManager } from "langchain/callbacks";
import { AIChatMessage, HumanChatMessage } from "langchain/schema";

const CONDENSE_PROMPT = PromptTemplate.fromTemplate(`Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:`);
const CONDENSE_PROMPT = ChatPromptTemplate.fromPromptMessages([
SystemMessagePromptTemplate.fromTemplate(
`Given the following conversation between a user and an assistant, rephrase the last question from the user to be a standalone question.`
),
new MessagesPlaceholder("chat_history"),
HumanMessagePromptTemplate.fromTemplate(`Last question: {question}`),
]);

const QA_PROMPT = PromptTemplate.fromTemplate(
`You are an AI assistant for the open source library LangChain. The documentation is located at https://langchain.readthedocs.io.
`You are an AI assistant for the open source library LangChain. The documentation is located at https://docs.langchain.com/docs/.
You are given the following extracted parts of a long document and a question. Provide a conversational answer with a hyperlink to the documentation.
You should only use hyperlinks that are explicitly listed as a source in the context. Do NOT make up a hyperlink that is not listed.
If the question includes a request for code, provide a code block directly from the documentation.
Expand All @@ -21,28 +35,35 @@ Question: {question}
=========
{context}
=========
Answer in Markdown:`);
Answer in Markdown:`
);

export const makeChain = (vectorstore: HNSWLib, onTokenStream?: (token: string) => void) => {
export const makeChain = (
vectorstore: HNSWLib,
onTokenStream?: (token: string) => Promise<void>
) => {
const questionGenerator = new LLMChain({
// Using ChatOpenAI here gives `TypeError: message._getType is not a function`, so we use regular OpenAI.
llm: new OpenAI({ temperature: 0 }),
prompt: CONDENSE_PROMPT,
});
const docChain = loadQAChain(
new OpenAI({
const docChain = loadQAStuffChain(
new ChatOpenAI({
temperature: 0,
streaming: Boolean(onTokenStream),
callbackManager: {
handleNewToken: onTokenStream,
}
callbackManager: CallbackManager.fromHandlers({
handleLLMNewToken: onTokenStream,
}),
}),
{ prompt: QA_PROMPT },
{ prompt: QA_PROMPT }
);

return new ChatVectorDBQAChain({
vectorstore,
return new ConversationalRetrievalQAChain({
retriever: vectorstore.asRetriever(),
combineDocumentsChain: docChain,
questionGeneratorChain: questionGenerator,
});
}
};

export const formatHistory = (history: [string, string][]) =>
history.flatMap(([q, a]) => [new HumanChatMessage(q), new AIChatMessage(a)]);
2 changes: 1 addition & 1 deletion pages/index.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ export default function Home() {
</div>
<div className = {styles.navlinks}>
<a
href="https://langchain.readthedocs.io/en/latest/"
href="https://docs.langchain.com/docs/"
target="_blank"
rel="noreferrer"
>
Expand Down
Loading