Skip to content

Commit ff3bd58

Browse files
Implementation of API between Docs and CSE Copilot (#52892)
Co-authored-by: Evan Bonsignori <[email protected]> Co-authored-by: Evan Bonsignori <[email protected]>
1 parent d35127d commit ff3bd58

11 files changed

+490
-6
lines changed

.env.example

+6-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,9 @@ BUILD_RECORDS_MAX_CONCURRENT=100
2020
BUILD_RECORDS_MIN_TIME=
2121

2222
# Set to true to enable the /fastly-cache-test route for debugging Fastly headers
23-
ENABLE_FASTLY_TESTING=
23+
ENABLE_FASTLY_TESTING=
24+
25+
# Secret and endpoint used to authenticate and send AI search requests to cse-copilot
26+
CSE_COPILOT_SECRET=
27+
CSE_COPILOT_ENDPOINT=https://cse-copilot-staging.service.iad.github.net
28+

package-lock.json

+6-5
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/frame/middleware/api.ts

+18
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { createProxyMiddleware } from 'http-proxy-middleware'
33

44
import events from '@/events/middleware.js'
55
import anchorRedirect from '@/rest/api/anchor-redirect.js'
6+
import aiSearch from '@/search/middleware/ai-search'
67
import search from '@/search/middleware/search-routes.js'
78
import pageInfo from '@/pageinfo/middleware'
89
import pageList from '@/pagelist/middleware'
@@ -23,6 +24,23 @@ router.use('/pagelist', pageList)
2324
// local laptop, they don't have an Elasticsearch. Neither a running local
2425
// server or the known credentials to a remote Elasticsearch. Whenever
2526
// that's the case, they can just HTTP proxy to the production server.
27+
// AI Search is handled by this app when a cse-copilot endpoint is configured
// (and always when NODE_ENV is 'test'). Otherwise the request is forwarded,
// with its original URL, to docs.github.com.
const proxyAiSearchToProduction =
  !process.env.CSE_COPILOT_ENDPOINT && process.env.NODE_ENV !== 'test'

if (proxyAiSearchToProduction) {
  console.log(
    'Proxying AI Search requests to docs.github.com. To use the cse-copilot endpoint, set the CSE_COPILOT_ENDPOINT environment variable.',
  )
  router.use(
    '/ai-search',
    createProxyMiddleware({
      target: 'https://docs.github.com',
      changeOrigin: true,
      // The router has already stripped its mount path from `path`, so send
      // the full original URL upstream instead.
      pathRewrite: (_path, req: ExtendedRequest) => req.originalUrl,
    }),
  )
} else {
  router.use('/ai-search', aiSearch)
}
2644
if (process.env.ELASTICSEARCH_URL) {
2745
router.use('/search', search)
2846
} else {

src/search/lib/ai-search-proxy.ts

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import { Request, Response } from 'express'
2+
import got from 'got'
3+
import { getHmacWithEpoch } from '@/search/lib/helpers/get-cse-copilot-auth'
4+
import { getCSECopilotSource } from '#src/search/lib/helpers/cse-copilot-docs-versions.js'
5+
6+
/**
 * Express handler that validates an AI search request and streams the
 * cse-copilot `/answers` response back to the client.
 *
 * The JSON request body must contain `query` (a string), `version` and
 * `language`. When validation fails the handler responds with status 400 and
 * `{ errors: [{ message }] }`. Otherwise it POSTs to
 * `${CSE_COPILOT_ENDPOINT}/answers` and pipes the streamed reply to `res`
 * with the content type `application/x-ndjson`.
 *
 * @param req - Incoming request; its parsed body carries the search input.
 * @param res - Response that receives either the error JSON or the stream.
 */
export const aiSearchProxy = async (req: Request, res: Response) => {
  // `req.body` can be undefined (no body parser ran, or nothing was sent).
  // Fall back to an empty object so the checks below produce a 400 instead
  // of a TypeError on destructuring.
  const { query, version, language } = req.body ?? {}
  const errors: { message: string }[] = []

  // Validate request body
  if (!query) {
    errors.push({ message: `Missing required key 'query' in request body` })
  } else if (typeof query !== 'string') {
    errors.push({ message: `Invalid 'query' in request body. Must be a string` })
  }
  if (!version) {
    errors.push({ message: `Missing required key 'version' in request body` })
  }
  if (!language) {
    errors.push({ message: `Missing required key 'language' in request body` })
  }

  // getCSECopilotSource throws on the first unsupported value it sees; its
  // message is reported alongside any "missing key" errors collected above.
  let docsSource = ''
  try {
    docsSource = getCSECopilotSource(version, language)
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : ''
    errors.push({ message: message || 'Invalid version or language' })
  }

  if (errors.length) {
    res.status(400).json({ errors })
    return
  }

  const body = {
    chat_context: 'defaults',
    docs_source: docsSource,
    query,
    stream: true,
  }

  try {
    const stream = got.post(`${process.env.CSE_COPILOT_ENDPOINT}/answers`, {
      json: body,
      headers: {
        Authorization: getHmacWithEpoch(),
        'Content-Type': 'application/json',
      },
      isStream: true,
    })

    // Set response headers
    // NOTE(review): the headers are flushed before cse-copilot has answered,
    // so by the time a stream error arrives `res.headersSent` is already true
    // and the client sees a 200 with a truncated body rather than a 500.
    res.setHeader('Content-Type', 'application/x-ndjson')
    res.flushHeaders()

    // Pipe the got stream directly to the response
    stream.pipe(res)

    // Handle stream errors
    stream.on('error', (error) => {
      console.error('Error streaming from cse-copilot:', error)
      // Only send error response if headers haven't been sent
      if (!res.headersSent) {
        res.status(500).json({ errors: [{ message: 'Internal server error' }] })
      } else {
        res.end()
      }
    })

    // Ensure response ends when stream ends
    stream.on('end', () => {
      res.end()
    })

    // If the client disconnects before the answer is complete, abort the
    // upstream request instead of leaving it streaming into a dead socket.
    res.on('close', () => {
      if (!res.writableEnded) {
        stream.destroy()
      }
    })
  } catch (error) {
    // Reached for synchronous failures, e.g. getHmacWithEpoch() throwing.
    console.error('Error posting /answers to cse-copilot:', error)
    res.status(500).json({ errors: [{ message: 'Internal server error' }] })
  }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// Versions used by cse-copilot
2+
import { allVersions } from '@/versions/lib/all-versions'
3+
// Version names accepted for cse-copilot requests
const CSE_COPILOT_DOCS_VERSIONS = ['dotcom', 'ghec', 'ghes']

// Languages supported by cse-copilot
const DOCS_LANGUAGES = ['en']

/**
 * Lists the language codes that cse-copilot supports.
 *
 * @returns A new array on every call, so callers cannot alter the list this
 *   module validates against.
 */
export function supportedCSECopilotLanguages(): string[] {
  return [...DOCS_LANGUAGES]
}
10+
11+
/**
 * Builds the `docs_source` identifier sent to cse-copilot.
 *
 * `version` may be any name that identifies a docs version (short name, plan,
 * misc version name or current release); it is normalized to that version's
 * `miscBaseName` before being validated and used in the result.
 *
 * @param version - Docs version name taken from the request body.
 * @param language - Language code taken from the request body.
 * @returns A string of the form `docs_<version base name>_<language>`.
 * @throws Error if the version does not normalize to a supported base name,
 *   or if the language is not supported. The version is checked first.
 */
export function getCSECopilotSource(
  version: (typeof CSE_COPILOT_DOCS_VERSIONS)[number],
  language: (typeof DOCS_LANGUAGES)[number],
) {
  const cseCopilotDocsVersion = getMiscBaseNameFromVersion(version)
  if (!CSE_COPILOT_DOCS_VERSIONS.includes(cseCopilotDocsVersion)) {
    throw new Error(
      `Invalid 'version' in request body: '${version}'. Must be one of: ${CSE_COPILOT_DOCS_VERSIONS.join(', ')}`,
    )
  }
  if (!DOCS_LANGUAGES.includes(language)) {
    throw new Error(
      `Invalid 'language' in request body '${language}'. Must be one of: ${DOCS_LANGUAGES.join(', ')}`,
    )
  }
  // Use the normalized name that was just validated, not the raw input:
  // an accepted alias such as 'fpt' must produce 'docs_dotcom_<language>'.
  return `docs_${cseCopilotDocsVersion}_${language}`
}
28+
29+
/**
 * Resolves a version name to the `miscBaseName` of the first entry in
 * `allVersions` whose short name, plan, misc version name or current release
 * equals it.
 *
 * @param version - Name to look up.
 * @returns The matching entry's `miscBaseName`, or an empty string when no
 *   entry matches or the match has no base name.
 */
function getMiscBaseNameFromVersion(version: string): string {
  const match = Object.values(allVersions).find(
    (info) =>
      info.shortName === version ||
      info.plan === version ||
      info.miscVersionName === version ||
      info.currentRelease === version,
  )

  return match?.miscBaseName || ''
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
import crypto from 'crypto'
2+
3+
/**
 * Builds the Authorization value for github/cse-copilot, whose API requires
 * an HMAC-SHA256 signature with each request.
 *
 * The value is `<epoch seconds>.<signature>`, where the signature is the hex
 * HMAC-SHA256 of the epoch string keyed with the signing secret.
 *
 * @returns The `<epoch>.<hex hmac>` string.
 * @throws Error when no signing secret is available.
 */
export function getHmacWithEpoch() {
  const secret = getSigningSecret()
  if (!secret) {
    throw new Error('CSE_COPILOT_SECRET is not defined')
  }
  const epochTime = getEpochTime().toString()
  const hmac = generateHmacSha256(secret, epochTime)
  return `${epochTime}.${hmac}`
}

// Picks the signing secret. The api-ai-search tests need a secret to exist, so
// under NODE_ENV 'test' a fixed mock value is used; it is returned locally
// rather than written into process.env so the environment is left untouched.
function getSigningSecret(): string | undefined {
  if (process.env.NODE_ENV === 'test') {
    return 'mock-secret'
  }
  return process.env.CSE_COPILOT_SECRET
}

// In seconds
function getEpochTime(): number {
  return Math.floor(Date.now() / 1000)
}

// Hex-encoded HMAC-SHA256 of `data` keyed with `key`.
function generateHmacSha256(key: string, data: string): string {
  return crypto.createHmac('sha256', key).update(data).digest('hex')
}

src/search/middleware/ai-search.ts

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import express, { Request, Response } from 'express'
2+
3+
import catchMiddlewareError from '#src/observability/middleware/catch-middleware-error.js'
4+
import { aiSearchProxy } from '../lib/ai-search-proxy'
5+
6+
const router = express.Router()

// Version 1 of the AI search API. Errors thrown by the handler are passed
// through catchMiddlewareError.
const handleV1 = catchMiddlewareError(async (req: Request, res: Response) => {
  await aiSearchProxy(req, res)
})
router.post('/v1', handleV1)

// Redirect to most recent version. 307 makes the client repeat the POST,
// body included, against the versioned URL.
router.post('/', (req, res) => {
  const versionedUrl = req.originalUrl.replace('/ai-search', '/ai-search/v1')
  res.redirect(307, versionedUrl)
})

export default router

src/search/tests/api-ai-search.ts

+148
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import { expect, test, describe, beforeAll, afterAll } from 'vitest'
2+
3+
import { post } from 'src/tests/helpers/e2etest.js'
4+
import { startMockServer, stopMockServer } from '@/tests/mocks/start-mock-server'
5+
6+
// End-to-end tests for POST /api/ai-search/v1, run against a mock server that
// is started before the suite and stopped after it.
describe('AI Search Routes', () => {
  beforeAll(() => {
    startMockServer()
  })
  afterAll(() => stopMockServer())

  // POSTs `body` as JSON to the v1 endpoint through the e2e helper and returns
  // the raw response together with its parsed JSON body.
  async function postAndParse(body: Record<string, unknown>) {
    const response = await post('/api/ai-search/v1', {
      body: JSON.stringify(body),
      headers: { 'Content-Type': 'application/json' },
    })
    return { response, responseBody: JSON.parse(response.body) }
  }

  test('/api/ai-search/v1 should handle a successful response', async () => {
    const apiBody = { query: 'How do I create a Repository?', language: 'en', version: 'dotcom' }

    // Uses fetch directly (not the e2e helper) so the body can be read as a stream.
    const response = await fetch('http://localhost:4000/api/ai-search/v1', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify(apiBody),
    })

    expect(response.ok).toBe(true)
    expect(response.headers.get('content-type')).toBe('application/x-ndjson')
    expect(response.headers.get('transfer-encoding')).toBe('chunked')

    if (!response.body) {
      throw new Error('ReadableStream not supported in this environment.')
    }

    const decoder = new TextDecoder('utf-8')
    const reader = response.body.getReader()
    const chunks: string[] = []

    for (;;) {
      const { value, done } = await reader.read()
      if (value) {
        // Decode the Uint8Array chunk into a string
        chunks.push(decoder.decode(value, { stream: true }))
      }
      if (done) {
        break
      }
    }
    // Flush any bytes the streaming decoder is still holding back.
    chunks.push(decoder.decode())

    // Combine all chunks into a single string
    const fullResponse = chunks.join('')
    // Split the response into individual chunk lines
    const chunkLines = fullResponse.split('\n').filter((line) => line.trim() !== '')

    // Assertions:

    // 1. First chunk should be the SOURCES chunk
    expect(chunkLines.length).toBeGreaterThan(0)
    const firstChunkMatch = chunkLines[0].match(/^Chunk: (.+)$/)
    expect(firstChunkMatch).not.toBeNull()

    const sourcesChunk = JSON.parse(firstChunkMatch?.[1] || '')
    expect(sourcesChunk).toHaveProperty('chunkType', 'SOURCES')
    expect(sourcesChunk).toHaveProperty('sources')
    expect(Array.isArray(sourcesChunk.sources)).toBe(true)
    expect(sourcesChunk.sources.length).toBe(3)

    // 2. Subsequent chunks should be MESSAGE_CHUNKs
    const messageChunks = chunkLines.slice(1).map((line) => JSON.parse(line))
    for (const messageChunk of messageChunks) {
      expect(messageChunk).toHaveProperty('chunkType', 'MESSAGE_CHUNK')
      expect(messageChunk).toHaveProperty('text')
      expect(typeof messageChunk.text).toBe('string')
    }

    // 3. Verify the complete message is expected
    const expectedMessage =
      'Creating a repository on GitHub is something you should already know how to do :shrug:'
    const receivedMessage = messageChunks.map((messageChunk) => messageChunk.text).join('')
    expect(receivedMessage).toBe(expectedMessage)
  })

  test('should handle validation errors: query missing', async () => {
    const { response, responseBody } = await postAndParse({ language: 'en', version: 'dotcom' })

    expect(response.ok).toBe(false)
    expect(responseBody['errors']).toEqual([
      { message: `Missing required key 'query' in request body` },
    ])
  })

  test('should handle validation errors: language missing', async () => {
    const { response, responseBody } = await postAndParse({
      query: 'example query',
      version: 'dotcom',
    })

    expect(response.ok).toBe(false)
    expect(responseBody['errors']).toEqual([
      { message: `Missing required key 'language' in request body` },
      { message: `Invalid 'language' in request body 'undefined'. Must be one of: en` },
    ])
  })

  test('should handle validation errors: version missing', async () => {
    const { response, responseBody } = await postAndParse({
      query: 'example query',
      language: 'en',
    })

    expect(response.ok).toBe(false)
    expect(responseBody['errors']).toEqual([
      { message: `Missing required key 'version' in request body` },
      {
        message: `Invalid 'version' in request body: 'undefined'. Must be one of: dotcom, ghec, ghes`,
      },
    ])
  })

  test('should handle multiple validation errors: query missing, invalid language and version', async () => {
    const { response, responseBody } = await postAndParse({ language: 'fr', version: 'fpt' })

    expect(response.ok).toBe(false)
    // Only the query and language errors are expected; no version error is
    // asserted for 'fpt'.
    expect(responseBody['errors']).toEqual([
      { message: `Missing required key 'query' in request body` },
      {
        message: `Invalid 'language' in request body 'fr'. Must be one of: en`,
      },
    ])
  })
})

0 commit comments

Comments
 (0)