{"payload":{"feedbackUrl":"https://github.com/orgs/community/discussions/53140","repo":{"id":599547518,"defaultBranch":"main","name":"vllm","ownerLogin":"vllm-project","currentUserCanPush":false,"isFork":false,"isEmpty":false,"createdAt":"2023-02-09T11:23:20.000Z","ownerAvatar":"https://avatars.githubusercontent.com/u/136984999?v=4","public":true,"private":false,"isOrgOwned":true},"refInfo":{"name":"","listCacheKey":"v0:1726773246.0","currentOid":""},"activityList":{"items":[{"before":"18ae428a0d8792d160d811a9cd5bb004d68ea8bd","after":"9e5ec35b1f8239453b1aaab28e7a02307db4ab1f","ref":"refs/heads/main","pushedAt":"2024-09-20T03:49:54.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[bugfix] [AMD] add multi-step advance_step to ROCmFlashAttentionMetadata (#8474)","shortMessageHtmlLink":"[bugfix] [AMD] add multi-step advance_step to ROCmFlashAttentionMetad…"}},{"before":"de6f90a13d7b98c4958ba107ec16cb6f95efb10f","after":"18ae428a0d8792d160d811a9cd5bb004d68ea8bd","ref":"refs/heads/main","pushedAt":"2024-09-20T00:54:02.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DarkLight1337","name":"Cyrus Leung","path":"/DarkLight1337","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/44970335?s=80&v=4"},"commit":{"message":"[Bugfix] Fix Phi3.5 mini and MoE LoRA inference (#8571)","shortMessageHtmlLink":"[Bugfix] Fix Phi3.5 mini and MoE LoRA inference (#8571)"}},{"before":"6cb748e190a94e20987314025614b8bd806602f2","after":"de6f90a13d7b98c4958ba107ec16cb6f95efb10f","ref":"refs/heads/main","pushedAt":"2024-09-19T22:36:30.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DarkLight1337","name":"Cyrus Leung","path":"/DarkLight1337","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/44970335?s=80&v=4"},"commit":{"message":"[Misc] guard against change in cuda library name (#8609)","shortMessageHtmlLink":"[Misc] guard against change in cuda library name (#8609)"}},{"before":"9e99407e3ccbb290bae77af230da38c70a52a055","after":"6cb748e190a94e20987314025614b8bd806602f2","ref":"refs/heads/main","pushedAt":"2024-09-19T20:06:32.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[CI/Build] Re-enabling Entrypoints tests on ROCm, excluding ones that fail (#8551)","shortMessageHtmlLink":"[CI/Build] Re-enabling Entrypoints tests on ROCm, excluding ones that…"}},{"before":"ea4647b7d77c4738c5ed2ab77a2c9f5ad335f6fb","after":"9e99407e3ccbb290bae77af230da38c70a52a055","ref":"refs/heads/main","pushedAt":"2024-09-19T19:16:28.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"Create SECURITY.md (#8642)","shortMessageHtmlLink":"Create SECURITY.md (#8642)"}},{"before":"cf8eb362b56a0ed1761399a6e628ce619b3f086f","after":"4696720db9fd1ce5f06454e8d7a1b0fedc2b2d73","ref":"refs/heads/security-md","pushedAt":"2024-09-19T19:16:08.000Z","pushType":"push","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"Update SECURITY.md","shortMessageHtmlLink":"Update SECURITY.md"}},{"before":"e42c634acbd1b86b5becca51e8b8108a32a438d5","after":"ea4647b7d77c4738c5ed2ab77a2c9f5ad335f6fb","ref":"refs/heads/main","pushedAt":"2024-09-19T19:15:55.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Doc] Add documentation for GGUF quantization (#8618)","shortMessageHtmlLink":"[Doc] Add documentation for GGUF quantization (#8618)"}},{"before":null,"after":"cf8eb362b56a0ed1761399a6e628ce619b3f086f","ref":"refs/heads/security-md","pushedAt":"2024-09-19T19:14:06.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"Create SECURITY.md","shortMessageHtmlLink":"Create SECURITY.md"}},{"before":"9cc373f39036af789fb1ffc1e06b23766996d3f4","after":"e42c634acbd1b86b5becca51e8b8108a32a438d5","ref":"refs/heads/main","pushedAt":"2024-09-19T18:28:25.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"comaniac","name":"Cody Yu","path":"/comaniac","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/8262694?s=80&v=4"},"commit":{"message":"[Core] simplify logits resort in _apply_top_k_top_p (#8619)","shortMessageHtmlLink":"[Core] simplify logits resort in _apply_top_k_top_p (#8619)"}},{"before":"76515f303b44cb3ffc6de63c49148d5081a77119","after":"9cc373f39036af789fb1ffc1e06b23766996d3f4","ref":"refs/heads/main","pushedAt":"2024-09-19T17:37:57.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Kernel][Amd] Add fp8 kv cache support for rocm custom paged attention (#8577)","shortMessageHtmlLink":"[Kernel][Amd] Add fp8 kv cache support for rocm custom paged attention ("}},{"before":"855c8ae2c9a4085b1ebd66d9a978fb23f47f822c","after":"76515f303b44cb3ffc6de63c49148d5081a77119","ref":"refs/heads/main","pushedAt":"2024-09-19T16:51:06.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Frontend] Use MQLLMEngine for embeddings models too (#8584)","shortMessageHtmlLink":"[Frontend] Use MQLLMEngine for embeddings models too (#8584)"}},{"before":"6a5ab372732af5b2e16973e03eac92ef41fb0faf","after":null,"ref":"refs/heads/revert-8521-disable-guided-mistral","pushedAt":"2024-09-19T07:31:29.000Z","pushType":"branch_deletion","commitsCount":0,"pusher":{"login":"youkaichao","name":"youkaichao","path":"/youkaichao","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/23236638?s=80&v=4"}},{"before":"c52ec5f03471008fa1312d82fb17d40b95a3ca5d","after":"855c8ae2c9a4085b1ebd66d9a978fb23f47f822c","ref":"refs/heads/main","pushedAt":"2024-09-19T05:33:20.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[MISC] remove engine_use_ray in benchmark_throughput.py (#8615)","shortMessageHtmlLink":"[MISC] remove engine_use_ray in benchmark_throughput.py (#8615)"}},{"before":"02c9afa2d04a85269faa2760e9af30527a61d7f6","after":"c52ec5f03471008fa1312d82fb17d40b95a3ca5d","ref":"refs/heads/main","pushedAt":"2024-09-19T05:24:24.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"[Bugfix] fixing sonnet benchmark bug in benchmark_serving.py (#8616)","shortMessageHtmlLink":"[Bugfix] fixing sonnet benchmark bug in benchmark_serving.py (#8616)"}},{"before":"3118f63385c0d767fba8b6d2039fc35440678da9","after":"02c9afa2d04a85269faa2760e9af30527a61d7f6","ref":"refs/heads/main","pushedAt":"2024-09-19T04:14:28.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"Revert \"[Misc][Bugfix] Disable guided decoding for mistral tokenizer\" (#8593)","shortMessageHtmlLink":"Revert \"[Misc][Bugfix] Disable guided decoding for mistral tokenizer\" ("}},{"before":"4c34ce8916da0e4967eadefcb7f91eb58dd7ac61","after":"3118f63385c0d767fba8b6d2039fc35440678da9","ref":"refs/heads/main","pushedAt":"2024-09-19T02:24:15.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"comaniac","name":"Cody Yu","path":"/comaniac","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/8262694?s=80&v=4"},"commit":{"message":"[Bugfix] [Encoder-Decoder] Bugfix for encoder specific metadata construction during decode of encoder-decoder models. (#8545)","shortMessageHtmlLink":"[Bugfix] [Encoder-Decoder] Bugfix for encoder specific metadata const…"}},{"before":null,"after":"6a5ab372732af5b2e16973e03eac92ef41fb0faf","ref":"refs/heads/revert-8521-disable-guided-mistral","pushedAt":"2024-09-19T02:14:03.000Z","pushType":"branch_creation","commitsCount":0,"pusher":{"login":"ywang96","name":"Roger Wang","path":"/ywang96","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/136131678?s=80&v=4"},"commit":{"message":"Revert \"[Misc][Bugfix] Disable guided decoding for mistral tokenizer (#8521)\"\n\nThis reverts commit ee2bceaaa67bd2f420f62a924da5834a7c1c862b.","shortMessageHtmlLink":"Revert \"[Misc][Bugfix] Disable guided decoding for mistral tokenizer (#…"}},{"before":"0d47bf3bf40edfe9fcfd7e5cd909388497535bc5","after":"4c34ce8916da0e4967eadefcb7f91eb58dd7ac61","ref":"refs/heads/main","pushedAt":"2024-09-19T01:42:50.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"tlrmchlsmth","name":"Tyler Michael Smith","path":"/tlrmchlsmth","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/1236979?s=80&v=4"},"commit":{"message":"[Kernel] Remove marlin moe templating on thread_m_blocks (#8573)\n\nCo-authored-by: lwilkinson@neuralmagic.com","shortMessageHtmlLink":"[Kernel] Remove marlin moe templating on thread_m_blocks (#8573)"}},{"before":"d9cd78eb718c233ebc5b84377fc2226af7ef0fa2","after":"0d47bf3bf40edfe9fcfd7e5cd909388497535bc5","ref":"refs/heads/main","pushedAt":"2024-09-18T22:10:01.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Bugfix] add `dead_error` property to engine client (#8574)\n\nSigned-off-by: Joe Runde ","shortMessageHtmlLink":"[Bugfix] add dead_error property to engine client (#8574)"}},{"before":"db9120cdedba5033037432775417df0b6117495d","after":"d9cd78eb718c233ebc5b84377fc2226af7ef0fa2","ref":"refs/heads/main","pushedAt":"2024-09-18T20:17:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[BugFix] Nonzero exit code if MQLLMEngine startup fails (#8572)","shortMessageHtmlLink":"[BugFix] Nonzero exit code if MQLLMEngine startup fails (#8572)"}},{"before":"b3195bc9e4d57b6107af2222afea26c51475e262","after":"db9120cdedba5033037432775417df0b6117495d","ref":"refs/heads/main","pushedAt":"2024-09-18T20:05:06.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Kernel] Change interface to Mamba selective_state_update for continuous batching (#8039)","shortMessageHtmlLink":"[Kernel] Change interface to Mamba selective_state_update for continu…"}},{"before":"e18749ff09c277f7cdab278895ebdd9b1041b6e8","after":"b3195bc9e4d57b6107af2222afea26c51475e262","ref":"refs/heads/main","pushedAt":"2024-09-18T17:41:08.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[AMD][ROCm]Quantization methods on ROCm; Fix _scaled_mm call (#8380)\n\nCo-authored-by: Alexei-V-Ivanov-AMD <156011006+Alexei-V-Ivanov-AMD@users.noreply.github.com>\r\nCo-authored-by: Michael Goin ","shortMessageHtmlLink":"[AMD][ROCm]Quantization methods on ROCm; Fix _scaled_mm call (#8380)"}},{"before":"d65798f78c76f03f068fc2f69a68cff430ee6b6f","after":"e18749ff09c277f7cdab278895ebdd9b1041b6e8","ref":"refs/heads/main","pushedAt":"2024-09-18T17:04:00.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"mgoin","name":"Michael Goin","path":"/mgoin","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/3195154?s=80&v=4"},"commit":{"message":"[Model] Support Solar Model (#8386)\n\nCo-authored-by: Michael Goin ","shortMessageHtmlLink":"[Model] Support Solar Model (#8386)"}},{"before":"a8c1d161a7d87dbc6c7cccfce303dcbe2e4ed6be","after":"d65798f78c76f03f068fc2f69a68cff430ee6b6f","ref":"refs/heads/main","pushedAt":"2024-09-18T16:10:27.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Core] zmq: bind only to 127.0.0.1 for local-only usage (#8543)\n\nSigned-off-by: Russell Bryant ","shortMessageHtmlLink":"[Core] zmq: bind only to 127.0.0.1 for local-only usage (#8543)"}},{"before":"7c7714d856eee6fa94aade729b67f00584f72a4c","after":"a8c1d161a7d87dbc6c7cccfce303dcbe2e4ed6be","ref":"refs/heads/main","pushedAt":"2024-09-18T15:38:43.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"comaniac","name":"Cody Yu","path":"/comaniac","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/8262694?s=80&v=4"},"commit":{"message":"[Core] *Prompt* logprobs support in Multi-step (#8199)","shortMessageHtmlLink":"[Core] *Prompt* logprobs support in Multi-step (#8199)"}},{"before":"9d104b5beb7bbb51c64b680e007f39169489ea86","after":"7c7714d856eee6fa94aade729b67f00584f72a4c","ref":"refs/heads/main","pushedAt":"2024-09-18T13:56:58.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"robertgshaw2-neuralmagic","name":"Robert Shaw","path":"/robertgshaw2-neuralmagic","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/114415538?s=80&v=4"},"commit":{"message":"[Core][Bugfix][Perf] Introduce `MQLLMEngine` to avoid `asyncio` OH (#8157)\n\nCo-authored-by: Nick Hill \r\nCo-authored-by: rshaw@neuralmagic.com \r\nCo-authored-by: Robert Shaw <114415538+robertgshaw2-neuralmagic@users.noreply.github.com>\r\nCo-authored-by: Simon Mo ","shortMessageHtmlLink":"[Core][Bugfix][Perf] Introduce MQLLMEngine to avoid asyncio OH (#…"}},{"before":"6ffa3f314c59e42238f1c5f923ff2839e0af9698","after":"9d104b5beb7bbb51c64b680e007f39169489ea86","ref":"refs/heads/main","pushedAt":"2024-09-18T11:00:56.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DarkLight1337","name":"Cyrus Leung","path":"/DarkLight1337","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/44970335?s=80&v=4"},"commit":{"message":"[CI/Build] Update Ruff version (#8469)\n\nSigned-off-by: Aaron Pham \r\nCo-authored-by: Cyrus Leung ","shortMessageHtmlLink":"[CI/Build] Update Ruff version (#8469)"}},{"before":"e351572900f7d87e14fe203ea3a49c1c7ddae0d6","after":"6ffa3f314c59e42238f1c5f923ff2839e0af9698","ref":"refs/heads/main","pushedAt":"2024-09-18T10:38:11.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DarkLight1337","name":"Cyrus Leung","path":"/DarkLight1337","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/44970335?s=80&v=4"},"commit":{"message":"[CI/Build] Avoid CUDA initialization (#8534)","shortMessageHtmlLink":"[CI/Build] Avoid CUDA initialization (#8534)"}},{"before":"95965d31b6ac2c9557816a6ffabe4a3117a5ccb2","after":"e351572900f7d87e14fe203ea3a49c1c7ddae0d6","ref":"refs/heads/main","pushedAt":"2024-09-18T09:51:59.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"simon-mo","name":"Simon Mo","path":"/simon-mo","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/21118851?s=80&v=4"},"commit":{"message":"[Misc] Add argument to disable FastAPI docs (#8554)","shortMessageHtmlLink":"[Misc] Add argument to disable FastAPI docs (#8554)"}},{"before":"8110e44529f431d54b02060528601c0d3e3f7d02","after":"95965d31b6ac2c9557816a6ffabe4a3117a5ccb2","ref":"refs/heads/main","pushedAt":"2024-09-18T02:49:53.000Z","pushType":"pr_merge","commitsCount":1,"pusher":{"login":"DarkLight1337","name":"Cyrus Leung","path":"/DarkLight1337","primaryAvatarUrl":"https://avatars.githubusercontent.com/u/44970335?s=80&v=4"},"commit":{"message":"[CI/Build] fix Dockerfile.cpu on podman (#8540)","shortMessageHtmlLink":"[CI/Build] fix Dockerfile.cpu on podman (#8540)"}}],"hasNextPage":true,"hasPreviousPage":false,"activityType":"all","actor":null,"timePeriod":"all","sort":"DESC","perPage":30,"cursor":"djE6ks8AAAAEu4eRZQA","startCursor":null,"endCursor":null}},"title":"Activity · vllm-project/vllm"}