-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paththroughput_tradeoff_interactive.html
More file actions
7 lines (7 loc) · 22.2 KB
/
throughput_tradeoff_interactive.html
File metadata and controls
7 lines (7 loc) · 22.2 KB
1
2
3
4
5
6
7
<!doctype html>
<!--
  Standalone export of an interactive Plotly figure.
  The doctype keeps browsers in standards mode; lang, viewport and title are
  supplied so the page is identifiable and usable by assistive technology.
  The title text mirrors the figure's own layout title.
-->
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>vLLM vs SGLang — Performance Across 14 Models</title>
</head>
<body>
<!--
  Wrapper for the exported Plotly figure. The first inline script assigns
  window.PlotlyConfig = {MathJaxConfig: 'local'} before the plotly.js bundle
  is loaded by the next script tag.
  NOTE(review): presumably this stops plotly.js from reconfiguring a MathJax
  instance on the page; confirm against the plotly.js documentation.
-->
<div> <script type="text/javascript">window.PlotlyConfig = {MathJaxConfig: 'local'};</script>
<!--
  Loads plotly.js 2.35.2 from cdn.plot.ly, declares the target container
  (inline size 820px high by 1200px wide), then calls Plotly.newPlot on that
  container only if document.getElementById finds it.

  Arguments passed to Plotly.newPlot:
    1. the container id;
    2. four bar traces over the same 14 model names:
       vLLM and SGLang throughput in tok/s on axes x / y, and
       vLLM and SGLang median TTFT in ms on axes x2 / y2 (showlegend false,
       sharing the legendgroup of the matching throughput trace);
       hover text comes from each trace's customdata via the hovertemplate;
    3. the layout: embedded template, two stacked axis pairs, subplot and
       size-group annotations, dotted vertical separator shapes, colours,
       grouped bar mode, and "x unified" hover mode;
    4. the config {"responsive": true}.

  NOTE(review): the layout also fixes height 820 and width 1200 while the
  config asks for responsive sizing; confirm which behaviour is intended.
  NOTE(review): the payload below is machine generated and, in this copy,
  line breaks fall inside string literals and numbers; verify against the
  generator's output before hand-editing any of it.
-->
<script charset="utf-8" src="https://cdn.plot.ly/plotly-2.35.2.min.js"></script> <div id="1c0618fc-1148-43c2-ae3b-f5d3061e50d1" class="plotly-graph-div" style="height:820px; width:1200px;"></div> <script type="text/javascript"> window.PLOTLYENV=window.PLOTLYENV || {}; if (document.getElementById("1c0618fc-1148-43c2-ae3b-f5d3061e50d1")) { Plotly.newPlot( "1c0618fc-1148-43c2-ae3b-f5d3061e50d1", [{"customdata":["\u003cb\u003eGemma 2 2B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e264.7 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 2B params","\u003cb\u003eSmolLM3 3B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e229.5 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3B params","\u003cb\u003eLlama 3.2 3B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e223.5 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3B params","\u003cb\u003ePhi-3 mini\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e190.9 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3.8B params","\u003cb\u003eGemma 3 4B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e83.9 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 4B params","\u003cb\u003ePhi-4 mini\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e188.6 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 4B params","\u003cb\u003eDS-R1 Qwen 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e105.6 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eQwen2.5 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e105.3 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eMistral 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e106.6 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eLlama 3.1 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: 
\u003cb\u003e101.7 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eQwen3 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e98.3 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eGranite 3.3 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e92.8 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eDS-R1 Llama 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e101.8 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eGemma 2 9B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eThroughput: \u003cb\u003e79.8 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 9B params"],"hovertemplate":"%{customdata}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"vLLM","marker":{"color":"#4C9BE8"},"name":"vLLM","opacity":0.9,"text":["265","230","224","191","84","189","106","105","107","102","98","93","102","80"],"textfont":{"size":10},"textposition":"outside","x":["Gemma 2 2B","SmolLM3 3B","Llama 3.2 3B","Phi-3 mini","Gemma 3 4B","Phi-4 mini","DS-R1 Qwen 7B","Qwen2.5 7B","Mistral 7B","Llama 3.1 8B","Qwen3 8B","Granite 3.3 8B","DS-R1 Llama 8B","Gemma 2 9B"],"y":[264.7,229.5,223.5,190.9,83.9,188.6,105.6,105.3,106.6,101.7,98.3,92.8,101.8,79.8],"type":"bar","xaxis":"x","yaxis":"y"},{"customdata":["\u003cb\u003eGemma 2 2B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e258.0 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 2B params","\u003cb\u003eSmolLM3 3B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e204.6 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3B params","\u003cb\u003eLlama 3.2 3B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e226.0 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3B params","\u003cb\u003ePhi-3 mini\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e187.2 
tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 3.8B params","\u003cb\u003eGemma 3 4B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e149.2 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 4B params","\u003cb\u003ePhi-4 mini\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e175.9 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 4B params","\u003cb\u003eDS-R1 Qwen 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e106.1 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eQwen2.5 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e106.3 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eMistral 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e106.8 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 7B params","\u003cb\u003eLlama 3.1 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e102.1 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eQwen3 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e98.5 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eGranite 3.3 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e92.8 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eDS-R1 Llama 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e102.0 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 8B params","\u003cb\u003eGemma 2 9B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eThroughput: \u003cb\u003e77.5 tok\u002fs\u003c\u002fb\u003e\u003cbr\u003eModel size: 9B 
params"],"hovertemplate":"%{customdata}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"SGLang","marker":{"color":"#F97316"},"name":"SGLang","opacity":0.9,"text":["258","205","226","187","149","176","106","106","107","102","98","93","102","78"],"textfont":{"size":10},"textposition":"outside","x":["Gemma 2 2B","SmolLM3 3B","Llama 3.2 3B","Phi-3 mini","Gemma 3 4B","Phi-4 mini","DS-R1 Qwen 7B","Qwen2.5 7B","Mistral 7B","Llama 3.1 8B","Qwen3 8B","Granite 3.3 8B","DS-R1 Llama 8B","Gemma 2 9B"],"y":[258.0,204.6,226.0,187.2,149.2,175.9,106.1,106.3,106.8,102.1,98.5,92.8,102.0,77.5],"type":"bar","xaxis":"x","yaxis":"y"},{"customdata":["\u003cb\u003eGemma 2 2B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e20 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 23 ms","\u003cb\u003eSmolLM3 3B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e24 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 24 ms","\u003cb\u003eLlama 3.2 3B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e23 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 23 ms","\u003cb\u003ePhi-3 mini\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e25 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 25 ms","\u003cb\u003eGemma 3 4B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e87 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 90 ms","\u003cb\u003ePhi-4 mini\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e33 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 33 ms","\u003cb\u003eDS-R1 Qwen 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e40 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 63 ms","\u003cb\u003eQwen2.5 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e41 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 63 ms","\u003cb\u003eMistral 7B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e41 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 62 
ms","\u003cb\u003eLlama 3.1 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e43 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 44 ms","\u003cb\u003eQwen3 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e44 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 45 ms","\u003cb\u003eGranite 3.3 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e46 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 47 ms","\u003cb\u003eDS-R1 Llama 8B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e42 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 43 ms","\u003cb\u003eGemma 2 9B\u003c\u002fb\u003e \u2014 vLLM\u003cbr\u003eTTFT median: \u003cb\u003e74 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 106 ms"],"hovertemplate":"%{customdata}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"vLLM","marker":{"color":"#4C9BE8"},"name":"vLLM","opacity":0.9,"showlegend":false,"text":["20","24","23","25","87","33","40","41","41","43","44","46","42","74"],"textfont":{"size":10},"textposition":"outside","x":["Gemma 2 2B","SmolLM3 3B","Llama 3.2 3B","Phi-3 mini","Gemma 3 4B","Phi-4 mini","DS-R1 Qwen 7B","Qwen2.5 7B","Mistral 7B","Llama 3.1 8B","Qwen3 8B","Granite 3.3 8B","DS-R1 Llama 8B","Gemma 2 9B"],"y":[20,24,23,25,87,33,40,41,41,43,44,46,42,74],"type":"bar","xaxis":"x2","yaxis":"y2"},{"customdata":["\u003cb\u003eGemma 2 2B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e30 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 32 ms","\u003cb\u003eSmolLM3 3B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e57 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 60 ms","\u003cb\u003eLlama 3.2 3B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e32 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 33 ms","\u003cb\u003ePhi-3 mini\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e43 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 51 ms","\u003cb\u003eGemma 3 
4B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e78 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 81 ms","\u003cb\u003ePhi-4 mini\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e40 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 40 ms","\u003cb\u003eDS-R1 Qwen 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e66 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 66 ms","\u003cb\u003eQwen2.5 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e66 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 66 ms","\u003cb\u003eMistral 7B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e62 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 63 ms","\u003cb\u003eLlama 3.1 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e67 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 67 ms","\u003cb\u003eQwen3 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e72 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 72 ms","\u003cb\u003eGranite 3.3 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e76 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 77 ms","\u003cb\u003eDS-R1 Llama 8B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e69 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 69 ms","\u003cb\u003eGemma 2 9B\u003c\u002fb\u003e \u2014 SGLang\u003cbr\u003eTTFT median: \u003cb\u003e83 ms\u003c\u002fb\u003e\u003cbr\u003eTTFT P95: 83 ms"],"hovertemplate":"%{customdata}\u003cextra\u003e\u003c\u002fextra\u003e","legendgroup":"SGLang","marker":{"color":"#F97316"},"name":"SGLang","opacity":0.9,"showlegend":false,"text":["30","57","32","43","78","40","66","66","62","67","72","76","69","83"],"textfont":{"size":10},"textposition":"outside","x":["Gemma 2 2B","SmolLM3 3B","Llama 3.2 3B","Phi-3 mini","Gemma 3 4B","Phi-4 mini","DS-R1 Qwen 7B","Qwen2.5 7B","Mistral 7B","Llama 3.1 8B","Qwen3 8B","Granite 3.3 8B","DS-R1 Llama 
8B","Gemma 2 9B"],"y":[30,57,32,43,78,40,66,66,62,67,72,76,69,83],"type":"bar","xaxis":"x2","yaxis":"y2"}], {"template":{"data":{"histogram2dcontour":[{"type":"histogram2dcontour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"choropleth":[{"type":"choropleth","colorbar":{"outlinewidth":0,"ticks":""}}],"histogram2d":[{"type":"histogram2d","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmap":[{"type":"heatmap","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"heatmapgl":[{"type":"heatmapgl","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"contourcarpet":[{"type":"contourcarpet","colorbar":{"outlinewidth":0,"ticks":""}}],"contour":[{"type":"contour","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.44444444444
44444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"surface":[{"type":"surface","colorbar":{"outlinewidth":0,"ticks":""},"colorscale":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]]}],"mesh3d":[{"type":"mesh3d","colorbar":{"outlinewidth":0,"ticks":""}}],"scatter":[{"fillpattern":{"fillmode":"overlay","size":10,"solidity":0.2},"type":"scatter"}],"parcoords":[{"type":"parcoords","line":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolargl":[{"type":"scatterpolargl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"bar":[{"error_x":{"color":"#2a3f5f"},"error_y":{"color":"#2a3f5f"},"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"bar"}],"scattergeo":[{"type":"scattergeo","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterpolar":[{"type":"scatterpolar","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"histogram":[{"marker":{"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"histogram"}],"scattergl":[{"type":"scattergl","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatter3d":[{"type":"scatter3d","line":{"colorbar":{"outlinewidth":0,"ticks":""}},"marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattermapbox":[{"type":"scattermapbox","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scatterternary":[{"type":"scatterternary","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"scattercarpet":[{"type":"scattercarpet","marker":{"colorbar":{"outlinewidth":0,"ticks":""}}}],"carpet":[{"aaxis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"b
axis":{"endlinecolor":"#2a3f5f","gridcolor":"white","linecolor":"white","minorgridcolor":"white","startlinecolor":"#2a3f5f"},"type":"carpet"}],"table":[{"cells":{"fill":{"color":"#EBF0F8"},"line":{"color":"white"}},"header":{"fill":{"color":"#C8D4E3"},"line":{"color":"white"}},"type":"table"}],"barpolar":[{"marker":{"line":{"color":"#E5ECF6","width":0.5},"pattern":{"fillmode":"overlay","size":10,"solidity":0.2}},"type":"barpolar"}],"pie":[{"automargin":true,"type":"pie"}]},"layout":{"autotypenumbers":"strict","colorway":["#636efa","#EF553B","#00cc96","#ab63fa","#FFA15A","#19d3f3","#FF6692","#B6E880","#FF97FF","#FECB52"],"font":{"color":"#2a3f5f"},"hovermode":"closest","hoverlabel":{"align":"left"},"paper_bgcolor":"white","plot_bgcolor":"#E5ECF6","polar":{"bgcolor":"#E5ECF6","angularaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"radialaxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"ternary":{"bgcolor":"#E5ECF6","aaxis":{"gridcolor":"white","linecolor":"white","ticks":""},"baxis":{"gridcolor":"white","linecolor":"white","ticks":""},"caxis":{"gridcolor":"white","linecolor":"white","ticks":""}},"coloraxis":{"colorbar":{"outlinewidth":0,"ticks":""}},"colorscale":{"sequential":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"sequentialminus":[[0.0,"#0d0887"],[0.1111111111111111,"#46039f"],[0.2222222222222222,"#7201a8"],[0.3333333333333333,"#9c179e"],[0.4444444444444444,"#bd3786"],[0.5555555555555556,"#d8576b"],[0.6666666666666666,"#ed7953"],[0.7777777777777778,"#fb9f3a"],[0.8888888888888888,"#fdca26"],[1.0,"#f0f921"]],"diverging":[[0,"#8e0152"],[0.1,"#c51b7d"],[0.2,"#de77ae"],[0.3,"#f1b6da"],[0.4,"#fde0ef"],[0.5,"#f7f7f7"],[0.6,"#e6f5d0"],[0.7,"#b8e186"],[0.8,"#7fbc41"],[0.9,"#4d9221"],[1,"#276419"]]},"xaxis":{"gridc
olor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"yaxis":{"gridcolor":"white","linecolor":"white","ticks":"","title":{"standoff":15},"zerolinecolor":"white","automargin":true,"zerolinewidth":2},"scene":{"xaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"yaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2},"zaxis":{"backgroundcolor":"#E5ECF6","gridcolor":"white","linecolor":"white","showbackground":true,"ticks":"","zerolinecolor":"white","gridwidth":2}},"shapedefaults":{"line":{"color":"#2a3f5f"}},"annotationdefaults":{"arrowcolor":"#2a3f5f","arrowhead":0,"arrowwidth":1},"geo":{"bgcolor":"white","landcolor":"#E5ECF6","subunitcolor":"white","showland":true,"showlakes":true,"lakecolor":"white"},"title":{"x":0.05},"mapbox":{"style":"light"}}},"xaxis":{"anchor":"y","domain":[0.0,1.0],"tickfont":{"size":11},"tickangle":-35},"yaxis":{"anchor":"x","domain":[0.5700000000000001,1.0],"gridcolor":"#1e293b","zeroline":false,"title":{"text":"tok\u002fs"}},"xaxis2":{"anchor":"y2","domain":[0.0,1.0],"tickfont":{"size":11},"tickangle":-35},"yaxis2":{"anchor":"x2","domain":[0.0,0.43],"gridcolor":"#1e293b","zeroline":false,"title":{"text":"ms"}},"annotations":[{"font":{"size":16},"showarrow":false,"text":"Peak Throughput (tok\u002fs) \u2014 throughput ramp, concurrency 1\u219232","x":0.5,"xanchor":"center","xref":"paper","y":1.0,"yanchor":"bottom","yref":"paper"},{"font":{"size":16},"showarrow":false,"text":"Time to First Token \u2014 median (ms) \u2014 single request, concurrency 
1","x":0.5,"xanchor":"center","xref":"paper","y":0.43,"yanchor":"bottom","yref":"paper"},{"font":{"color":"#64748b","size":10},"showarrow":false,"text":"\u003ci\u003e2\u20133B\u003c\u002fi\u003e","x":1.0,"xanchor":"center","xref":"x","y":1.06,"yref":"paper"},{"font":{"color":"#64748b","size":10},"showarrow":false,"text":"\u003ci\u003e3.8\u20134B\u003c\u002fi\u003e","x":4.0,"xanchor":"center","xref":"x","y":1.06,"yref":"paper"},{"font":{"color":"#64748b","size":10},"showarrow":false,"text":"\u003ci\u003e7B\u003c\u002fi\u003e","x":7.0,"xanchor":"center","xref":"x","y":1.06,"yref":"paper"},{"font":{"color":"#64748b","size":10},"showarrow":false,"text":"\u003ci\u003e8B\u003c\u002fi\u003e","x":10.5,"xanchor":"center","xref":"x","y":1.06,"yref":"paper"},{"font":{"color":"#64748b","size":10},"showarrow":false,"text":"\u003ci\u003e9B\u003c\u002fi\u003e","x":13.0,"xanchor":"center","xref":"x","y":1.06,"yref":"paper"}],"shapes":[{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":0.5,"x1":0.5,"xref":"x","y0":0,"y1":1,"yref":"y domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":0.5,"x1":0.5,"xref":"x2","y0":0,"y1":1,"yref":"y2 domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":2.5,"x1":2.5,"xref":"x","y0":0,"y1":1,"yref":"y domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":2.5,"x1":2.5,"xref":"x2","y0":0,"y1":1,"yref":"y2 domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":4.5,"x1":4.5,"xref":"x","y0":0,"y1":1,"yref":"y domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":4.5,"x1":4.5,"xref":"x2","y0":0,"y1":1,"yref":"y2 domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":5.5,"x1":5.5,"xref":"x","y0":0,"y1":1,"yref":"y domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":5.5,"x1":5.5,"xref":"x2","y0":0,"y1":1,"yref":"y2 
domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":8.5,"x1":8.5,"xref":"x","y0":0,"y1":1,"yref":"y domain"},{"line":{"color":"#334155","dash":"dot","width":1},"type":"line","x0":8.5,"x1":8.5,"xref":"x2","y0":0,"y1":1,"yref":"y2 domain"}],"title":{"font":{"size":18},"text":"vLLM vs SGLang \u2014 Performance Across 14 Models","x":0.5},"font":{"color":"#e2e8f0","family":"Inter, system-ui, sans-serif","size":12},"legend":{"font":{"size":13},"orientation":"h","x":0.5,"y":1.04,"xanchor":"center","bgcolor":"rgba(15,23,42,0)"},"margin":{"t":100,"b":60,"l":60,"r":30},"barmode":"group","bargap":0.18,"bargroupgap":0.04,"plot_bgcolor":"#0f172a","paper_bgcolor":"#0f172a","height":820,"width":1200,"hovermode":"x unified"}, {"responsive": true} ) }; </script> </div>
</body>
</html>