https://preview.redd.it/od6suf6j7g4h1.png?width=619&format=png&auto=webp&s=d31fb903ea68f58e3a641bfd275d59eeb5cce445
Missing a button in the llama-server web chat to toggle reasoning on/off, like in LM Studio?
This is a snippet that runs in Tampermonkey (https://www.tampermonkey.net/), a browser extension that injects extra functionality into existing web pages. Because the script lives in your browser, you can recompile llama.cpp every day without having to re-apply a patch.
You need to install the extension and add this script:
// ==UserScript==
// @name         QWEN3.6 reasoning toggle
// @namespace    http://tampermonkey.net/
// @version      3.1
// @description  Reasoning toggle button for llama.cp chat
// @author       Eaman
// @match        http://localhost:8080/*
// @match        http://127.0.0.1:8080/*
// @grant        none
// @run-at       document-start
// ==/UserScript==

(function () {
  'use strict';

  const STORAGE_KEY = 'qwen_reasoning';
  const TOGGLE_ID = 'llama-native-inline-toggle';
  const LOG_PREFIX = '[reasoning-toggle]';

  // Reasoning defaults to ON: only an explicitly stored 'false' disables it.
  // The flag is kept on `window` so it can be inspected or flipped from the
  // browser console.
  window.__reasoningEnabled = localStorage.getItem(STORAGE_KEY) !== 'false';

  // ==========================================
  // 1. NETWORK INTERCEPT
  // ==========================================

  /**
   * Resolve the first argument of fetch() to a URL string.
   *
   * @param {string|URL|Request|*} input - The resource passed to fetch().
   * @returns {string} The URL as a string, or '' when it cannot be determined.
   */
  function resolveUrl(input) {
    if (typeof input === 'string') {
      return input;
    }
    if (input instanceof URL) {
      return input.href;
    }
    if (input && typeof input.url === 'string') {
      return input.url;
    }
    return '';
  }

  /**
   * Return a copy of a JSON request body with the reasoning flags applied.
   *
   * When reasoning is disabled, `chat_template_kwargs.enable_thinking` is set
   * to false and `reasoning_budget` to 0; when enabled, `enable_thinking` is
   * set to true and `reasoning_budget` is left untouched. Any other keys in
   * the payload are preserved.
   *
   * @param {string} body - JSON-encoded request body.
   * @param {boolean} enabled - Whether reasoning is currently enabled.
   * @returns {string} The re-serialized body, or `body` unchanged when it
   *   does not decode to a plain JSON object.
   * @throws {SyntaxError} When `body` is not valid JSON.
   */
  function applyReasoningFlags(body, enabled) {
    const data = JSON.parse(body);
    if (data === null || typeof data !== 'object' || Array.isArray(data)) {
      return body;
    }

    const existing = data.chat_template_kwargs;
    const kwargs = existing && typeof existing === 'object' ? existing : {};
    kwargs.enable_thinking = enabled;
    data.chat_template_kwargs = kwargs;

    if (!enabled) {
      data.reasoning_budget = 0;
    }
    return JSON.stringify(data);
  }

  const originalFetch = window.fetch;

  // Wrap fetch so that chat-completion requests carry the current toggle
  // state. Every other request is forwarded untouched.
  window.fetch = async function (...args) {
    const [input, init] = args;
    const isChatCompletion = resolveUrl(input).includes('/chat/completions');

    // Only string bodies can be parsed as JSON; streams, FormData, etc. are
    // passed through as-is.
    if (isChatCompletion && init && typeof init.body === 'string') {
      try {
        const body = applyReasoningFlags(init.body, window.__reasoningEnabled);
        // Copy the init object rather than mutating the caller's.
        args[1] = Object.assign({}, init, { body });
      } catch (err) {
        // Best effort: if the body cannot be rewritten, send it unmodified.
        console.error(LOG_PREFIX, 'could not rewrite request body', err);
      }
    }
    return originalFetch.apply(this, args);
  };

  // ==========================================
  // 2. NATIVE INLINE INJECTION
  // ==========================================

  /**
   * Paint the toggle button according to the current reasoning state.
   *
   * @param {HTMLElement} element - The toggle button.
   * @param {boolean} enabled - Whether reasoning is currently enabled.
   */
  function updateButtonUI(element, enabled) {
    if (enabled) {
      // ON state: pure white background, dark text (matches submit arrow).
      element.textContent = '🧠 ON';
      element.title = 'Reasoning Enabled';
      Object.assign(element.style, {
        backgroundColor: '#ffffff',
        color: '#121212',
        border: '1px solid #ffffff',
      });
    } else {
      // OFF state: muted dark gray background, white text (matches plus button).
      element.textContent = '⚡ OFF';
      element.title = 'Reasoning Disabled';
      Object.assign(element.style, {
        backgroundColor: 'rgba(255, 255, 255, 0.15)',
        color: '#ffffff',
        border: '1px solid rgba(255, 255, 255, 0.05)',
      });
    }
  }

  /**
   * Insert the toggle button right after the page's `.file-upload-button`
   * element. Does nothing when the toggle already exists or the anchor
   * element is not (yet) in the DOM, so it is safe to call repeatedly.
   */
  function drawToggleBtn() {
    if (document.getElementById(TOGGLE_ID)) {
      return;
    }

    const plusBtn = document.querySelector('.file-upload-button');
    if (!plusBtn || !plusBtn.parentNode) {
      return;
    }

    const btn = document.createElement('button');
    btn.id = TOGGLE_ID;
    btn.type = 'button';
    updateButtonUI(btn, window.__reasoningEnabled);

    Object.assign(btn.style, {
      // Layout properties
      display: 'inline-flex',
      alignItems: 'center',
      justifyContent: 'center',
      flexShrink: '0', // `style.shrink` is not a CSS property and had no effect
      // Circular pill framing matching native elements
      padding: '0px 10px',
      fontSize: '10px',
      borderRadius: '9999px',
      cursor: 'pointer',
      fontWeight: 'bold',
      height: '32px',
      whiteSpace: 'nowrap',
      transition: 'all 0.15s ease',
    });

    // Suppress the default mousedown action so that clicking the toggle does
    // not pull focus away from the message box.
    btn.addEventListener('mousedown', (e) => {
      e.preventDefault();
    });

    btn.addEventListener('click', (e) => {
      e.preventDefault();
      e.stopPropagation();

      window.__reasoningEnabled = !window.__reasoningEnabled;
      localStorage.setItem(STORAGE_KEY, String(window.__reasoningEnabled));
      updateButtonUI(btn, window.__reasoningEnabled);

      const textarea = document.querySelector('textarea');
      if (textarea) {
        textarea.focus();
      }
    });

    plusBtn.parentNode.insertBefore(btn, plusBtn.nextSibling);
  }

  // The script runs at document-start, when document.body may still be null,
  // so observe the document node itself instead of the body. The observer also
  // re-inserts the button whenever the page re-renders the input area.
  const observer = new MutationObserver(() => {
    drawToggleBtn();
  });
  observer.observe(document, { childList: true, subtree: true });

  // Cover the case where the anchor element is already present.
  drawToggleBtn();
})();
What does it do?
Pressing the button changes the state of llama.cpp's chat_template_kwargs, which is like passing the old, deprecated
--chat-template-kwargs '{"enable_thinking":false}'
on launch, or like setting your own settings -> custom JSON to:
{ "chat_template_kwargs": { "enable_thinking": false }, "reasoning_budget": 0 }
Disclaimer: I have only tried this with QWEN3.6; I don't know whether it works with other models.
EDIT: fuck reddit filter escaping, original code here: https://store.piffa.net/lm/reasoning_toggle_button.js
submitted by
Discussion (0)
Sign in to join the discussion. Free account, 30 seconds — email code or GitHub.
Sign in →No comments yet. Sign in and be the first to say something.