⭕ | | 67.95 | 77.86 | 70.71 | 75.5 | 83.81 | 33.39 | 66.45 | 70.55 | nvidia/Llama-3.1-Nemotron-70B-Instruct-HF |
🟢 | | 67.92 | 95.48 | 73.44 | 71.5 | 74.82 | 32.79 | 59.47 | 72.71 | Qwen/Qwen2.5-72B |
🔶 | | 66.05 | 75.95 | 81.76 | 61.53 | 71.94 | 35.37 | 69.77 | 7.25 | 618AI/dictalm2-it-qa-fine-tune |
🟢 | | 66.03 | 81.19 | 78.01 | 66.73 | 80.94 | 30.81 | 58.47 | 24.01 | mistralai/Mistral-Small-3.1-24B-Base-2503 |
🟢 | | 65.11 | 83.57 | 78.4 | 69.77 | 77.34 | 29.1 | 52.49 | 56.3 | nvidia/Nemotron-H-56B-Base-8K |
🟦 | | 64.72 | 87.38 | 75.71 | 73.87 | 77.34 | 29.49 | 44.52 | 32.76 | Qwen/QwQ-32B-Preview |
⭕ | | 64.19 | 80.71 | 72.9 | 74.4 | 76.62 | 29.65 | 50.83 | 49.9 | nvidia/Llama-3_3-Nemotron-Super-49B-v1_5 |
🟢 | | 63.89 | 79.05 | 75.69 | 59.67 | 66.91 | 35.57 | 66.45 | 7.25 | dicta-il/dictalm2.0 |
⭕ | | 63.42 | 90.24 | 67.76 | 77.27 | 76.26 | 28.11 | 40.86 | 32.76 | rombodawg/Rombos-LLM-V2.5-Qwen-32b |
⭕ | | 62.94 | 73.57 | 76.9 | 56.3 | 69.42 | 35.3 | 66.11 | 7.25 | dicta-il/dictalm2.0-instruct |
⭕ | | 62.67 | 89.05 | 75.03 | 72.07 | 70.14 | 28.52 | 41.2 | 30.53 | Qwen/Qwen3-30B-A3B-Thinking-2507 |
⭕ | | 62.61 | 90.48 | 75.02 | 71.37 | 69.42 | 27.83 | 41.53 | 30.53 | Qwen/Qwen3-30B-A3B-Instruct-2507 |
🟢 | | 62.35 | 93.33 | 75.96 | 64.63 | 68.71 | 29.26 | 42.19 | 30.53 | Qwen/Qwen3-30B-A3B-Base |
🟢 | | 62.27 | 84.76 | 76.29 | 70.2 | 73.74 | 30.12 | 38.54 | 14.77 | Qwen/Qwen3-14B-Base |
🟢 | | 62.01 | 90 | 74.39 | 71.73 | 68.35 | 28.71 | 38.87 | 14.77 | Qwen/Qwen2.5-14B |
🟦 | | 61.91 | 88.57 | 71.89 | 70.7 | 71.22 | 29.85 | 39.2 | 9.24 | UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3 |
🟦 | | 61.83 | 85.71 | 73.82 | 74 | 70.86 | 27.69 | 38.87 | 32.76 | deepseek-ai/DeepSeek-R1-Distill-Qwen-32B |
🔶 | | 61.76 | 85.95 | 73.83 | 73.7 | 70.5 | 27.71 | 38.87 | 32.76 | deepseek-ai/DeepSeek-R1-Distill-Qwen-32B |
🟢 | | 61.53 | 76.9 | 74.74 | 69.63 | 75.54 | 29.51 | 42.86 | 14.66 | microsoft/phi-4 |
⭕ | | 61.52 | 88.57 | 71.18 | 71.73 | 67.99 | 29.13 | 40.53 | 32.76 | Qwen/Qwen3-32B |
⭕ | | 60.78 | 67.38 | 73.59 | 59.97 | 65.11 | 33.51 | 65.12 | 7.25 | ronigold/dictalm2.0-instruct-fine-tuned-alpaca-gpt4-hebrew |
🔶 | | 60.6 | 73.1 | 73.48 | 63.7 | 78.42 | 28.39 | 46.51 | 0 | SicariusSicariiStuff/Impish_Nemo_12B |
⭕ | | 60.43 | 86.43 | 77.76 | 64.1 | 70.14 | 30.9 | 33.22 | 7.24 | SicariusSicariiStuff/Zion_Alpha_Instruction_Tuned |
⭕ | | 60.28 | 83.57 | 75.13 | 71.67 | 66.55 | 27.87 | 36.88 | 14.77 | Qwen/Qwen3-14B |
🔶 | | 59.72 | 79.05 | 76.31 | 67.03 | 70.86 | 27.17 | 37.87 | 8.54 | SeaLLMs/SeaLLM-7B-v2.5 |
🟢 | | 59.2 | 90 | 75.05 | 74.07 | 42.09 | 30.81 | 43.19 | 32.76 | Qwen/Qwen2.5-32B |
🟢 | | 58.8 | 75.71 | 75.59 | 70.4 | 67.27 | 27.64 | 36.21 | 8.19 | Qwen/Qwen3-8B-Base |
🔶 | | 58.6 | 55.74 | 83.31 | 49.17 | 64.75 | 33.95 | 64.67 | 7.25 | ronigold/dictalm2.0-instruct-fine-tuned |
⭕ | | 58.47 | 78.1 | 79.66 | 70.3 | 61.87 | 27.7 | 33.22 | 7.24 | SicariusSicariiStuff/Zion_Alpha_Instruction_Tuned_SLERP |
🔶 | | 58.44 | 84.05 | 72.6 | 65.67 | 65.83 | 27.93 | 34.55 | 7.24 | SicariusSicariiStuff/Zion_Alpha |
🔶 | | 57.96 | 85.48 | 73.53 | 69.2 | 63.67 | 22.99 | 32.89 | 14.77 | deepseek-ai/DeepSeek-R1-Distill-Qwen-14B |
🟢 | | 57.35 | 81.9 | 73.84 | 66.73 | 62.59 | 25.5 | 33.55 | 7.62 | Qwen/Qwen2.5-7B |
⭕ | | 56.99 | 80 | 74.69 | 68.3 | 63.31 | 25.73 | 29.9 | 8.19 | Qwen/Qwen3-8B |
🟦 | | 55.42 | 78.81 | 67.52 | 57.57 | 66.55 | 26.2 | 35.88 | 8.03 | mlabonne/NeuralDaredevil-8B-abliterated |
🔶 | | 55.28 | 64.52 | 69.88 | 66.77 | 67.63 | 26.68 | 36.21 | 8.03 | NousResearch/Hermes-3-Llama-3.1-8B |
🟢 | | 55.02 | 71.43 | 73.82 | 64.47 | 64.03 | 21.51 | 34.88 | 12.3 | nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base |
🟦 | | 54.63 | 79.29 | 65.02 | 63.73 | 62.95 | 25.56 | 31.23 | 8.03 | vicgalle/Configurable-Hermes-2-Pro-Llama-3-8B |
🟢 | | 54.27 | 67.86 | 71.61 | 66.93 | 61.15 | 26.52 | 31.56 | 32.51 | Qwen/Qwen1.5-32B |
🟢 | | 54.23 | 72.86 | 74.58 | 60.17 | 61.51 | 24.71 | 31.56 | 4.02 | Qwen/Qwen3-4B-Base |
🟢 | | 53.42 | 59.52 | 73.52 | 48.57 | 64.75 | 28.98 | 45.18 | 7.5 | yam-peleg/Hebrew-Mistral-7B |
🟦 | | 53.33 | 77.86 | 70.72 | 52.67 | 67.63 | 25.54 | 25.58 | 8.03 | MohamedRashad/Arabic-Orpo-Llama-3-8B-Instruct |
🟦 | | 52.95 | 74.52 | 68.84 | 51.4 | 64.75 | 24.61 | 33.55 | 8.03 | Danielbrdz/Barcenas-Llama3-8b-ORPO |
⭕ | | 52.55 | 55.48 | 72.89 | 69.7 | 59.71 | 27.32 | 30.23 | 10.48 | yam-peleg/Hebrew-Gemma-11B-Instruct |
⭕ | | 52.36 | 64.05 | 73.24 | 69.07 | 58.99 | 21.25 | 27.57 | 4.02 | Qwen/Qwen3-4B |
🟢 | | 51.45 | 66.67 | 73.23 | 54.7 | 60.07 | 24.81 | 29.24 | 7.62 | Qwen/Qwen2-7B |
🔶 | | 51.07 | 71.67 | 64.51 | 51.2 | 62.95 | 24.03 | 32.06 | 8.03 | DeepMount00/Llama-3-8b-Ita |
🟢 | | 50.7 | 56.67 | 74.6 | 59.5 | 62.23 | 22.29 | 28.9 | 8.41 | nvidia/Mistral-NeMo-Minitron-8B-Base |
🔶 | | 50.69 | 73.1 | 69.19 | 50.87 | 60.79 | 25.26 | 24.92 | 8.03 | lightblue/suzume-llama-3-8B-multilingual |
🟦 | | 50.49 | 75.95 | 63.6 | 46.73 | 62.23 | 24.2 | 30.23 | 8.03 | abacusai/Llama-3-Smaug-8B |
🔶 | | 50.31 | 71.9 | 65.62 | 51 | 63.31 | 24.31 | 25.69 | 8.03 | uygarkurt/llama-3-merged-linear |
🔶 | | 50.12 | 74.52 | 61.98 | 54.9 | 62.59 | 23.48 | 23.26 | 7.62 | natong19/Qwen2-7B-Instruct-abliterated |
🟦 | | 50.07 | 71.43 | 64.44 | 51.23 | 63.31 | 24.12 | 25.9 | 8.03 | VAGOsolutions/Llama-3-SauerkrautLM-8b-Instruct |
🟢 | | 49.99 | 64.52 | 73.16 | 61.33 | 55.76 | 20.94 | 24.25 | 3.09 | Qwen/Qwen2.5-3B |
🟢 | | 49.94 | 56.45 | 66.3 | 62.5 | 60.79 | 27.25 | 26.37 | 10.48 | yam-peleg/Hebrew-Gemma-11B-V2 |
🔶 | | 49.57 | 52.59 | 67.59 | 62.27 | 63.31 | 25.72 | 25.94 | 8.03 | NousResearch/Hermes-2-Pro-Llama-3-8B |
🟢 | | 49.25 | 66.43 | 69.82 | 44.6 | 60.07 | 21.39 | 33.22 | 33.64 | tiiuae/Falcon-H1-34B-Base |
🟦 | | 49.12 | 68.33 | 58.94 | 60.03 | 60.79 | 21.25 | 25.37 | 21.42 | saltlux/luxia-21.4b-alignment-v1.0 |
⭕ | | 46.91 | 55 | 67.67 | 59 | 53.24 | 15.66 | 30.9 | 3.82 | unsloth/Phi-3.5-mini-instruct |
⭕ | | 46.71 | 52.38 | 68.18 | 55.77 | 59.71 | 18.97 | 25.26 | 10.73 | upstage/SOLAR-10.7B-Instruct-v1.0 |
🟢 | | 46.68 | 55 | 67.94 | 57.37 | 50.72 | 23.63 | 25.4 | 14.17 | Qwen/Qwen1.5-14B |
🔶 | | 46.66 | 52.14 | 68.07 | 56.73 | 60.07 | 18.82 | 24.14 | 10.73 | kekmodel/StopCarbon-10.7B-v5 |
🔶 | | 46.51 | 51.43 | 67.82 | 57.03 | 59.71 | 18.84 | 24.25 | 10.73 | jeonsworld/CarbonVillain-en-10.7B-v4 |
🔶 | | 46.33 | 51.43 | 67.98 | 57.03 | 60.07 | 18.88 | 22.59 | 10.73 | invalid-coder/Sakura-SOLAR-Instruct-CarbonVillain-en-10.7B-v2-slerp |
🟢 | | 46 | 60.24 | 58.52 | 46.1 | 57.19 | 24.71 | 29.24 | 34.39 | 01-ai/Yi-1.5-34B-32K |
🟢 | | 45.22 | 45.95 | 73.06 | 53 | 54.32 | 20.1 | 24.92 | 1.72 | Qwen/Qwen3-1.7B-Base |
🔶 | | 45.19 | 56.19 | 65 | 52.8 | 54.32 | 17.35 | 25.45 | 7.24 | shadowml/BeagSake-7B |
🔶 | | 45.18 | 64.29 | 62.53 | 39.43 | 56.12 | 23.33 | 25.41 | 8.03 | TIGER-Lab/MAmmoTH2-8B-Plus |
🟦 | | 44.85 | 61.43 | 53.33 | 54.5 | 52.88 | 21.32 | 25.63 | 14.17 | Qwen/Qwen1.5-14B-Chat |
🔶 | | 44.45 | 55.48 | 62.08 | 52.73 | 53.24 | 16.73 | 26.44 | 12.88 | zhengr/MixTAO-7Bx2-MoE-v8.1 |
? | | 44.35 | 54.52 | 68.38 | 41.13 | 55.4 | 21.43 | 25.25 | 0 | maywell/EXAONE-3.0-7.8B-Instruct-Llamafied |
? | | 44.35 | 54.52 | 68.42 | 41.13 | 55.4 | 21.38 | 25.25 | 0 | Bingsu/exaone-3.0-7.8b-it |
🟦 | | 44.25 | 54.52 | 54.06 | 56.67 | 56.12 | 19.23 | 24.92 | 8.83 | 01-ai/Yi-1.5-9B-Chat |
🟢 | | 44.07 | 53.33 | 64.74 | 54.5 | 52.16 | 13.8 | 25.91 | 3.08 | HuggingFaceTB/SmolLM3-3B-Base |
🟦 | | 44 | 53.57 | 60.94 | 51.17 | 57.55 | 15.86 | 24.88 | 7.24 | mlabonne/AlphaMonarch-7B |
🟦 | | 43.98 | 53.57 | 60.94 | 51.2 | 57.55 | 15.86 | 24.74 | 7.24 | mlabonne/NeuralMonarch-7B |
🔶 | | 43.93 | 55.48 | 62.57 | 51.37 | 53.6 | 15.84 | 24.75 | 8.99 | eldogbbhed/Peagle-9b |
🔶 | | 43.41 | 55.71 | 58.62 | 50 | 54.32 | 15.9 | 25.91 | 7.24 | Kukedlc/NeuralSynthesis-7B-v0.1 |
🔶 | | 43.38 | 55.48 | 66.02 | 44.9 | 50.36 | 16.62 | 26.91 | 8.03 | deepseek-ai/DeepSeek-R1-Distill-Llama-8B |
🟦 | | 43.37 | 50.48 | 65.92 | 40.23 | 60.07 | 17.55 | 25.96 | 10.73 | vicgalle/CarbonBeagle-11B-truthy |
🟦 | | 43.19 | 55.48 | 66.05 | 44.2 | 49.64 | 16.52 | 27.24 | 8.03 | deepseek-ai/DeepSeek-R1-Distill-Llama-8B |
🟢 | | 42.42 | 42.38 | 71.83 | 50.6 | 51.44 | 11.67 | 26.58 | 8.89 | nvidia/NVIDIA-Nemotron-Nano-9B-v2-Base |
🟦 | | 42.42 | 53.81 | 58.9 | 51.1 | 52.88 | 15.44 | 22.37 | 12.88 | yunconglong/Truthful_DPO_TomGrc_FusionNet_7Bx2_MoE_13B |
⭕ | | 42.36 | 43.81 | 64.79 | 45.6 | 55.4 | 16.64 | 27.91 | 8.17 | ibm-granite/granite-3.1-8b-instruct |
⭕ | | 42.08 | 57.62 | 64.4 | 38.83 | 55.04 | 13 | 23.59 | 3.08 | HuggingFaceTB/SmolLM3-3B |
🟢 | | 41.51 | 42.38 | 65.76 | 54.6 | 48.2 | 15.84 | 22.26 | 1.54 | Qwen/Qwen2.5-1.5B |
🟦 | | 41.28 | 52.62 | 56.99 | 50.67 | 53.24 | 15.25 | 18.94 | 12.88 | yunconglong/DARE_TIES_13B |
⭕ | | 40.46 | 47.14 | 64.38 | 34.17 | 52.88 | 15.97 | 28.24 | 8.17 | ibm-granite/granite-3.0-8b-instruct |
🟢 | | 40.38 | 42.38 | 64.55 | 38 | 53.24 | 16.54 | 27.57 | 8.17 | ibm-granite/granite-3.0-8b-base |
⭕ | | 39.99 | 47.62 | 62.83 | 41.43 | 51.8 | 14.33 | 21.93 | 10.31 | tiiuae/Falcon3-10B-Instruct |
🔶 | | 39.75 | 49.05 | 61.38 | 35.83 | 52.16 | 14.78 | 25.3 | 7.24 | TIGER-Lab/MAmmoTH2-7B-Plus |
🟢 | | 39.19 | 47.62 | 63.98 | 33.6 | 48.92 | 14.43 | 26.58 | 7.59 | tiiuae/Falcon-H1-7B-Base |
🟢 | | 39.16 | 39.05 | 63.22 | 55.6 | 43.88 | 16.58 | 16.61 | 1.54 | Qwen/Qwen2-1.5B |
🟢 | | 38.82 | 47.38 | 53.98 | 35.03 | 51.8 | 13.23 | 31.51 | 7.24 | lex-hue/Delexa-7b |
⭕ | | 37.88 | 44.29 | 57.49 | 35.17 | 47.12 | 10.2 | 33.02 | 7.24 | HuggingFaceH4/zephyr-7b-beta |
🟢 | | 37.55 | 35.71 | 63.12 | 34.77 | 50.36 | 15.74 | 25.58 | 8.17 | ibm-granite/granite-3.1-8b-base |
⭕ | | 37.27 | 44.52 | 58.11 | 33.9 | 50.72 | 12.09 | 24.25 | 7.46 | tiiuae/Falcon3-7B-Instruct |
🔶 | | 37.06 | 50.71 | 53.88 | 33.47 | 49.64 | 9.3 | 25.35 | 7.24 | royallab/ZephRP-m7b |
🟢 | | 36.87 | 43.33 | 60.99 | 35.73 | 50.36 | 12.5 | 18.27 | 10.31 | tiiuae/Falcon3-10B-Base |
🟢 | | 36.62 | 46.43 | 64.05 | 36.5 | 47.84 | 11.97 | 12.96 | 0.6 | Qwen/Qwen3-0.6B-Base |
🟦 | | 36.04 | 33.57 | 53.06 | 33.7 | 48.2 | 9.25 | 38.46 | 13.02 | haoranxu/ALMA-13B-R |
🔶 | | 35.64 | 39.05 | 52.26 | 55.8 | 35.97 | 12.17 | 18.6 | 7.62 | deepseek-ai/DeepSeek-R1-Distill-Qwen-7B |
⭕ | | 35.23 | 34.76 | 54.28 | 34.6 | 51.08 | 11.06 | 25.58 | 3.82 | unsloth/Phi-3-mini-4k-instruct |
🟦 | | 35.16 | 60 | 34.37 | 42.17 | 39.21 | 22.24 | 12.96 | 8.03 | lightblue/suzume-llama-3-8B-multilingual-orpo-borda-half |
🟢 | | 34.99 | 36.67 | 51.78 | 33.33 | 51.44 | 10.12 | 26.58 | 7.46 | tiiuae/Falcon3-7B-Base |
🔶 | | 34.74 | 33.33 | 54.95 | 35.03 | 49.28 | 10.56 | 25.3 | 8.03 | shanchen/llama3-8B-slerp-med-chinese |
🟢 | | 34.23 | 32.86 | 54.89 | 33.3 | 49.28 | 9.15 | 25.91 | 2.53 | ibm-granite/granite-3.0-2b-base |
⭕ | | 34.09 | 36.67 | 52.02 | 33.27 | 48.2 | 9.14 | 25.25 | 2.63 | ibm-granite/granite-3.0-2b-instruct |
🟢 | | 33.93 | 57.62 | 39.57 | 55.4 | 13.67 | 27.72 | 9.63 | 36.15 | ByteDance-Seed/Seed-OSS-36B-Base |
🟢 | | 33.65 | 32.38 | 51.76 | 33.37 | 50.36 | 9.11 | 24.92 | 2.53 | ibm-granite/granite-3.1-2b-base |
⭕ | | 33.15 | 37.38 | 40.84 | 36.33 | 48.92 | 5.84 | 29.57 | 3.3 | ibm-granite/granite-3.1-3b-a800m-instruct |
🟢 | | 33.09 | 38.81 | 52.87 | 33.33 | 44.24 | 7.68 | 21.59 | 3.15 | tiiuae/Falcon-H1-3B-Base |
🟢 | | 32.64 | 31.19 | 49.47 | 47.27 | 43.88 | 7.75 | 16.28 | 0.49 | Qwen/Qwen2.5-0.5B |