A powerful sparse-activation language model with web search capabilities, tool use, and mixture-of-experts architecture. Key Features: - 26B total parameters, 18B active (sparse activation) - 48-layer transformer with grouped-query attention - 8 experts per layer with top-2 routing - 32K context length - Web search integration - Multi-language support - Tool use capabilities Quick Start: ----------- from transformers import AutoModelForCausalLM, AutoTokenizer model = AutoModelForCausalLM.from_pretrained("Kirim-ai/Kirim-V2") tokenizer = AutoTokenizer.from_pretrained("Kirim-ai/Kirim-V2") prompt = "Explain quantum computing:" inputs = tokenizer(prompt, return_tensors="pt") outputs = model.generate(**inputs, max_new_tokens=200) response = tokenizer.decode(outputs[0]) For more examples, see: examples.py """ __version__ = "2.0.0" __author__ = "Kirim-ai" __license__ = "Apache-2.0" # Model information MODEL_NAME = "Kirim-V2" MODEL_SIZE = "26B" ACTIVE_SIZE = "18B" ARCHITECTURE = "Sparse Transformer + Mixture of Experts" # Model configuration MODEL_CONFIG = { "model_type": "kirim", "architecture": "KirimForCausalLM", "total_parameters": 26_000_000_000, "active_parameters": 18_000_000_000, "hidden_size": 6144, "intermediate_size": 16384, "num_hidden_layers": 48, "num_attention_heads": 48, "num_key_value_heads": 8, "num_experts": 8, "experts_per_token": 2, "vocab_size": 128256, "max_position_embeddings": 32768, "rope_theta": 500000.0, } # Capabilities CAPABILITIES = [ "text_generation", "web_search", "tool_use", "code_generation", "multilingual", "long_context", "reasoning", "question_answering", "summarization", "translation", ] # Supported languages SUPPORTED_LANGUAGES = [ "en", # English "es", # Spanish "fr", # French "de", # German "it", # Italian "pt", # Portuguese "zh", # Chinese "ja", # Japanese "ko", # Korean "ru", # Russian ] # Import utilities try: from .utils import ModelUtils, calculate_model_stats __all__ = ["ModelUtils", "calculate_model_stats"] except ImportError: # If running standalone, utils might not be in package try: from utils import ModelUtils, calculate_model_stats __all__ = ["ModelUtils", "calculate_model_stats"] except ImportError: __all__ = [] # Add configuration utilities def get_model_info(): """ Get comprehensive model information. Returns: Dictionary with model details """ return { "name": MODEL_NAME, "version": __version__, "size": MODEL_SIZE, "active_size": ACTIVE_SIZE, "architecture": ARCHITECTURE, "config": MODEL_CONFIG, "capabilities": CAPABILITIES, "languages": SUPPORTED_LANGUAGES, "license": __license__, } def print_model_card(): """Print a formatted model card with key information.""" info = get_model_info() print("\n" + "="*70) print(f"{info['name']} - Model Card".center(70)) print("="*70) print(f"\nVersion: {info['version']}") print(f"Parameters: {info['size']} total, {info['active_size']} active") print(f"Architecture: {info['architecture']}") print(f"License: {info['license']}") print(f"\nCapabilities: {', '.join(info['capabilities'][:5])}") print(f" {', '.join(info['capabilities'][5:])}") print(f"\nLanguages: {', '.join(info['languages'][:5])}") print(f" {', '.join(info['languages'][5:])}") print("\nKey Features:") print(" • Sparse activation (69% parameters active)") print(" • Mixture of Experts (8 experts, top-2 routing)") print(" • Extended context (32,768 tokens)") print(" • Web search integration") print(" • Tool use capabilities") print(" • Multi-language support") print("\nHardware Requirements:") print(" Minimum: 24GB VRAM (with INT8 quantization)") print(" Recommended: 48GB+ VRAM (BF16 precision)") print(" Optimal: 80GB VRAM (full batch processing)") print("\nQuick Start:") print(" from transformers import AutoModelForCausalLM, AutoTokenizer") print(' model = AutoModelForCausalLM.from_pretrained("Kirim-ai/Kirim-V2")') print(' tokenizer = AutoTokenizer.from_pretrained("Kirim-ai/Kirim-V2")') print("\nDocumentation:") print(" README: See README.md for detailed documentation") print(" Quickstart: See QUICKSTART.md for usage examples") print(" Examples: See examples.py for code samples") print(" Web Search: See web/README.md for search integration") print("\n" + "="*70 + "\n") def check_requirements(): """ Check if required packages are installed. Returns: Dictionary with package availability """ requirements = {} packages = [ "torch", "transformers", "accelerate", "sentencepiece", "safetensors", "bitsandbytes", ] for package in packages: try: __import__(package) requirements[package] = "✓ Installed" except ImportError: requirements[package] = "✗ Not installed" return requirements def print_requirements(): """Print package requirements and their installation status.""" reqs = check_requirements() print("\n" + "="*70) print("Package Requirements".center(70)) print("="*70 + "\n") for package, status in reqs.items(): print(f" {package:<20} {status}") missing = [pkg for pkg, status in reqs.items() if "Not installed" in status] if missing: print("\n" + "⚠"*35) print("\nMissing packages detected!") print("Install with: pip install " + " ".join(missing)) print("\nOr install all requirements:") print(" pip install -r requirements.txt") else: print("\n✓ All requirements are installed!") print("\n" + "="*70 + "\n") # Convenience functions def get_default_generation_config(): """ Get default generation configuration. Returns: Dictionary with generation parameters """ return { "max_new_tokens": 512, "temperature": 0.7, "top_p": 0.9, "top_k": 50, "repetition_penalty": 1.1, "do_sample": True, "pad_token_id": None, } def get_recommended_device_map(): """ Get recommended device map based on available hardware. Returns: Device map string """ try: import torch if torch.cuda.is_available(): device_count = torch.cuda.device_count() if device_count > 1: return "balanced" # Multi-GPU else: return "auto" # Single GPU else: return "cpu" except ImportError: return "auto" # Export public API __all__ = [ # Version info "__version__", "__author__", "__license__", # Model info "MODEL_NAME", "MODEL_SIZE", "ACTIVE_SIZE", "ARCHITECTURE", "MODEL_CONFIG", "CAPABILITIES", "SUPPORTED_LANGUAGES", # Functions "get_model_info", "print_model_card", "check_requirements", "print_requirements", "get_default_generation_config", "get_recommended_device_map", ] # Print welcome message on import (optional) if __name__ != "__main__": import sys if hasattr(sys, 'ps1'): # Interactive mode print(f"\n🚀 Kirim-V2 v{__version__} loaded") print(f" Use print_model_card() for details\n")