"""Gradio leaderboard app whose rows are stored in a Hugging Face dataset."""

import os
import threading
import time
from datetime import datetime
from typing import Optional

import gradio as gr
import pandas as pd
from datasets import load_dataset, Dataset

# Configuration
DATASET_ID = "daniehua/gptoss-fp4-vllm-isl8192osl1024"
HF_TOKEN = os.environ.get("HF_TOKEN")  # Optional: for write access

# Column names as stored in the Hub dataset. The leaderboard table shows the
# same columns with upper-cased names.
HUB_COLUMNS = (
    "team_name",
    "conc",
    "mi355x_e2e",
    "mi355x_throughput",
    "b200_e2e",
    "b200_throughput",
    "e2e_ratio",
    "throughput_ratio",
    "interactivity",
    "b200_interactivity",
    "interactivity_ratio",
    "bits_per_byte",
    "byte_perplexity",
    "word_perplexity",
    "timestamp",
)
_HUB_TO_DISPLAY = {name: name.upper() for name in HUB_COLUMNS}
_DISPLAY_TO_HUB = {shown: name for name, shown in _HUB_TO_DISPLAY.items()}

# Single ranking column (descending) used by every code path, so the table
# does not re-order itself between the initial load and the periodic reload.
# NOTE(review): the initial load used to rank by E2E_RATIO while the periodic
# reload ranked by THROUGHPUT_RATIO; confirm which metric is intended.
SORT_COLUMN = "THROUGHPUT_RATIO"

# Seconds between automatic reloads of the leaderboard from the Hub.
RELOAD_INTERVAL_SECONDS = 20

# In-memory copy of the leaderboard, shared by all handlers.
dataframe: Optional[pd.DataFrame] = None

# Guards every read-modify-write of `dataframe`. Submissions run concurrently
# (see `concurrency_limit` below) and the timer also rewrites the global, so
# an unguarded reload between "append" and "push" could drop a submission.
# Re-entrant because the submit handler calls other functions that lock too.
_DATA_LOCK = threading.RLock()


def _dataset_to_frame(dataset) -> pd.DataFrame:
    """Convert a Hub dataset into the sorted, display-named leaderboard frame.

    Raises:
        KeyError: if the dataset lacks one of ``HUB_COLUMNS``.
    """
    frame = dataset.to_pandas()
    # Keep only the known columns, in a fixed order, under display names.
    frame = frame[list(HUB_COLUMNS)].rename(columns=_HUB_TO_DISPLAY)
    return frame.sort_values(SORT_COLUMN, ascending=False)


def load_dataframe(dataset=None) -> pd.DataFrame:
    """Load the leaderboard into the module-level ``dataframe`` and return it.

    Args:
        dataset: an already-loaded dataset to convert; when ``None`` the
            ``train`` split of ``DATASET_ID`` is downloaded.
    """
    global dataframe
    if dataset is None:
        # Force download of latest data by disabling cache
        dataset = load_dataset(
            DATASET_ID,
            split="train",
            download_mode="force_redownload",
        )
    print(f"Loaded dataset: {len(dataset)}")
    df = _dataset_to_frame(dataset)
    with _DATA_LOCK:
        dataframe = df
    return df


def update_data(
    team_name,
    conc,
    mi355x_e2e,
    mi355x_throughput,
    b200_e2e,
    b200_throughput,
    e2e_ratio,
    throughput_ratio,
    interactivity,
    b200_interactivity,
    interactivity_ratio,
    bits_per_byte,
    byte_perplexity,
    word_perplexity,
):
    """Append one submission to the in-memory leaderboard and return it.

    Only the module-level ``dataframe`` is updated; nothing is pushed to the
    Hub here. ``conc`` is coerced with ``int`` and the metrics with ``float``,
    so non-numeric values raise ``TypeError``/``ValueError``.
    """
    global dataframe
    new_entry = {
        "TEAM_NAME": team_name,
        "CONC": int(conc),
        "MI355X_E2E": float(mi355x_e2e),
        "MI355X_THROUGHPUT": float(mi355x_throughput),
        "B200_E2E": float(b200_e2e),
        "B200_THROUGHPUT": float(b200_throughput),
        "E2E_RATIO": float(e2e_ratio),
        "THROUGHPUT_RATIO": float(throughput_ratio),
        "INTERACTIVITY": float(interactivity),
        "B200_INTERACTIVITY": float(b200_interactivity),
        "INTERACTIVITY_RATIO": float(interactivity_ratio),
        "BITS_PER_BYTE": float(bits_per_byte),
        "BYTE_PERPLEXITY": float(byte_perplexity),
        "WORD_PERPLEXITY": float(word_perplexity),
        "TIMESTAMP": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    with _DATA_LOCK:
        # Start from an empty board if nothing has been loaded yet.
        existing_data = (
            [] if dataframe is None else dataframe.to_dict(orient="records")
        )
        print(f"Current data length: {len(existing_data)}")
        existing_data.append(new_entry)
        df = pd.DataFrame(existing_data)
        df = df.sort_values(SORT_COLUMN, ascending=False)
        dataframe = df
        print(f"New data length: {len(dataframe)}")
    return df


def api_submit_results(
    team_name: str,
    conc: int,
    mi355x_e2e: float,
    mi355x_throughput: float,
    b200_e2e: float,
    b200_throughput: float,
    e2e_ratio: float,
    throughput_ratio: float,
    interactivity: float,
    b200_interactivity: float,
    interactivity_ratio: float,
    bits_per_byte: float,
    byte_perplexity: float,
    word_perplexity: float,
) -> list:
    """Record a submission, push it to the Hub and return the refreshed board.

    Returns:
        ``["Success", leaderboard]`` once the push has succeeded, otherwise
        ``["Failed: <reason>", None]``. A failed push is reported as a failure
        and the in-memory row is rolled back, so retrying cannot create a
        duplicate entry.
    """
    global dataframe
    try:
        print(f"[{datetime.now()}] Received submission from {team_name}")
        # Hold the lock for the whole append -> push -> reload sequence so a
        # timer-driven reload cannot replace `dataframe` part-way through.
        with _DATA_LOCK:
            previous = dataframe
            # Update the dataset with new submission
            update_data(
                team_name=team_name,
                conc=conc,
                mi355x_e2e=mi355x_e2e,
                mi355x_throughput=mi355x_throughput,
                b200_e2e=b200_e2e,
                b200_throughput=b200_throughput,
                e2e_ratio=e2e_ratio,
                throughput_ratio=throughput_ratio,
                interactivity=interactivity,
                b200_interactivity=b200_interactivity,
                interactivity_ratio=interactivity_ratio,
                bits_per_byte=bits_per_byte,
                byte_perplexity=byte_perplexity,
                word_perplexity=word_perplexity,
            )
            print("Data updated in memory, pushing to Hub...")
            # Immediately push to hub after receiving submission
            if not refresh_leaderboard(push_to_hub=True):
                # The reload below would discard the unsaved row anyway;
                # undo it now and tell the submitter instead of claiming success.
                dataframe = previous
                message = "Failed: could not push submission to the Hub"
                print(f"Error in submission: {message}")
                return [message, None]
            print("Successfully pushed to Hub")
            # Wait a moment for Hub to process
            time.sleep(2)
            # Reload from Hub to get the latest data (including this submission)
            print("Reloading from Hub to confirm update...")
            updated_df = reload_from_hub()
        print(
            "Reload complete, returning updated leaderboard with "
            f"{len(updated_df)} entries"
        )
        return ["Success", updated_df]
    except Exception as e:
        print(f"Error in submission: {str(e)}")
        return ["Failed: " + str(e), None]


def refresh_leaderboard(push_to_hub: bool = True) -> bool:
    """Convert the in-memory leaderboard to Hub format and optionally push it.

    Args:
        push_to_hub: when true, upload the converted dataset to ``DATASET_ID``.

    Returns:
        ``True`` only if a push was requested and completed without error.
        Push errors are logged and reported through the return value rather
        than raised.
    """
    with _DATA_LOCK:
        current = dataframe
    if current is None:
        print("Error pushing to hub: leaderboard has not been loaded")
        return False
    hub_frame = current.rename(columns=_DISPLAY_TO_HUB)[list(HUB_COLUMNS)]
    # The frame carries a shuffled index after sorting; do not store it as an
    # extra column in the dataset.
    dataset = Dataset.from_pandas(hub_frame, preserve_index=False)
    if not push_to_hub:
        return False
    try:
        print(f"Attempting to push to hub: {DATASET_ID}")
        if HF_TOKEN:
            dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
            print("Successfully pushed with explicit token")
        else:
            # Try to use the Space's default token
            dataset.push_to_hub(DATASET_ID)
            print("Successfully pushed with default token")
    except Exception as e:
        print(f"Error pushing to hub: {str(e)}")
        return False
    return True


def get_leaderboard() -> Optional[pd.DataFrame]:
    """Return the in-memory leaderboard (``None`` if nothing is loaded yet)."""
    size = 0 if dataframe is None else len(dataframe)
    print(f"Getting leaderboard: {size}")
    return dataframe


def reload_from_hub() -> Optional[pd.DataFrame]:
    """Reload data from Hugging Face Hub.

    Returns the refreshed leaderboard. If the download or conversion fails,
    the error is logged and the previous in-memory leaderboard is returned
    unchanged.
    """
    global dataframe
    with _DATA_LOCK:
        try:
            print(f"[{datetime.now()}] Reloading data from Hub...")
            current_len = 0 if dataframe is None else len(dataframe)
            print(f"Current dataframe length before reload: {current_len}")
            # Force reload from hub
            dataset = load_dataset(
                DATASET_ID,
                split="train",
                download_mode="force_redownload",
                verification_mode="no_checks",  # Skip verification to force download
            )
            print(f"Loaded {len(dataset)} entries from Hub")
            dataframe = _dataset_to_frame(dataset)
            print(f"Dataframe updated, new length: {len(dataframe)}")
            return dataframe
        except Exception as e:
            print(f"Error reloading from hub: {str(e)}")
            return dataframe


# Create Gradio interface
def create_interface():
    """Build the Gradio Blocks app: leaderboard table, submit API and reload."""
    with gr.Blocks(
        title="AMD PR bounty Leaderboard for gptoss with isl8192osl1024"
    ) as demo:
        gr.Markdown("# AMD PR bounty Leaderboard for gptoss with isl8192osl1024")
        gr.Markdown("Track and compare performance")

        with gr.Tab("Leaderboard"):
            # Initial load
            leaderboard_table = gr.DataFrame(
                value=load_dataframe(),
                label="Benchmark Results",
                interactive=False,
            )

            # Hidden inputs: they exist so the "submit_results" API endpoint
            # has components to bind to.
            with gr.Column(visible=False):
                team_input = gr.Textbox()
                conc_input = gr.Number()
                mi355x_e2e_input = gr.Number()
                mi355x_throughput_input = gr.Number()
                b200_e2e_input = gr.Number()
                b200_throughput_input = gr.Number()
                e2e_ratio_input = gr.Number()
                throughput_ratio_input = gr.Number()
                interactivity_input = gr.Number()
                b200_interactivity_input = gr.Number()
                interactivity_ratio_input = gr.Number()
                bits_per_byte_input = gr.Number()
                byte_perplexity_input = gr.Number()
                word_perplexity_input = gr.Number()
                submit_output = gr.Textbox()
                submit_btn = gr.Button("Submit")
                submit_btn.click(
                    fn=api_submit_results,
                    inputs=[
                        team_input,
                        conc_input,
                        mi355x_e2e_input,
                        mi355x_throughput_input,
                        b200_e2e_input,
                        b200_throughput_input,
                        e2e_ratio_input,
                        throughput_ratio_input,
                        interactivity_input,
                        b200_interactivity_input,
                        interactivity_ratio_input,
                        bits_per_byte_input,
                        byte_perplexity_input,
                        word_perplexity_input,
                    ],
                    outputs=[submit_output, leaderboard_table],
                    api_name="submit_results",
                    concurrency_limit=10,
                    show_progress="full",
                )
                # NOTE(review): assumed to sit inside the hidden column
                # (original indentation was lost) - confirm.
                refresh_btn = gr.Button("Refresh Leaderboard")
                refresh_btn.click(
                    fn=reload_from_hub,
                    outputs=leaderboard_table,
                )

            # Place timer outside the hidden column so it always runs
            # Reload from Hub periodically to show latest submissions
            reload_timer = gr.Timer(RELOAD_INTERVAL_SECONDS)
            reload_timer.tick(
                fn=reload_from_hub,
                outputs=leaderboard_table,
            )
    return demo


# Create and launch the app
if __name__ == "__main__":
    demo = create_interface()
    demo.queue(max_size=100)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=True,
        ssr_mode=False,
    )