Spaces:

daniehua
/

gptoss-fp4-vllm-isl8192osl1024

Sleeping

App Files Files Community

gptoss-fp4-vllm-isl8192osl1024 / app.py

daniehua

Update app.py

c711de8 verified 10 days ago

raw

history blame contribute delete

11.4 kB

	import gradio as gr
	import pandas as pd
	from datetime import datetime
	import os
	import time
	from datasets import load_dataset, Dataset

	# Configuration
	# Hugging Face dataset repository that the leaderboard is loaded from and pushed to.
	DATASET_ID = "daniehua/gptoss-fp4-vllm-isl8192osl1024"
	HF_TOKEN = os.environ.get("HF_TOKEN") # Optional: for write access

	# In-memory leaderboard shared by the functions below via `global dataframe`.
	# It stays None until load_dataframe() assigns it.
	dataframe: pd.DataFrame = None


	def load_dataframe(dataset=None) -> pd.DataFrame:
	    """Convert the leaderboard dataset to a DataFrame and cache it globally.

	    Args:
	        dataset: An already-loaded dataset to convert. When ``None``, the
	            ``train`` split of ``DATASET_ID`` is downloaded with
	            ``download_mode="force_redownload"``.

	    Returns:
	        The rows under upper-case column names, sorted by ``E2E_RATIO`` in
	        descending order. The same object is stored in the module-level
	        ``dataframe``.
	    """
	    global dataframe

	    # (dataset column, leaderboard column) pairs, in display order
	    column_pairs = (
	        ("team_name", "TEAM_NAME"),
	        ("conc", "CONC"),
	        ("mi355x_e2e", "MI355X_E2E"),
	        ("mi355x_throughput", "MI355X_THROUGHPUT"),
	        ("b200_e2e", "B200_E2E"),
	        ("b200_throughput", "B200_THROUGHPUT"),
	        ("e2e_ratio", "E2E_RATIO"),
	        ("throughput_ratio", "THROUGHPUT_RATIO"),
	        ("interactivity", "INTERACTIVITY"),
	        ("b200_interactivity", "B200_INTERACTIVITY"),
	        ("interactivity_ratio", "INTERACTIVITY_RATIO"),
	        ("bits_per_byte", "BITS_PER_BYTE"),
	        ("byte_perplexity", "BYTE_PERPLEXITY"),
	        ("word_perplexity", "WORD_PERPLEXITY"),
	        ("timestamp", "TIMESTAMP"),
	    )

	    def to_leaderboard_row(item):
	        return {target: item[source] for source, target in column_pairs}

	    if dataset is None:
	        # Bypass the local cache so the latest rows are fetched
	        dataset = load_dataset(
	            DATASET_ID, split="train", download_mode="force_redownload"
	        )
	    print(f"Loaded dataset: {len(dataset)}")

	    renamed = dataset.map(
	        to_leaderboard_row,
	        batch_size=64,
	        remove_columns=dataset.column_names,
	    )
	    table = renamed.to_pandas().sort_values("E2E_RATIO", ascending=False)
	    dataframe = table
	    return table


	def update_data(
	    team_name,
	    conc,
	    mi355x_e2e,
	    mi355x_throughput,
	    b200_e2e,
	    b200_throughput,
	    e2e_ratio,
	    throughput_ratio,
	    interactivity,
	    b200_interactivity,
	    interactivity_ratio,
	    bits_per_byte,
	    byte_perplexity,
	    word_perplexity,
	):
	    """Append one submission to the in-memory leaderboard.

	    Only the module-level ``dataframe`` is changed here; nothing is written
	    to the Hub by this function.

	    Args:
	        team_name: Stored as-is in ``TEAM_NAME``.
	        conc: Converted with ``int()`` and stored in ``CONC``.
	        mi355x_e2e, mi355x_throughput, b200_e2e, b200_throughput, e2e_ratio,
	        throughput_ratio, interactivity, b200_interactivity,
	        interactivity_ratio, bits_per_byte, byte_perplexity, word_perplexity:
	            Each converted with ``float()`` and stored under the upper-case
	            column of the same name.

	    Returns:
	        The updated leaderboard, sorted by ``E2E_RATIO`` in descending order.
	        The same object is stored in the module-level ``dataframe``.

	    Raises:
	        ValueError, TypeError: If a numeric field cannot be converted by
	            ``int()`` / ``float()``.
	    """
	    global dataframe

	    # A leaderboard that has not been loaded yet is treated as empty
	    # instead of failing on ``None.to_dict``.
	    if dataframe is None:
	        existing_data = []
	    else:
	        existing_data = dataframe.to_dict(orient="records")

	    print(f"Current data length: {len(existing_data)}")

	    new_entry = {
	        "TEAM_NAME": team_name,
	        "CONC": int(conc),
	        "MI355X_E2E": float(mi355x_e2e),
	        "MI355X_THROUGHPUT": float(mi355x_throughput),
	        "B200_E2E": float(b200_e2e),
	        "B200_THROUGHPUT": float(b200_throughput),
	        "E2E_RATIO": float(e2e_ratio),
	        "THROUGHPUT_RATIO": float(throughput_ratio),
	        "INTERACTIVITY": float(interactivity),
	        "B200_INTERACTIVITY": float(b200_interactivity),
	        "INTERACTIVITY_RATIO": float(interactivity_ratio),
	        "BITS_PER_BYTE": float(bits_per_byte),
	        "BYTE_PERPLEXITY": float(byte_perplexity),
	        "WORD_PERPLEXITY": float(word_perplexity),
	        # Submission time is the server's local clock at insertion
	        "TIMESTAMP": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
	    }
	    existing_data.append(new_entry)

	    df = pd.DataFrame(existing_data)
	    df = df.sort_values("E2E_RATIO", ascending=False)
	    dataframe = df
	    print(f"New data length: {len(dataframe)}")
	    return df


	def api_submit_results(
	    team_name: str,
	    conc: int,
	    mi355x_e2e: float,
	    mi355x_throughput: float,
	    b200_e2e: float,
	    b200_throughput: float,
	    e2e_ratio: float,
	    throughput_ratio: float,
	    interactivity: float,
	    b200_interactivity: float,
	    interactivity_ratio: float,
	    bits_per_byte: float,
	    byte_perplexity: float,
	    word_perplexity: float
	) -> list:
	    """Record a submission, push it to the Hub and return the refreshed table.

	    The submission is added in memory with ``update_data``, pushed with
	    ``refresh_leaderboard(push_to_hub=True)``, and after a two-second pause
	    the leaderboard is re-read with ``reload_from_hub``.

	    Returns:
	        A two-element list, one element per Gradio output wired to this
	        handler: ``["Success", <reloaded DataFrame>]`` on success, or
	        ``["Failed: <error text>", None]`` when any step raises.
	    """
	    try:
	        print(f"[{datetime.now()}] Received submission from {team_name}")
	        # Update the in-memory leaderboard with the new submission
	        update_data(
	            team_name=team_name,
	            conc=conc,
	            mi355x_e2e=mi355x_e2e,
	            mi355x_throughput=mi355x_throughput,
	            b200_e2e=b200_e2e,
	            b200_throughput=b200_throughput,
	            e2e_ratio=e2e_ratio,
	            throughput_ratio=throughput_ratio,
	            interactivity=interactivity,
	            b200_interactivity=b200_interactivity,
	            interactivity_ratio=interactivity_ratio,
	            bits_per_byte=bits_per_byte,
	            byte_perplexity=byte_perplexity,
	            word_perplexity=word_perplexity
	        )
	        print("Data updated in memory, pushing to Hub...")
	        # Immediately push to hub after receiving submission
	        refresh_leaderboard(push_to_hub=True)
	        print("Successfully pushed to Hub")

	        # Wait a moment for Hub to process
	        time.sleep(2)

	        # Reload from Hub to get the latest data (including this submission)
	        print("Reloading from Hub to confirm update...")
	        updated_df = reload_from_hub()
	        print(f"Reload complete, returning updated leaderboard with {len(updated_df)} entries")

	        return ["Success", updated_df]

	    except Exception as e:
	        # API boundary: report the failure to the client instead of raising
	        print(f"Error in submission: {str(e)}")
	        return ["Failed: " + str(e), None]


	def refresh_leaderboard(push_to_hub: bool = True):
	    """Convert the in-memory leaderboard to the Hub schema and optionally push it.

	    The module-level ``dataframe`` is turned into a ``Dataset`` whose columns
	    are renamed from the upper-case leaderboard names to the lower-case names
	    used in the stored dataset.

	    Args:
	        push_to_hub: When true, upload the converted dataset to ``DATASET_ID``,
	            passing ``HF_TOKEN`` explicitly if it is set.

	    Raises:
	        Exception: Any error raised by the upload is logged and re-raised, so
	            the caller does not report success for a submission that was
	            never persisted.
	    """
	    dataset = Dataset.from_pandas(dataframe)
	    dataset = dataset.map(
	        lambda item: {
	            "team_name": item["TEAM_NAME"],
	            "conc": item["CONC"],
	            "mi355x_e2e": item["MI355X_E2E"],
	            "mi355x_throughput": item["MI355X_THROUGHPUT"],
	            "b200_e2e": item["B200_E2E"],
	            "b200_throughput": item["B200_THROUGHPUT"],
	            "e2e_ratio": item["E2E_RATIO"],
	            "throughput_ratio": item["THROUGHPUT_RATIO"],
	            "interactivity": item["INTERACTIVITY"],
	            "b200_interactivity": item["B200_INTERACTIVITY"],
	            "interactivity_ratio": item["INTERACTIVITY_RATIO"],
	            "bits_per_byte": item["BITS_PER_BYTE"],
	            "byte_perplexity": item["BYTE_PERPLEXITY"],
	            "word_perplexity": item["WORD_PERPLEXITY"],
	            "timestamp": item["TIMESTAMP"],
	        },
	        # Drops every source column, leaving only the renamed ones
	        remove_columns=dataset.column_names,
	    )
	    if not push_to_hub:
	        return

	    try:
	        print(f"Attempting to push to hub: {DATASET_ID}")
	        if HF_TOKEN:
	            dataset.push_to_hub(DATASET_ID, token=HF_TOKEN)
	            print("Successfully pushed with explicit token")
	        else:
	            # Try to use the Space's default token
	            dataset.push_to_hub(DATASET_ID)
	            print("Successfully pushed with default token")
	    except Exception as e:
	        print(f"Error pushing to hub: {str(e)}")
	        # Propagate: swallowing here made a failed upload look like success
	        raise

	def get_leaderboard():
	    """Return the module-level leaderboard DataFrame after logging its row count."""
	    current = dataframe
	    print(f"Getting leaderboard: {len(current)}")
	    return current


	def reload_from_hub():
	    """Reload the leaderboard from the Hugging Face Hub.

	    The ``train`` split of ``DATASET_ID`` is downloaded again and handed to
	    ``load_dataframe``, which converts it, sorts it and stores it in the
	    module-level ``dataframe``. Using the shared conversion keeps the row
	    order identical to the initial load (``E2E_RATIO`` descending) instead of
	    re-sorting by a different column on every refresh.

	    Returns:
	        The refreshed DataFrame. If anything fails, the error is logged and
	        the current module-level ``dataframe`` is returned unchanged (this is
	        ``None`` when nothing has been loaded yet).
	    """
	    try:
	        print(f"[{datetime.now()}] Reloading data from Hub...")
	        # Guard the log line so a not-yet-loaded leaderboard does not abort the reload
	        current_length = 0 if dataframe is None else len(dataframe)
	        print(f"Current dataframe length before reload: {current_length}")

	        # Force reload from hub
	        dataset = load_dataset(
	            DATASET_ID,
	            split="train",
	            download_mode="force_redownload",
	            verification_mode="no_checks",  # Skip verification to force download
	        )
	        print(f"Loaded {len(dataset)} entries from Hub")

	        # Shared conversion; also updates the module-level dataframe
	        refreshed = load_dataframe(dataset)
	        print(f"Dataframe updated, new length: {len(refreshed)}")
	        return refreshed
	    except Exception as e:
	        # Best effort: keep serving the last known leaderboard
	        print(f"Error reloading from hub: {str(e)}")
	        return dataframe


	# Create Gradio interface
	def create_interface():
	"""Build and return the Gradio Blocks app for the leaderboard.

	The app contains a read-only leaderboard table initialised from
	load_dataframe(), a set of inputs created inside a Column with
	visible=False whose submit handler is registered under
	api_name="submit_results", a "Refresh Leaderboard" button, and a timer.
	The button and the timer both call reload_from_hub and write its result
	to the leaderboard table.

	Returns:
	The gr.Blocks instance (not yet launched).
	"""
	# NOTE(review): block nesting is not visible in this copy of the file
	# (indentation was flattened), so which components sit inside the Tab or the
	# hidden Column cannot be confirmed from here.
	# NOTE(review): nothing in this function assigns `dataframe`, so this
	# declaration appears to have no effect.
	global dataframe
	with gr.Blocks(title="AMD PR bounty Leaderboard for gptoss with isl8192osl1024") as demo:
	gr.Markdown("# AMD PR bounty Leaderboard for gptoss with isl8192osl1024")
	gr.Markdown(
	"Track and compare performance"
	)

	with gr.Tab("Leaderboard"):
	# Initial load
	leaderboard_table = gr.DataFrame(
	value=load_dataframe(),
	label="Benchmark Results",
	interactive=False,
	)

	# Inputs for api_submit_results, created with visible=False; they are listed
	# in the same order as that function's parameters.
	with gr.Column(visible=False):
	team_input = gr.Textbox()
	conc_input = gr.Number()
	mi355x_e2e_input = gr.Number()
	mi355x_throughput_input = gr.Number()
	b200_e2e_input = gr.Number()
	b200_throughput_input = gr.Number()
	e2e_ratio_input = gr.Number()
	throughput_ratio_input = gr.Number()
	interactivity_input = gr.Number()
	b200_interactivity_input = gr.Number()
	interactivity_ratio_input = gr.Number()
	bits_per_byte_input = gr.Number()
	byte_perplexity_input = gr.Number()
	word_perplexity_input = gr.Number()

	submit_output = gr.Textbox()
	submit_btn = gr.Button("Submit")
	# The handler returns [status text, DataFrame], matching the two outputs below.
	submit_btn.click(
	fn=api_submit_results,
	inputs=[
	team_input,
	conc_input,
	mi355x_e2e_input,
	mi355x_throughput_input,
	b200_e2e_input,
	b200_throughput_input,
	e2e_ratio_input,
	throughput_ratio_input,
	interactivity_input,
	b200_interactivity_input,
	interactivity_ratio_input,
	bits_per_byte_input,
	byte_perplexity_input,
	word_perplexity_input,
	],
	outputs=[submit_output, leaderboard_table],
	api_name="submit_results",
	concurrency_limit=10,
	show_progress="full",
	)
	refresh_btn = gr.Button("Refresh Leaderboard")
	refresh_btn.click(
	fn=reload_from_hub,
	outputs=leaderboard_table,
	)

	# Place timer outside the hidden column so it always runs
	# Reload from Hub every 20 seconds to show latest submissions
	reload_timer = gr.Timer(20)
	reload_timer.tick(
	fn=reload_from_hub,
	outputs=leaderboard_table,
	)

	return demo


	# Create and launch the app
	if __name__ == "__main__":
	    # Listen on all interfaces, port 7860, with share requested and SSR disabled
	    launch_options = {
	        "server_name": "0.0.0.0",
	        "server_port": 7860,
	        "share": True,
	        "ssr_mode": False,
	    }

	    demo = create_interface()
	    # Cap the number of queued events at 100
	    demo.queue(max_size=100)
	    demo.launch(**launch_options)