import os
import random
from datetime import datetime

import gradio as gr
from datasets import load_dataset, Dataset, DatasetDict
from huggingface_hub import whoami, InferenceClient

# Initialize the inference client
client = InferenceClient(
    api_key=os.getenv("HF_API_KEY"),  # Make sure to set this environment variable
)
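# HF_API_KEY should be a Hugging Face access token with permission to call the
# Inference API; the model itself is chosen per request in check_code() below.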

# Load questions from a Hugging Face dataset
EXAM_MAX_QUESTIONS = int(os.getenv("EXAM_MAX_QUESTIONS") or 5)  # We have 5 questions total
EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.0
EXAM_DATASET_ID = "agents-course/dummy-code-quiz"

# Prep the dataset for the quiz
ds = load_dataset(EXAM_DATASET_ID, split="train")
quiz_data = ds.to_list()
random.shuffle(quiz_data)
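# The shuffle runs once at startup, so every session served by this process sees
# the same randomized question order.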


def check_code(user_code, solution, challenge):
    """
    Use an LLM to evaluate whether the user's code solution is correct.
    Returns True if the solution is judged correct, False otherwise.
    """
    prompt = f"""You are an expert Python programming instructor evaluating a student's code solution.

Challenge:
{challenge}

Reference Solution:
{solution}

Student's Solution:
{user_code}

Evaluate if the student's solution is functionally equivalent to the reference solution.
Consider:
1. Does it solve the problem correctly?
2. Does it handle edge cases appropriately?
3. Does it follow the requirements of the challenge?

Respond with ONLY "CORRECT" or "INCORRECT" followed by a brief explanation.
"""
    messages = [{"role": "user", "content": prompt}]

    try:
        completion = client.chat.completions.create(
            model="Qwen/Qwen2.5-Coder-32B-Instruct",
            messages=messages,
            max_tokens=500,
        )
        response = completion.choices[0].message.content.strip()

        # Extract the verdict from the response
        is_correct = response.upper().startswith("CORRECT")

        # Surface the explanation to the user as a toast notification
        explanation = response.split("\n", 1)[1] if "\n" in response else ""
        gr.Info(explanation)

        return is_correct
    except Exception as e:
        gr.Warning(f"Error checking code: {str(e)}")
        # Fall back to a simple string comparison if the LLM call fails; this only
        # passes solutions that match the reference exactly
        return user_code.strip() == solution.strip()
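

# Gradio injects `token` from the Space's OAuth flow based on the `gr.OAuthToken | None`
# type annotation, so no explicit input component is wired to the login handler.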
def on_user_logged_in(token: gr.OAuthToken | None):
    """Handle user login state"""
    if token is not None:
        return gr.update(visible=False), gr.update(visible=True)
    else:
        return gr.update(visible=True), gr.update(visible=False)


def push_results_to_hub(
    user_answers: list, token: gr.OAuthToken | None, signed_in_message: str
):
    """Push results to the Hugging Face Hub."""
    print(f"signed_in_message: {signed_in_message}")
    if not user_answers:  # Check if there are any answers to submit
        gr.Warning("No answers to submit!")
        return "No answers to submit!"

    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return "Please log in to Hugging Face before pushing!"

    # Calculate grade
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    user_info = whoami(token=token.token)
    username = user_info["name"]
    repo_id = f"{EXAM_DATASET_ID}_responses"
    submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Create a dataset with the user's answers and metadata
    submission_data = [
        {
            "username": username,
            "datetime": submission_time,
            "grade": grade,
            **answer,  # Include all answer data
        }
        for answer in user_answers
    ]

    try:
        # Try to load the existing responses dataset
        existing_ds = load_dataset(repo_id)
        # Convert to DatasetDict if it isn't one already
        if not isinstance(existing_ds, dict):
            existing_ds = DatasetDict({"default": existing_ds})
    except Exception:
        # If the dataset doesn't exist yet, start from an empty DatasetDict
        existing_ds = DatasetDict()

    # Create a new dataset from this submission
    new_ds = Dataset.from_list(submission_data)

    # Add or update the split for this user
    existing_ds[username] = new_ds

    # Push the updated dataset to the Hub
    existing_ds.push_to_hub(
        repo_id,
        private=True,  # Private by default since it contains student submissions
    )
    return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
def handle_quiz(question_idx, user_answers, submitted_code, is_start):
    """Handle quiz state and progression"""
    # Hide the start button once the first question is shown
    start_btn_update = gr.update(visible=False) if is_start else None

    # If this is the first time (start=True), begin at question_idx=0
    if is_start:
        question_idx = 0
    else:
        # Otherwise, grade and store the user's last submission before advancing
        if (
            question_idx < len(quiz_data) and submitted_code.strip()
        ):  # Only grade if there's code
            current_q = quiz_data[question_idx]
            is_correct = check_code(
                submitted_code, current_q["solution"], current_q["challenge"]
            )
            user_answers.append(
                {
                    "challenge": current_q["challenge"],
                    "submitted_code": submitted_code,
                    "correct_solution": current_q["solution"],
                    "is_correct": is_correct,
                }
            )
        question_idx += 1

    # If we've reached the end, show final results
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers) if user_answers else 0
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
            f"Your answers:\n\n"
        )
        for idx, answer in enumerate(user_answers):
            results_text += (
                f"Question {idx + 1}: {'✅' if answer['is_correct'] else '❌'}\n"
            )
            results_text += (
                f"Your code:\n```python\n{answer['submitted_code']}\n```\n\n"
            )
        return (
            "",  # question_text becomes blank
            gr.update(value="", visible=False),  # clear and hide code input
            f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
            question_idx,
            user_answers,
            start_btn_update,
            gr.update(value=results_text, visible=True),  # show final_markdown
        )
    else:
        # Show the next question
        q = quiz_data[question_idx]
        challenge_text = f"## Question {question_idx + 1}\n### {q['challenge']}"
        return (
            challenge_text,
            gr.update(value=q["placeholder"], visible=True),
            "Submit your code solution and click 'Next' to continue.",
            question_idx,
            user_answers,
            start_btn_update,
            gr.update(visible=False),  # Hide final_markdown
        )
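

# handle_quiz returns one value per output component; the `outputs` lists wired to
# the Start and Next buttons below must match this order.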
with gr.Blocks() as demo:
    demo.title = f"Coding Quiz: {EXAM_DATASET_ID}"

    # State variables
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Complete each coding challenge, click 'Next', "
            "and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        code_input = gr.Code(language="python", label="Your Solution", visible=False)

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")
    with gr.Row(variant="compact"):
        final_markdown = gr.Markdown("", visible=False)

    next_btn = gr.Button("Next ⏭️")
    submit_btn = gr.Button("Submit ✅")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton()
        start_btn = gr.Button("Start", visible=False)

    login_btn.click(fn=on_user_logged_in, inputs=None, outputs=[login_btn, start_btn])
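
    # gr.State(True) / gr.State(False) pass a constant flag into handle_quiz, so the
    # same handler serves both the Start and Next buttons.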
    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(True)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            final_markdown,
        ],
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, code_input, gr.State(False)],
        outputs=[
            question_text,
            code_input,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            final_markdown,
        ],
    )
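
    # Passing login_btn as an input supplies its value as `signed_in_message`; the
    # OAuth token itself comes from the `gr.OAuthToken | None` annotation.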
    submit_btn.click(
        fn=push_results_to_hub,
        inputs=[user_answers, login_btn],
        outputs=status_text,
    )


if __name__ == "__main__":
    demo.launch()