shanaka95 committed on
Commit 3e9e3ce · verified · 1 Parent(s): 3f7e178

Upload folder using huggingface_hub
README.md CHANGED
@@ -1,3 +1,104 @@
- ---
- license: mit
- ---
+ # AIDino
+
+ [![AIDino Demo Video](https://img.shields.io/badge/Watch%20Demo-YouTube-red?style=for-the-badge&logo=youtube)](https://youtu.be/jsjFVBiZG3I)
+
+ ## Overview
+ AIDino is a reinforcement learning project that automates gameplay of the Chrome Dino game. The system connects to Chrome's DevTools Protocol via WebSocket, captures the game state through efficient screenshot processing, and makes decisions with a trained Proximal Policy Optimization (PPO) model.
+
+ ## Features
+ - Chrome DevTools Protocol integration for direct browser communication
+ - Efficient screenshot capture and processing using MSS
+ - Custom Gymnasium (OpenAI Gym) environment for reinforcement learning
+ - PPO model implementation via Stable Baselines3
+ - Image preprocessing with Sobel edge detection
+ - Progressive model checkpoints for tracking training progress
+
+ ## Technical Architecture
+
+ ### Chrome Integration
+ The system connects to Chrome's DevTools Protocol via WebSocket (a minimal sketch follows the list), allowing:
+ - Programmatic control of the Dino game
+ - Precise input simulation (keyboard events)
+ - Real-time game state monitoring
+ - Screenshot capture of the game area
+
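+ A rough sketch of that connection, mirroring what `dino.py` does (port 1234 is the debugging port assumed throughout this project):
+
+ ```python
+ import asyncio
+ import json
+ import requests
+ import websockets
+
+ async def main():
+     # Find the DevTools WebSocket endpoint of the first open tab
+     ws_url = requests.get("http://localhost:1234/json").json()[0]["webSocketDebuggerUrl"]
+     async with websockets.connect(ws_url) as ws:
+         # Every DevTools command is a JSON object with an id, a method and params
+         await ws.send(json.dumps({"id": 1, "method": "Page.navigate",
+                                   "params": {"url": "chrome://dino/"}}))
+         print(await ws.recv())  # the response carries the same id
+
+ asyncio.run(main())
+ ```
+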
+ ### Custom Gym Environment
+ The project implements a custom Gymnasium environment (`DinoEnv`, skeleton below) that:
+ - Defines a discrete action space (jump, duck, do nothing)
+ - Processes screenshots into suitable observations
+ - Provides rewards based on game progress
+ - Handles game reset and initialization
+
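+ An illustrative skeleton of that interface, using the shapes from `env.py` (three discrete actions, a flattened 9000-pixel observation); the real environment fills these methods with DevTools calls:
+
+ ```python
+ import gymnasium as gym
+ import numpy as np
+ from gymnasium import spaces
+
+ class DinoEnvSkeleton(gym.Env):
+     def __init__(self):
+         super().__init__()
+         self.action_space = spaces.Discrete(3)  # 0 = jump, 1 = duck, 2 = do nothing
+         self.observation_space = spaces.Box(low=0, high=255, shape=(9000,), dtype=np.int32)
+
+     def reset(self, seed=None, options=None):
+         super().reset(seed=seed)
+         # The real environment restarts the game and returns a processed screenshot
+         return np.zeros(9000, dtype=np.int32), {}
+
+     def step(self, action):
+         # The real environment sends a key event, screenshots the game and checks for a crash
+         obs = np.zeros(9000, dtype=np.int32)
+         reward, terminated = 2.0, False  # +2 per surviving step, -100 on crash
+         return obs, reward, terminated, False, {}
+ ```
+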
+ ### Reinforcement Learning
+ The training process (condensed below from `rf.py`) uses:
+ - The Proximal Policy Optimization (PPO) algorithm
+ - An MLP policy network architecture
+ - A reward function that encourages longer survival
+ - Periodic model checkpoints to track training progress
+
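+ Condensed from `rf.py`, the training loop looks roughly like this (variable names differ slightly from the script):
+
+ ```python
+ import gymnasium as gym
+ import env  # importing env.py registers DinoEnv-v0
+ from stable_baselines3 import PPO
+
+ dino_env = gym.make("DinoEnv-v0")
+ model = PPO("MlpPolicy", dino_env, verbose=1)  # MLP policy over the flattened pixel vector
+
+ for i in range(100):
+     model.learn(total_timesteps=100_000)       # 100k timesteps per iteration
+     model.save(f"models/ppo_dino_backup{i}")   # periodic checkpoint
+ ```
+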
+ ## Model Checkpoints
+ The repository includes 10 progressive model checkpoints from training:
+ - ppo_dino_100k.zip through ppo_dino_1000k.zip
+ - Each checkpoint corresponds to the number of training timesteps in its name (100k to 1M)
+
+ ## Getting Started
+
+ ### Environment Setup
+ 1. Create and activate a virtual environment:
+ ```bash
+ # Create virtual environment
+ python -m venv venv
+
+ # Activate on Linux/macOS
+ source venv/bin/activate
+
+ # Activate on Windows
+ # venv\Scripts\activate
+ ```
+
+ 2. Install dependencies:
+ ```bash
+ pip install -r requirements.txt
+ ```
+
+ ### Running the Project
+ 1. Launch Chrome with remote debugging:
+ ```bash
+ # Linux
+ google-chrome --remote-debugging-port=1234
+
+ # Windows
+ # chrome.exe --remote-debugging-port=1234
+ ```
+
+ 2. Train the model:
+ ```bash
+ python rf.py
+ ```
+
+ 3. Use a pre-trained model by modifying the load path in `rf.py`:
+ ```python
+ # Uncomment and modify this line in rf.py
+ # model = PPO.load("models/ppo_dino_1000k", env=env)
+ ```
+
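+ To watch a loaded checkpoint play rather than continue training, a small evaluation loop along these lines should work (illustrative; not part of the repository):
+
+ ```python
+ import gymnasium as gym
+ import env  # registers DinoEnv-v0
+ from stable_baselines3 import PPO
+
+ dino_env = gym.make("DinoEnv-v0")
+ model = PPO.load("models/ppo_dino_1000k", env=dino_env)
+
+ obs, _ = dino_env.reset()
+ terminated = truncated = False
+ while not (terminated or truncated):
+     action, _ = model.predict(obs, deterministic=True)
+     obs, reward, terminated, truncated, info = dino_env.step(action)
+ ```
+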
+ ## How It Works
+ 1. The environment connects to Chrome via the DevTools Protocol
+ 2. The game is initialized and the initial state is captured
+ 3. For each timestep:
+    - The current game state is captured through screenshots
+    - Images are processed with Sobel edge detection (sketched below)
+    - The model selects an action (jump, duck, or do nothing)
+    - The action is executed through the Chrome connection
+    - Rewards are calculated based on survival and game progress
+ 4. Training continues for the configured number of timesteps, saving checkpoints along the way
+
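+ The preprocessing step, as implemented in `env.py`, amounts to:
+
+ ```python
+ import numpy as np
+ from PIL import Image
+ from scipy.ndimage import convolve
+
+ SOBEL_X = np.array([[-1, 0, 1],
+                     [-2, 0, 2],
+                     [-1, 0, 1]])
+
+ def preprocess(screenshot: Image.Image) -> np.ndarray:
+     gray = np.array(screenshot.convert("L"))   # grayscale pixel array
+     edges = convolve(gray, SOBEL_X)            # horizontal-gradient edge map
+     return edges.flatten()                     # flat vector fed to the MLP policy
+ ```
+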
+ ## Performance
+ The model demonstrates progressive improvement across training checkpoints, with later checkpoints showing significantly better game performance and higher average scores.
+
+ ## Contributing
+ Feel free to contribute to this project! Here are some ways you can help:
+ - Improve the reinforcement learning model or try different algorithms
+ - Enhance the image processing for better feature detection
+ - Optimize performance for faster training
+ - Report bugs or suggest features by opening an issue
dino.py ADDED
@@ -0,0 +1,276 @@
+ import asyncio
+ import json
+ import websockets
+ import requests
+ import base64
+ import time
+ import mss
+ import numpy as np
+ from PIL import Image
+ from io import BytesIO
+ from datetime import datetime
+ import pyautogui
+
+
+ class Dino:
+     def __init__(self, class_name):
+         self.class_name = class_name
+         self.ws_url = self.get_ws_url()
+         self.websocket = None
+         self.command_id = 1
+
+     @staticmethod
+     def get_ws_url():
+         response = requests.get('http://localhost:1234/json')
+         data = response.json()
+         return data[0]['webSocketDebuggerUrl']
+
+     async def connect(self):
+         self.websocket = await websockets.connect(self.ws_url)
+         # Enable necessary domains
+         await self.send_command("DOM.enable", {})
+         await self.send_command("CSS.enable", {})
+         await self.send_command("Page.enable", {})
+         await self.send_command("Runtime.enable", {})
+
+     async def send_command(self, method, params):
+         command = {
+             "id": self.command_id,
+             "method": method,
+             "params": params
+         }
+         await self.websocket.send(json.dumps(command))
+         self.command_id += 1
+
+         while True:
+             response = await self.websocket.recv()
+             response_data = json.loads(response)
+             if response_data.get("id") == command["id"]:
+                 return response_data
+
+     async def capture_screenshot(self):
+         try:
+             # Get document root
+             root = await self.send_command("DOM.getDocument", {"depth": -1})
+             root_node_id = root["result"]["root"]["nodeId"]
+
+             # Get the node ID of the element with the specified class name
+             search = await self.send_command("DOM.querySelector", {"nodeId": root_node_id, "selector": f".{self.class_name}"})
+             node_id = search["result"]["nodeId"]
+
+             # Get the box model of the element
+             box_model = await self.send_command("DOM.getBoxModel", {"nodeId": node_id})
+             content_box = box_model["result"]["model"]["content"]
+
+             # Capture screenshot of the area
+             screenshot = await self.send_command("Page.captureScreenshot", {
+                 "clip": {
+                     "x": content_box[0],
+                     "y": content_box[1],
+                     "width": content_box[2] - content_box[0],
+                     "height": content_box[5] - content_box[1],
+                     "scale": 1
+                 }
+             })
+
+             # Decode the base64 screenshot data
+             screenshot_data = base64.b64decode(screenshot["result"]["data"])
+             image = Image.open(BytesIO(screenshot_data))
+
+             resized_image = image.resize((image.width//5, image.height//5))
+
+             # Get the current date and time
+             #current_time = datetime.now()
+
+             # Format the date and time as a string
+             #timestamp_string = current_time.strftime('%H:%M:%S')
+
+             cropped_image = resized_image.crop((52, 0, 82, resized_image.height))
+             final_image = cropped_image.resize((30, 92))
+             return final_image
+
+         except Exception as e:
+             print(f"An error occurred: {e}")
+
+     async def get_window_name(self):
+         try:
+             # Evaluate JavaScript to get the window name
+             response = await self.send_command("Runtime.evaluate", {
+                 "expression": "window.name"
+             })
+             #print(response)
+             window_name = response["result"]["result"]["value"]
+
+             print(f"Window name: {window_name}")
+             return window_name
+         except Exception as e:
+             print(f"An error occurred while getting window name: {e}")
+             return None
+
+     async def enable_all_obstacles(self):
+         try:
+             # Drop the pterodactyl's minimum speed so every obstacle type can spawn
+             response = await self.send_command("Runtime.evaluate", {
+                 "expression": "spriteDefinitionByType.original.OBSTACLES[2].minSpeed = 0"
+             })
+
+             print("Enabled all obstacles")
+             return True
+         except Exception as e:
+             print(f"An error occurred while enabling obstacles: {e}")
+             return None
+
+
+     async def open_dino(self):
+         try:
+             response = await self.send_command("Page.navigate", {
+                 "url": "chrome://dino/"
+             })
+
+             return True
+         except Exception as e:
+             print(f"An error occurred while opening game: {e}")
+             return None
+
+     async def send_key_event(self, key, code, key_code):
+
+         try:
+
+             response1 = await self.send_command("Input.dispatchKeyEvent", {
+                 "type": "rawKeyDown",
+                 "key": key,
+                 "code": code,
+                 "keyCode": key_code,
+                 "windowsVirtualKeyCode": key_code,
+                 "nativeVirtualKeyCode": key_code,
+                 "modifiers": 0
+             })
+
+             if key_code == 40: time.sleep(0.4)
+
+             response = await self.send_command("Input.dispatchKeyEvent", {
+                 "type": "keyUp",
+                 "key": key,
+                 "code": code,
+                 "keyCode": key_code,
+                 "windowsVirtualKeyCode": key_code,
+                 "nativeVirtualKeyCode": key_code,
+                 "modifiers": 0
+             })
+
+             return True
+         except Exception as e:
+             print(f"An error occurred while sending key event: {e}")
+             return None
+
+     async def send_key_event2(self, key):
+
+         try:
+
+             pyautogui.press(key)
+
+             return True
+         except Exception as e:
+             print(f"An error occurred while sending key event: {e}")
+             return None
+
+     async def check_status(self):
+         try:
+             crashed = await self.send_command("Runtime.evaluate", {
+                 "expression": "Runner.instance_.crashed"
+             })
+             score = 0.0
+             try:
+                 score = await self.send_command("Runtime.evaluate", {
+                     "expression": "Runner.instance_.distanceRan"
+                 })
+
+                 score = float(score['result']['result']['value']) // 10
+             except Exception:
+                 # distanceRan may not be available before the game has started
+                 pass
+
+             return {
+                 "crashed": crashed['result']['result']['value'],
+                 "score": score
+             }
+         except Exception as e:
+             print(f"An error occurred while checking status: {e}")
+             return None
+
+     async def complete_action(self):
+         try:
+             crashed = await self.send_command("Runtime.evaluate", {
+                 "expression": "Runner.instance_.crashed"
+             })
+             crashed = crashed['result']['result']['value']
+             while not crashed:
+                 jumping = await self.send_command("Runtime.evaluate", {
+                     "expression": "Runner.instance_.tRex.jumping"
+                 })
+                 jumping = jumping['result']['result']['value']
+
+                 ducking = await self.send_command("Runtime.evaluate", {
+                     "expression": "Runner.instance_.tRex.ducking"
+                 })
+                 ducking = ducking['result']['result']['value']
+
+                 crashed = await self.send_command("Runtime.evaluate", {
+                     "expression": "Runner.instance_.crashed"
+                 })
+                 crashed = crashed['result']['result']['value']
+
+                 if (not jumping) and (not ducking): break
+
+         except Exception as e:
+             print(f"An error occurred while selecting action: {e}")
+             return None
+
+     async def capture_screenshot2(self):
+         try:
+             with mss.mss() as sct:
+                 # Define the region to capture
+                 monitor = {
+                     "top": 245,
+                     "left": 730,
+                     "width": 200,
+                     "height": 45,
+                 }
+
+                 # Capture the screenshot
+                 screenshot = sct.grab(monitor)
+
+                 # Convert the raw bytes data to a numpy array
+                 img = np.array(screenshot)
+
+                 # Convert the BGRA image to RGB
+                 img = img[:, :, :3]
+                 img = img[..., ::-1]
+
+                 # Convert the numpy array to a PIL image
+                 image = Image.fromarray(img)
+
+                 #resized_image = image.resize((100, 80))
+
+                 # Get the current date and time
+                 #current_time = datetime.now()
+
+                 # Format the date and time as a string
+                 #timestamp_string = current_time.strftime('%H:%M:%S')
+
+                 #resized_image.save(timestamp_string + 'resized_image.png')
+
+                 return image
+
+
+         except Exception as e:
+             print(f"An error occurred while capturing screenshot: {e}")
+             return None
+
+     async def start(self):
+         await self.connect()
+
+         # Get the window name once
+         #await self.get_window_name()
+
+         #await self.capture_screenshot()
+
env.py ADDED
@@ -0,0 +1,145 @@
+ import gymnasium as gym
+ from gymnasium import spaces
+ import numpy as np
+ from dino import Dino
+ import asyncio
+ import time
+ from scipy.ndimage import convolve
+ from tensorboardX import SummaryWriter
+ from datetime import datetime
+ from PIL import Image
+
+ class DinoEnv(gym.Env):
+     def __init__(self):
+         super(DinoEnv, self).__init__()
+
+         # Define the action and observation space
+         # Actions: 0=up, 1=down, 2=do nothing
+         self.action_space = spaces.Discrete(3)
+
+         self.sobel_kernel = np.array([
+             [-1, 0, 1],
+             [-2, 0, 2],
+             [-1, 0, 1]
+         ])
+
+         self.im_size = 9000
+
+         # Define the observation space: im_size flattened integer pixels
+         self.observation_space = spaces.Box(low=0, high=255, shape=(self.im_size,), dtype=np.int32)
+
+         # Initialize the state (e.g., all pixels set to 0)
+         self.state = np.zeros((self.im_size,), dtype=np.int32)
+
+         # Initialize other variables (e.g., a variable to keep track of the score)
+         self.score = 0
+
+         self.current_step = 0
+
+     def step(self, action):
+         print(str(action) + ' : ' + str(self.score))
+         self.current_step += 1
+         if action == 0:  # up
+             #asyncio.get_event_loop().run_until_complete(self.dino.send_key_event2("down"))
+             asyncio.get_event_loop().run_until_complete(self.dino.send_key_event("ArrowUp", "ArrowUp", 38))
+             asyncio.get_event_loop().run_until_complete(self.dino.complete_action())
+         elif action == 1:  # down
+             #asyncio.get_event_loop().run_until_complete(self.dino.send_key_event2("down"))
+             asyncio.get_event_loop().run_until_complete(self.dino.send_key_event("ArrowDown", "ArrowDown", 40))
+             asyncio.get_event_loop().run_until_complete(self.dino.complete_action())
+         elif action == 2:  # do nothing
+             time.sleep(0.1)
+             pass
+
+         self.state = self.get_screenshot()
+
+         status = asyncio.get_event_loop().run_until_complete(self.dino.check_status())
+
+         if not status or status['crashed']:
+             with open('scores.txt', 'a') as f:
+                 now = datetime.now()  # current date and time
+                 date_time = now.strftime("%m/%d/%Y, %H:%M:%S")
+                 print("---------date and time:", date_time)
+                 f.write(date_time + ' - ' + str(self.score) + '\n')
+             reward = -100
+             done = True
+             if not status:
+                 asyncio.get_event_loop().run_until_complete(self.dino.open_dino())
+         else:
+             reward = 2
+             #if action == 1: reward = 3
+             done = False
+
+         self.score += reward
+
+         info = {}
+
+         # observation, reward, terminated, truncated, info
+         return self.state, reward, done, False, info
+
+
+     def reset(self, seed=None, options=None):
+         super().reset(seed=seed)
+
+         self.dino = Dino('runner-canvas')
+
+         asyncio.get_event_loop().run_until_complete(self.dino.start())
+
+         asyncio.get_event_loop().run_until_complete(self.dino.open_dino())
+
+         time.sleep(2)
+
+         asyncio.get_event_loop().run_until_complete(self.dino.send_key_event(" ", "Space", 32))
+
+         time.sleep(2)
+
+         asyncio.get_event_loop().run_until_complete(self.dino.capture_screenshot2())
+
+         self.state = self.get_screenshot()
+
+         info = {}
+
+         self.current_step = 0
+
+         self.score = 0
+
+         asyncio.get_event_loop().run_until_complete(self.dino.enable_all_obstacles())
+
+         return self.state, info
+
+     def get_screenshot(self):
+         image = asyncio.get_event_loop().run_until_complete(self.dino.capture_screenshot2())
+
+         gray_image = image.convert('L')
+
+         image_array = np.array(gray_image)
+
+         convolved_image = convolve(image_array, self.sobel_kernel)
+         # Get the current date and time
+         current_time = datetime.now()
+
+         # Format the date and time as a string
+         timestamp_string = current_time.strftime('%H:%M:%S')
+
+         image = Image.fromarray(convolved_image)
+
+         # Save the image
+         #image.save('im/' + timestamp_string + 'conv_image.png')
+
+         flattened_array = convolved_image.flatten()
+
+         return flattened_array
+
+     def render(self, mode='human'):
+         pass
+
+
+ # Register the custom environment
+ gym.envs.registration.register(
+     id='DinoEnv-v0',
+     entry_point=__name__ + ':DinoEnv',
+ )
+
models/ppo_dino_1000k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:262d370a4c6af4ce1de1fc87ce618e8f2f79a65cef5756001d5f3c61cd3a2dbc
+ size 14129512
models/ppo_dino_100k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2275e46dc4d90d5a3ab3b7348a28a1af0f61234dd78b6a9f745ca7b62bbd39b
+ size 14129459
models/ppo_dino_200k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd376e9c53002769c61e7d4464bdcc864b6b6f0b2695390779af87e52198ad4a
+ size 14129479
models/ppo_dino_300k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7f0099ccd3a945d7ed7e72176bb5dc10f6213dfceff5a3596d9dac7944465a2
+ size 14129480
models/ppo_dino_400k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f43c84fc9a2506bfd22632e325dc355701812d1539a4ac73f9a9328169c3fb4d
+ size 14129484
models/ppo_dino_500k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b6cedc8fbf789ccc67d0d374f5ec7d421419b8627ed5f7abc7ed240e4adbfffe
+ size 14129492
models/ppo_dino_600k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:92d5465e28ce410f8ecc34b8779e08fd4326212c0ed18baf8ca237ef2854ab12
+ size 14129480
models/ppo_dino_700k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e877d70cf77fc234f0ef9c44360de26c20d68d5355d586296cecbc33508b6ed8
+ size 14129488
models/ppo_dino_800k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f50e439a5ac9a6afa674cd267c98ce7921eadbca8df2c5e507ce686147b9c393
+ size 14129496
models/ppo_dino_900k.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1ad4fa130ef150c2f715ae7aae5563168f543c51c51986b10530d0d7b423374b
+ size 14129496
requirements.txt ADDED
@@ -0,0 +1,11 @@
+ asyncio
+ websockets
+ requests
+ mss
+ numpy
+ Pillow
+ pyautogui
+ gymnasium
+ scipy
+ stable-baselines3
+ tensorboardX
rf.py ADDED
@@ -0,0 +1,14 @@
+ import gymnasium as gym
+ import env
+ from stable_baselines3 import PPO
+ env = gym.make("DinoEnv-v0")
+ model = PPO("MlpPolicy", env, verbose=1)
+
+ # Load the saved PPO model (Optional)
+ #model = PPO.load("models/ppo_dino_backup8", env=env)
+ for i in range(100):
+     model.learn(total_timesteps=100000)
+     model.save("models/ppo_dino")
+     model.save("models/ppo_dino_backup" + str(i))
+
+ env.close()