add RL training which doesn't work that well yet, and start to make UI for model training

This commit is contained in:
csd4ni3l
2025-11-15 15:56:56 +01:00
parent 032f38f4ce
commit 32477def6a
9 changed files with 524 additions and 17 deletions

View File

@@ -3,6 +3,8 @@ import arcade, arcade.gui, random, time
from utils.constants import button_style, ENEMY_ROWS, ENEMY_COLS, PLAYER_ATTACK_SPEED
from utils.preload import button_texture, button_hovered_texture
from stable_baselines3 import PPO
from game.sprites import Enemy, Player, Bullet
class Game(arcade.gui.UIView):
@@ -16,19 +18,29 @@ class Game(arcade.gui.UIView):
self.spritelist = arcade.SpriteList()
self.player = Player(100, 100) # not actually player
self.player = Player(self.window.width / 2 + random.randint(int(-self.window.width / 3), int(self.window.width / 3)), 100) # not actually player
self.spritelist.append(self.player)
self.last_player_shoot = time.perf_counter() # not actually player
self.model = PPO.load("invader_agent.zip")
self.enemies: list[Enemy] = []
self.bullets: list[Bullet] = []
self.player_bullets: list[Bullet] = []
self.enemy_bullets: list[Bullet] = []
self.summon_enemies()
def on_show_view(self):
    """Run the parent show hook, then add a '<--' button anchored top-left.

    Clicking the button hands control to ``main_exit``.
    """
    super().on_show_view()

    back_widget = arcade.gui.UITextureButton(
        texture=button_texture,
        texture_hovered=button_hovered_texture,
        text='<--',
        style=button_style,
        width=100,
        height=50,
    )
    # Store whatever `anchor.add` hands back, so the click handler is
    # attached to that same object.
    self.back_button = self.anchor.add(back_widget, anchor_x="left", anchor_y="top")

    def _go_back(event):
        return self.main_exit()

    self.back_button.on_click = _go_back
def main_exit(self):
    """Switch the window to a new ``Main`` menu view.

    The new view receives this view's ``pypresence_client``.
    """
    # Imported at call time rather than at module level
    # (presumably to avoid a circular import with the menu module — confirm).
    from menus.main import Main

    main_menu = Main(self.pypresence_client)
    self.window.show_view(main_menu)
def summon_enemies(self):
enemy_start_x = self.window.width * 0.15
enemy_start_y = self.window.height * 0.9
@@ -45,7 +57,7 @@ class Game(arcade.gui.UIView):
bullets_to_remove = []
for bullet in self.bullets:
for bullet in self.player_bullets + self.enemy_bullets:
bullet.update()
bullet_hit = False
@@ -68,9 +80,22 @@ class Game(arcade.gui.UIView):
for bullet_to_remove in bullets_to_remove:
self.spritelist.remove(bullet_to_remove)
self.bullets.remove(bullet_to_remove)
self.player.update(self.enemies) # not actually player
if bullet_to_remove in self.enemy_bullets:
self.enemy_bullets.remove(bullet_to_remove)
elif bullet_to_remove in self.player_bullets:
self.player_bullets.remove(bullet_to_remove)
self.player.update(self.model, self.enemies, self.enemy_bullets, self.window.width, self.window.height) # not actually player
if self.player.center_x > self.window.width:
self.player.center_x = self.window.width
elif self.player.center_x < 0:
self.player.center_x = 0
if self.player.shoot:
self.player.shoot = False
self.shoot(self.player.center_x, self.player.center_y, 1)
if time.perf_counter() - self.last_player_shoot >= PLAYER_ATTACK_SPEED:
self.last_player_shoot = time.perf_counter()
@@ -79,7 +104,13 @@ class Game(arcade.gui.UIView):
def shoot(self, x, y, direction_y):
    """Create a bullet at ``(x, y)`` and register it with the view.

    The bullet is added to the sprite list, to the combined ``bullets``
    list, and to exactly one of the per-side lists: ``player_bullets``
    when ``direction_y`` equals 1, ``enemy_bullets`` for any other value.
    """
    new_bullet = Bullet(x, y, direction_y)

    self.spritelist.append(new_bullet)
    self.bullets.append(new_bullet)

    # A direction of exactly 1 marks the shot as coming from the player sprite.
    side_list = self.player_bullets if direction_y == 1 else self.enemy_bullets
    side_list.append(new_bullet)
def on_key_press(self, symbol, modifiers):
if symbol == arcade.key.SPACE:

View File

@@ -1,6 +1,10 @@
import arcade, random, time
import arcade, time
from utils.constants import PLAYER_SPEED, BULLET_SPEED, BULLET_RADIUS
from stable_baselines3 import PPO
import numpy as np
from utils.constants import PLAYER_SPEED, BULLET_SPEED, BULLET_RADIUS, PLAYER_ATTACK_SPEED, ENEMY_COLS, ENEMY_ROWS
from utils.preload import player_texture, enemy_texture
class Bullet(arcade.Sprite):
@@ -21,17 +25,55 @@ class Player(arcade.Sprite): # Not actually the player
super().__init__(player_texture, center_x=x, center_y=y)
self.last_target_change = time.perf_counter()
self.last_shoot = time.perf_counter()
self.target = None
self.shoot = False
# NOTE(review): two `update` signatures appear in this span (the one-argument
# form directly below and the model-driven form further down) and the text
# carries no indentation, so the real nesting cannot be confirmed from here.
# One-argument form: does nothing when `enemies` is empty.
def update(self, enemies):
if not enemies:
return
# Last horizontal displacement in units of PLAYER_SPEED; it is read when the
# observation vector is built and rewritten on the final line of this span.
self.player_speed = 0
# Pick a new random target when none is set or the current one is >= 1 s old.
if not self.target or time.perf_counter() - self.last_target_change >= 1:
self.last_target_change = time.perf_counter()
self.target = random.choice(enemies)
# Model-driven form: builds a 9-value float32 observation from the game state,
# asks `model` for an action and applies it to this sprite.
#   model   -- annotated as PPO; only `predict(obs, deterministic=True)` is used
#   enemies -- objects exposing center_x / center_y (may be empty)
#   bullets -- objects exposing center_x / center_y (may be empty)
#   width, height -- divisors used to scale positions and offsets
def update(self, model: PPO, enemies, bullets, width, height):
# Offset to the nearest enemy (Manhattan distance), scaled by width/height.
if enemies:
nearest_enemy = min(enemies, key=lambda e: abs(e.center_y - self.center_y) + abs(e.center_x - self.center_x))
enemy_x = (nearest_enemy.center_x - self.center_x) / width
enemy_y = (nearest_enemy.center_y - self.center_y) / height
else:
# Placeholder value used when there is no enemy to measure against.
enemy_x = 2
enemy_y = 2
# NOTE(review): `self.target` is only assigned by the one-argument form above;
# this condition looks like a leftover from that version — confirm.
if self.target.center_x > self.center_x:
# Share of the full ENEMY_ROWS x ENEMY_COLS grid still present
# (the denominator is clamped to at least 1).
enemy_count = len(enemies) / float(max(1, ENEMY_ROWS * ENEMY_COLS))
player_x_norm = self.center_x / width
# Closest bullet by Manhattan distance, or None when `bullets` is empty.
curr_bullet = min(bullets, key=lambda b: abs(b.center_x - self.center_x) + abs(b.center_y - self.center_y)) if bullets else None
if curr_bullet is not None:
curr_bx = (curr_bullet.center_x - self.center_x) / float(width)
curr_by = (curr_bullet.center_y - self.center_y) / float(height)
else:
# Same placeholder convention as for a missing enemy.
curr_bx = 2.0
curr_by = 2.0
# NOTE(review): the name says `lowest`, but `max` selects the enemy with the
# largest center_y — confirm which end of the y axis is intended.
lowest = max(enemies, key=lambda e: e.center_y) if enemies else None
if lowest is not None:
lowest_dy = (lowest.center_y - self.center_y) / float(height)
else:
lowest_dy = 2.0
# Standard deviation of the enemies' x positions divided by width;
# stays 0.0 when there are no enemies.
enemy_dispersion = 0.0
if enemies:
xs = np.array([e.center_x for e in enemies], dtype=np.float32)
enemy_dispersion = float(xs.std()) / float(width)
# Presumably the field order must match what the model was trained on —
# TODO confirm against the training environment.
obs = np.array([player_x_norm, enemy_x, enemy_y, lowest_dy, curr_bx, curr_by, self.player_speed, enemy_count, enemy_dispersion], dtype=np.float32)
# deterministic=True is passed to predict; the second return value is discarded.
action, _ = model.predict(obs, deterministic=True)
# Remember the x position before moving so the displacement can be derived below.
self.prev_x = self.center_x
# Action 0 steps left and action 1 steps right, each by PLAYER_SPEED.
if action == 0:
self.center_x -= PLAYER_SPEED
elif action == 1:
self.center_x += PLAYER_SPEED
# NOTE(review): another `self.target` comparison sitting between the action
# branches; it looks like residue of the one-argument form — confirm.
elif self.target.center_x < self.center_x:
self.center_x -= PLAYER_SPEED
# Action 2 requests a shot, rate-limited to one per PLAYER_ATTACK_SPEED seconds.
elif action == 2:
t = time.perf_counter()
if t - self.last_shoot >= PLAYER_ATTACK_SPEED:
self.last_shoot = t
# Only a flag is raised here; no bullet is created in this method.
self.shoot = True
# Displacement of this call divided by PLAYER_SPEED (divisor floored at 1e-6).
self.player_speed = (self.center_x - self.prev_x) / max(1e-6, PLAYER_SPEED)