ML-Agents与训练达不到目的AI的斗争史-如何用unity训练一个类吸血鬼幸存者自动躲避AI(探讨,暂时非成功经验)1.0
问题:如何用Unity训练一个类吸血鬼幸存者游戏的自动躲避AI?
我的想法:
应该抓住问题的根源来解决:类吸血鬼幸存者游戏中,躲避的目的是让血量维持在健康水平。所以我的思路是让训练AI时的所有奖励(AddReward)都与血量是否健康建立强关联。简而言之,不管怎么加奖励,都会乘以当前血量的百分比;只要当前血量降低,全局的奖励就会减少。但类吸血鬼幸存者游戏有回血机制,所以在训练AI原型时也要简单加入,并简化加血的机制:每隔一定时间,如果血量仍大于0,就随机加血10~30(符合roguelike游戏回血的本质),间接增加全局的奖励和存活时间。当前血量小于等于0时,则给予一个很大的惩罚(力度十分大,警告AI不要让此事再次发生)。游戏的表层设计原型就是这样,下面是实现该原型的代码。
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
// Attach this component to the player object.
/// <summary>
/// ML-Agents agent for a survivor-style dodging prototype.
/// Positive rewards are multiplied by the current health fraction, so losing
/// health lowers everything the agent can earn. Reaching zero health ends
/// the episode with a large penalty.
/// </summary>
public class PlayerAgent : Agent
{
    // Upper bound for hp; also the value hp is reset to when an episode begins.
    const float MaxHp = 100f;
    // Seconds between two evaluations of the movement reward.
    const float MoveRewardInterval = 1f;
    // Seconds before the first heal, and between subsequent heals.
    const float FirstHealDelay = 20f;
    const float HealInterval = 60f;
    // Half-size of the square (in local space) the agent is spawned in.
    const float SpawnExtent = 2f;

    // Countdown until the next movement-reward evaluation.
    float moveTimer = MoveRewardInterval;
    // Current health. Public so other scripts / the Inspector can read it.
    public float hp = 0;
    // Countdown until the next heal.
    float healTimer = FirstHealDelay;
    // World position recorded at the last movement-reward evaluation (or teleport).
    private Vector3 lastPosition;

    /// <summary>Called once when the agent is first set up: picks a random spawn point.</summary>
    public override void Initialize()
    {
        Respawn();
    }

    /// <summary>Exposes the agent's current health as its only vector observation.</summary>
    public override void CollectObservations(VectorSensor sensor)
    {
        sensor.AddObservation(hp);
    }

    /// <summary>
    /// Called at the start of every episode (including after <c>EndEpisode()</c>):
    /// restores health, restarts both timers and respawns the agent.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        hp = MaxHp;
        // Timers are per-episode state; without this reset the countdowns of the
        // previous episode would leak into the new one.
        moveTimer = MoveRewardInterval;
        healTimer = FirstHealDelay;
        Respawn();
    }

    /// <summary>
    /// Runs once per decision step: applies enemy-contact damage and rewards,
    /// the periodic movement reward, death / heal handling, and finally moves
    /// the agent according to the two discrete action branches.
    /// </summary>
    /// <param name="actionBuffers">
    /// Branch 0: vertical (1 = up, 2 = down). Branch 1: horizontal (1 = right, 2 = left).
    /// Any other value means "no movement" on that axis.
    /// </param>
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // Never let health exceed the maximum.
        hp = Mathf.Min(hp, MaxHp);
        // Current health fraction; scales every positive reward of this step.
        float a = hp / MaxHp;

        var collider2Ds = Physics2D.OverlapCircleAll(transform.position, 0.6f);
        foreach (var item in collider2Ds)
        {
            if (!item.CompareTag("Enemy"))
            {
                continue;
            }

            if (Vector2.Distance(item.transform.position, transform.position) <= 0.3f)
            {
                // Touching an enemy: lose health; the penalty grows as health drops.
                hp -= 0.1f;
                AddReward(-1 * (10 - hp / 10));
            }
            else
            {
                // Enemy is inside the 0.6 radius but not touching: small reward.
                AddReward(0.1f * a);
            }
        }

        moveTimer -= Time.fixedDeltaTime;
        healTimer -= Time.fixedDeltaTime;

        // Once per interval, reward distance travelled so that early in training
        // the agent is encouraged to move instead of idling.
        if (moveTimer < 0f)
        {
            var currentPosition = transform.position;
            float travelled = Vector3.Distance(currentPosition, lastPosition);
            if (travelled > 0.8f)
            {
                AddReward(10f * a);
            }
            else
            {
                AddReward(10f * travelled * a);
            }
            lastPosition = currentPosition;
            moveTimer = MoveRewardInterval;
        }

        if (hp <= 0)
        {
            // Game over: large penalty, then restart in a fresh random state.
            AddReward(-10000f);
            EndEpisode();
            // The action of the finished episode must not move the respawned agent.
            return;
        }

        if (healTimer < 0)
        {
            // Still alive when the heal timer fires: heal 10-30 and reward survival.
            // The float overload is used because the int overload excludes the upper bound.
            hp = Mathf.Min(hp + Random.Range(10f, 30f), MaxHp);
            healTimer = HealInterval;
            AddReward(10f * a);
        }

        // Translate the discrete actions into a movement direction.
        var dirToGo = Vector3.zero;
        var forwardAxis = actionBuffers.DiscreteActions[0];
        var rightAxis = actionBuffers.DiscreteActions[1];
        switch (forwardAxis)
        {
            case 1:
                dirToGo += Vector3.up;
                break;
            case 2:
                dirToGo += Vector3.down;
                break;
        }
        switch (rightAxis)
        {
            case 1:
                dirToGo += Vector3.right;
                break;
            case 2:
                dirToGo += Vector3.left;
                break;
        }
        transform.Translate(dirToGo * Time.fixedDeltaTime * 1f, Space.World);
    }

    /// <summary>
    /// Manual control: maps the "Vertical" / "Horizontal" input axes onto the
    /// two discrete action branches. (Recording demonstrations needs a separate component.)
    /// </summary>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var outDiscreteActions = actionsOut.DiscreteActions;
        float vertical = Input.GetAxis("Vertical");
        float horizontal = Input.GetAxis("Horizontal");

        // Always write both branches so "no input" is an explicit 0 rather than
        // whatever the buffer happened to contain.
        outDiscreteActions[0] = vertical > 0 ? 1 : (vertical < 0 ? 2 : 0);
        outDiscreteActions[1] = horizontal > 0 ? 1 : (horizontal < 0 ? 2 : 0);
    }

    /// <summary>
    /// Touching a wall is penalised (more heavily at low health), costs 20 health
    /// and teleports the agent back into the spawn area.
    /// </summary>
    private void OnCollisionEnter2D(Collision2D collision)
    {
        if (collision.gameObject.CompareTag("Wall"))
        {
            AddReward(-20f * (100 - hp));
            hp -= 20f;
            Respawn();
        }
    }

    // Moves the agent to a random point of the spawn square and re-bases the
    // movement-reward reference so a teleport is never rewarded as "distance travelled".
    private void Respawn()
    {
        transform.localPosition = new Vector3(
            Random.Range(-SpawnExtent, SpawnExtent),
            Random.Range(-SpawnExtent, SpawnExtent),
            0);
        lastPosition = transform.position;
    }
}
player 上面的组件有
enemy的脚本
using System.Collections;
using System.Collections.Generic;
using UnityEngine;
/// <summary>
/// Enemy that chases one randomly chosen player and returns itself to the
/// enemy pool after it has spent more than three seconds near players.
/// </summary>
public class EnemyController : MonoBehaviour
{
    // Player currently being chased; assigned by Enable().
    public Transform transformPlayer;
    // Parent object whose children are the candidate players.
    public Transform PlayerGen;
    // Accumulated time (seconds) spent with a player inside the proximity radius.
    public float timer = 0f;

    /// <summary>
    /// Re-initialises the enemy: resets the proximity timer and picks a random
    /// child of the "PlayerGen" object (looked up under this object's grandparent)
    /// as the chase target. Leaves the enemy without a target when the hierarchy
    /// or the player list is missing/empty instead of throwing.
    /// </summary>
    public void Enable()
    {
        timer = 0f;
        transformPlayer = null;

        var parent = transform.parent;
        var root = parent != null ? parent.parent : null;
        PlayerGen = root != null ? root.Find("PlayerGen") : null;
        if (PlayerGen == null || PlayerGen.childCount == 0)
        {
            return;
        }

        transformPlayer = PlayerGen.GetChild(Random.Range(0, PlayerGen.childCount));
    }

    /// <summary>
    /// Each physics step: moves toward the target (if any), then accumulates
    /// proximity time for players within the 0.75 radius and releases this
    /// enemy to the pool once the accumulated time exceeds three seconds.
    /// </summary>
    void FixedUpdate()
    {
        // Unity's overloaded == also reports destroyed objects as null, so this
        // covers both "Enable() was never called" and "target was destroyed".
        if (transformPlayer != null)
        {
            transform.position = Vector2.MoveTowards(transform.position, transformPlayer.position, 0.3f * Time.fixedDeltaTime);
        }

        var collider2Ds = Physics2D.OverlapCircleAll(transform.position, 0.75f, 1 << LayerMask.NameToLayer("Player"));
        for (int i = 0; i < collider2Ds.Length; i++)
        {
            if (!collider2Ds[i].gameObject.CompareTag("Player"))
            {
                continue;
            }

            // Each matching collider contributes an equal share of the step time.
            timer += Time.fixedDeltaTime / collider2Ds.Length;
            if (timer > 3f)
            {
                timer = 0f;
                EnemyGEN.instance.pool.Release(gameObject);
                // Already handed back to the pool; stop processing this step.
                return;
            }
        }
    }
}
场景:
训练仍在进行中,结果尚未得出;训练结果和代码调整将在后续更新。
本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
二维码