ML-Agents与训练达不到目的AI的斗争史-如何用unity训练一个类吸血鬼幸存者自动躲避AI(探讨,暂时非成功经验)1.0

问题:如何用 Unity(ML-Agents)训练一个能在类吸血鬼幸存者游戏中自动躲避敌人的 AI。

我的想法:

       应该抓住问题的根源来解决:在类吸血鬼幸存者游戏中,躲避的目的是让血量维持在健康水平。因此我的思路是让训练中 AI 获得的所有奖励(AddReward)都与血量是否健康强关联。简而言之,不管加什么奖励,都会乘以当前血量的百分比;只要当前血量降低,全局奖励就会随之减少。不过类吸血鬼幸存者游戏有回血机制,所以在训练 AI 原型时也要加入一个简化的回血机制:每隔一定时间,如果血量仍大于 0,就随机回血 10-30 点(符合 roguelike 游戏回血的本质),从而间接增加全局奖励和存活时间。当血量降到 0 或以下时,则给予一个很大的惩罚(惩罚力度十分大,警告 AI 不要让此事再次发生)。游戏的表层设计原型就是这样,下面是实现该原型的代码。

        

using System.Collections;
using System.Collections.Generic;
using UnityEngine;
using Unity.MLAgents;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
/// <summary>
/// ML-Agents agent attached to the player object. Positive rewards are scaled by the
/// current health fraction, touching enemies or walls costs health, and reaching zero
/// health ends the episode with a large penalty.
/// </summary>
/// <remarks>
/// NOTE(review): reward magnitudes here reach the thousands, while the ML-Agents agent
/// design guide recommends keeping individual rewards around 1.0 in magnitude. The values
/// are kept as the author wrote them; consider rescaling if training is unstable.
/// </remarks>
public class PlayerAgent : Agent
{
    // Half-extent of the square (in local space) in which the agent is (re)spawned.
    private const float SpawnRange = 2f;
    // Health cap; also the denominator of the health fraction used to scale rewards.
    private const float MaxHp = 100f;
    // Radius of the overlap query used to look for enemies around the agent.
    private const float DetectRadius = 0.6f;
    // Enemies at or inside this distance count as touching the agent.
    private const float ContactDistance = 0.3f;
    // Health lost per action step for each enemy in contact.
    private const float ContactDamage = 0.1f;
    // Base reward per step for each enemy that is in range but not in contact.
    private const float NearEnemyReward = 0.1f;
    // Seconds between two movement-reward evaluations.
    private const float MoveCheckInterval = 1f;
    // Travelled distance above which the full movement reward is granted.
    private const float MoveDistanceCap = 0.8f;
    // Base movement reward (scaled by distance and health fraction).
    private const float MoveReward = 10f;
    // Seconds until the first heal of an episode, and between later heals.
    private const float FirstHealDelay = 20f;
    private const float HealInterval = 60f;
    // Base reward granted together with a heal.
    private const float HealReward = 10f;
    // Reward applied when health reaches zero.
    private const float DeathPenalty = -10000f;
    // Health lost on touching a wall, and penalty per missing health point.
    private const float WallDamage = 20f;
    private const float WallPenaltyPerMissingHp = 20f;
    // Movement speed in world units per second along each axis.
    private const float MoveSpeed = 1f;

    /// <summary>Current health. Reset to full at the start of every episode.</summary>
    public float hp = 0;

    private float moveTimer = MoveCheckInterval;
    private float healTimer = FirstHealDelay;
    private Vector3 lastPosition;

    /// <summary>Called once when the agent is first set up; places it at a random spot.</summary>
    public override void Initialize()
    {
        Respawn();
    }

    /// <summary>Feeds the agent's own health to the policy as its single vector observation.</summary>
    /// <param name="sensor">Vector sensor that receives the observation.</param>
    public override void CollectObservations(VectorSensor sensor)
    {
        // NOTE(review): the raw value (0..100) is observed; normalising to 0..1 is usually
        // recommended, but would change the input scale for already-trained models.
        sensor.AddObservation(hp);
    }

    /// <summary>
    /// Called at the start of each episode (including after <c>EndEpisode</c>): restores
    /// health, respawns the agent and resets all per-episode timers.
    /// </summary>
    public override void OnEpisodeBegin()
    {
        hp = MaxHp;
        // Timers must not leak from the previous episode into the new one.
        moveTimer = MoveCheckInterval;
        healTimer = FirstHealDelay;
        Respawn();
    }

    /// <summary>
    /// Runs once per action step: applies enemy-proximity rewards and damage, the periodic
    /// movement reward, death or healing, and finally moves the agent.
    /// </summary>
    /// <param name="actionBuffers">
    /// Two discrete branches: [0] vertical and [1] horizontal, each 0 = stay, 1 = positive
    /// direction, 2 = negative direction.
    /// </param>
    public override void OnActionReceived(ActionBuffers actionBuffers)
    {
        // hp is a public field, so clamp defensively before using it as a fraction.
        hp = Mathf.Min(hp, MaxHp);
        float healthRatio = hp / MaxHp;

        ApplyEnemyProximity(healthRatio);

        moveTimer -= Time.fixedDeltaTime;
        healTimer -= Time.fixedDeltaTime;

        if (moveTimer < 0f)
        {
            RewardMovement(healthRatio);
        }

        if (hp <= 0f)
        {
            // Game over: punish heavily and restart. EndEpisode() has already reset and
            // respawned the agent, so the stale action must not be applied afterwards.
            AddReward(DeathPenalty);
            EndEpisode();
            return;
        }

        if (healTimer < 0f)
        {
            // Still alive when the heal timer fires: regenerate and reward survival.
            // The int overload of Random.Range excludes the upper bound (heals 10..29).
            hp = Mathf.Min(hp + Random.Range(10, 30), MaxHp);
            healTimer = HealInterval;
            AddReward(HealReward * healthRatio);
        }

        Move(actionBuffers.DiscreteActions[0], actionBuffers.DiscreteActions[1]);
    }

    /// <summary>
    /// Manual control for heuristic mode: maps the "Vertical" and "Horizontal" input axes
    /// onto the two discrete action branches.
    /// </summary>
    /// <param name="actionsOut">Action buffers to fill in.</param>
    public override void Heuristic(in ActionBuffers actionsOut)
    {
        var discreteActions = actionsOut.DiscreteActions;
        // Always write both branches so "no input" is an explicit 0 rather than whatever
        // the buffer happened to contain.
        discreteActions[0] = AxisToBranch(Input.GetAxis("Vertical"));
        discreteActions[1] = AxisToBranch(Input.GetAxis("Horizontal"));
    }

    /// <summary>Touching a wall costs health, is penalised, and teleports the agent back inside.</summary>
    /// <param name="collision">Collision data supplied by the 2D physics engine.</param>
    private void OnCollisionEnter2D(Collision2D collision)
    {
        if (!collision.gameObject.CompareTag("Wall"))
        {
            return;
        }

        // Apply the damage first so the penalty reflects the health actually lost;
        // otherwise a wall hit at full health would be penalised with exactly zero.
        hp -= WallDamage;
        AddReward(-WallPenaltyPerMissingHp * (MaxHp - hp));
        Respawn();
    }

    // Damages and penalises the agent for every enemy in contact, and rewards it
    // (scaled by health) for every enemy that is within range but kept at a distance.
    private void ApplyEnemyProximity(float healthRatio)
    {
        var nearby = Physics2D.OverlapCircleAll(transform.position, DetectRadius);
        foreach (var other in nearby)
        {
            if (!other.CompareTag("Enemy"))
            {
                continue;
            }

            if (Vector2.Distance(other.transform.position, transform.position) <= ContactDistance)
            {
                hp -= ContactDamage;
                // The penalty grows as health drops: 0 at full health, 10 at zero health.
                AddReward(-1 * (10 - hp / 10));
            }
            else
            {
                AddReward(NearEnemyReward * healthRatio);
            }
        }
    }

    // Rewards distance travelled since the last check to discourage idling, then
    // restarts the movement timer.
    private void RewardMovement(float healthRatio)
    {
        Vector3 currentPosition = transform.position;
        float moved = Vector3.Distance(currentPosition, lastPosition);
        // Full reward beyond the cap, proportional to the distance below it.
        float factor = moved > MoveDistanceCap ? 1f : moved;
        AddReward(MoveReward * factor * healthRatio);
        lastPosition = currentPosition;
        moveTimer = MoveCheckInterval;
    }

    // Translates the agent in world space according to the two discrete branches.
    private void Move(int verticalBranch, int horizontalBranch)
    {
        Vector3 direction = Vector3.up * BranchToSign(verticalBranch)
                          + Vector3.right * BranchToSign(horizontalBranch);
        transform.Translate(direction * Time.fixedDeltaTime * MoveSpeed, Space.World);
    }

    // Places the agent at a random local position and re-bases the movement tracker so
    // the teleport itself is never counted as travelled distance.
    private void Respawn()
    {
        transform.localPosition = new Vector3(
            Random.Range(-SpawnRange, SpawnRange),
            Random.Range(-SpawnRange, SpawnRange),
            0f);
        lastPosition = transform.position;
    }

    // Discrete branch value -> axis sign (1 => +1, 2 => -1, anything else => 0).
    private static float BranchToSign(int branch)
    {
        switch (branch)
        {
            case 1:
                return 1f;
            case 2:
                return -1f;
            default:
                return 0f;
        }
    }

    // Input axis value -> discrete branch value (positive => 1, negative => 2, idle => 0).
    private static int AxisToBranch(float axis)
    {
        if (axis > 0f)
        {
            return 1;
        }

        if (axis < 0f)
        {
            return 2;
        }

        return 0;
    }
}

Player 上挂载的组件有:

enemy的脚本

using System.Collections;
using System.Collections.Generic;
using UnityEngine;

/// <summary>
/// Enemy that walks towards one player picked from the "PlayerGen" container and is
/// released back to the enemy pool after it has stayed close to players for long enough.
/// </summary>
public class EnemyController : MonoBehaviour
{
    // Chase speed in world units per second.
    private const float MoveSpeed = 0.3f;
    // Radius of the overlap query used to detect nearby players.
    private const float ContactRadius = 0.75f;
    // Accumulated seconds near players after which the enemy is returned to the pool.
    private const float ContactLifetime = 3f;

    /// <summary>Player currently being chased; assigned by <see cref="Enable"/>.</summary>
    public Transform transformPlayer;
    /// <summary>Container whose children are the candidate players.</summary>
    public Transform PlayerGen;
    /// <summary>Seconds accumulated so far while players were within contact radius.</summary>
    public float timer = 0f;

    // Cached physics mask for the "Player" layer (0 if the layer does not exist).
    private int playerLayerMask;

    private void Awake()
    {
        // Resolve the layer once instead of on every physics step.
        playerLayerMask = LayerMask.GetMask("Player");
    }

    /// <summary>
    /// Prepares the enemy for (re)use: looks up the "PlayerGen" sibling of this object's
    /// parent, picks a random child as chase target and resets the contact timer.
    /// Leaves the enemy without a target (and logs a warning) if none can be found.
    /// </summary>
    public void Enable()
    {
        timer = 0f;

        // Unity objects use an overloaded == for null checks, so avoid ?. here.
        Transform parent = transform.parent;
        Transform root = parent != null ? parent.parent : null;
        PlayerGen = root != null ? root.Find("PlayerGen") : null;

        if (PlayerGen == null || PlayerGen.childCount == 0)
        {
            transformPlayer = null;
            Debug.LogWarning("EnemyController: no player found under \"PlayerGen\"; enemy stays idle.", this);
            return;
        }

        transformPlayer = PlayerGen.GetChild(Random.Range(0, PlayerGen.childCount));
    }

    /// <summary>
    /// Each physics step: moves towards the target and accumulates contact time for
    /// players within range; once the time limit is exceeded the enemy is pooled again.
    /// </summary>
    private void FixedUpdate()
    {
        if (transformPlayer == null)
        {
            // No target yet (Enable() not called) or the target was destroyed.
            return;
        }

        transform.position = Vector2.MoveTowards(
            transform.position, transformPlayer.position, MoveSpeed * Time.fixedDeltaTime);

        var hits = Physics2D.OverlapCircleAll(transform.position, ContactRadius, playerLayerMask);
        for (int i = 0; i < hits.Length; i++)
        {
            if (!hits[i].CompareTag("Player"))
            {
                continue;
            }

            // Split the step time across all hits so several players do not speed up the timer.
            timer += Time.fixedDeltaTime / hits.Length;
            if (timer > ContactLifetime)
            {
                timer = 0f;
                EnemyGEN.instance.pool.Release(gameObject);
                // The object is back in the pool; stop processing this step.
                return;
            }
        }
    }
}

场景:

 训练结果尚未得出,结果和代码调整将在后续更新。

本图文内容来源于网友网络收集整理提供,作为学习参考使用,版权属于原作者。
THE END
分享
二维码
< <上一篇
下一篇>>