Add an 'Evaluate' RL endpoint, intended for dynamic game balancing

This adds a new endpoint to the RL interface for evaluating custom JavaScript. When combined with the ability to create arbitrary modifiers, this provides the required functionality for exploring quantitative game balancing.

Patch By: irishninja
Fixes #5981
Differential Revision: https://code.wildfiregames.com/D3479

This was SVN commit r24962.
2021-02-28 12:16:32 +00:00 · 2021-02-28 12:16:32 +00:00 · df18102e96
commit df18102e96
parent 2e2ef6f3fe
7 changed files with 137 additions and 15 deletions
--- a/source/rlinterface/RLInterface.cpp
+++ b/source/rlinterface/RLInterface.cpp
@ -62,7 +62,7 @@ std::string Interface::SendGameMessage(GameMessage&& msg)
 	ENSURE(m_GameMessage.type == GameMessageType::None);
 	m_GameMessage = std::move(msg);
 	m_MsgApplied.wait(msgLock, [this]() { return m_GameMessage.type == GameMessageType::None; });
-	return m_GameState;
+	return m_ReturnValue;
 }

 std::string Interface::Step(std::vector<GameCommand>&& commands)
@ -78,6 +78,13 @@ std::string Interface::Reset(ScenarioConfig&& scenario)
 	return SendGameMessage({ GameMessageType::Reset });
 }

+std::string Interface::Evaluate(std::string&& code)
+{
+	// Stores the script in m_Code, posts an Evaluate message and returns
+	// whatever SendGameMessage hands back once the message has been applied.
+	// m_Lock is held for the whole exchange, including the wait inside
+	// SendGameMessage.
+	std::lock_guard<std::mutex> guard(m_Lock);
+	m_Code = std::move(code);
+
+	GameMessage evaluateRequest{ GameMessageType::Evaluate };
+	return SendGameMessage(std::move(evaluateRequest));
+}
+
 std::vector<std::string> Interface::GetTemplates(const std::vector<std::string>& names) const
 {
 	std::lock_guard<std::mutex> lock(m_Lock);
@ -192,6 +199,30 @@ void* Interface::MgCallback(mg_event event, struct mg_connection *conn, const st
 			else
 				stream << gameState.c_str();
 		}
+		else if (uri == "/evaluate")
+		{
+			if (!interface->IsGameRunning())
+			{
+				mg_printf(conn, "%s", notRunningResponse);
+				return handled;
+			}
+
+			std::string code = GetRequestContent(conn);
+			if (code.empty())
+			{
+				mg_printf(conn, "%s", noPostData);
+				return handled;
+			}
+
+			const std::string codeResult = interface->Evaluate(std::move(code));
+			if (codeResult.empty())
+			{
+				mg_printf(conn, "%s", notRunningResponse);
+				return handled;
+			}
+			else
+				stream << codeResult.c_str();
+		}
 		else if (uri == "/templates")
 		{
 			if (!interface->IsGameRunning()) {
@ -274,7 +305,7 @@ void Interface::TryApplyMessage()
 	const bool isGameStarted = g_Game && g_Game->IsGameStarted();
 	if (m_NeedsGameState && isGameStarted)
 	{
-		m_GameState = GetGameState();
+		m_ReturnValue = GetGameState();
 		m_MsgApplied.notify_one();
 		m_MsgLock.unlock();
 		m_NeedsGameState = false;
@ -318,7 +349,7 @@ void Interface::ApplyMessage(const GameMessage& msg)
 			{
 				LDR_NonprogressiveLoad();
 				ENSURE(g_Game->ReallyStartGame() == PSRETURN_OK);
-				m_GameState = GetGameState();
+				m_ReturnValue = GetGameState();
 				m_MsgApplied.notify_one();
 				m_MsgLock.unlock();
 			}
@ -342,7 +373,7 @@ void Interface::ApplyMessage(const GameMessage& msg)
 		{
 			if (!g_Game)
 			{
-				m_GameState = EMPTY_STATE;
+				m_ReturnValue = EMPTY_STATE;
 				m_MsgApplied.notify_one();
 				m_MsgLock.unlock();
 				return;
@ -368,11 +399,29 @@ void Interface::ApplyMessage(const GameMessage& msg)
 			else
 				g_Game->Update(deltaRealTime);

-			m_GameState = GetGameState();
+			m_ReturnValue = GetGameState();
 			m_MsgApplied.notify_one();
 			m_MsgLock.unlock();
 			break;
 		}
+        case GameMessageType::Evaluate:
+        {
+            if (!g_Game)
+            {
+                m_ReturnValue = EMPTY_STATE;
+                m_MsgApplied.notify_one();
+                m_MsgLock.unlock();
+                return;
+            }
+            const ScriptInterface& scriptInterface = g_Game->GetSimulation2()->GetScriptInterface();
+            ScriptRequest rq(scriptInterface);
+            JS::RootedValue ret(rq.cx);
+            scriptInterface.Eval(m_Code.c_str(), &ret);
+            m_ReturnValue = scriptInterface.StringifyJSON(&ret, false);
+            m_MsgApplied.notify_one();
+            m_MsgLock.unlock();
+            break;
+        }
 		default:
 		break;
 	}
--- a/source/rlinterface/RLInterface.h
+++ b/source/rlinterface/RLInterface.h
@ -1,4 +1,4 @@
-/* Copyright (C) 2020 Wildfire Games.
+/* Copyright (C) 2021 Wildfire Games.
 * This file is part of 0 A.D.
 *
 * 0 A.D. is free software: you can redistribute it and/or modify
@ -45,6 +45,7 @@ enum class GameMessageType
 	None,
 	Reset,
 	Commands,
+	Evaluate,
 };

 /**
@ -71,9 +72,9 @@ struct GameMessage
 * Flow of data (with the interface active):
 *  0. The game/main thread calls TryApplyMessage()
 *    - If no messages are pending, GOTO 0 (the simulation is not advanced).
- *  1. TryApplyMessage locks m_MsgLock, pulls the message, processes it, advances the simulation, and sets m_GameState.
+ *  1. TryApplyMessage locks m_MsgLock, pulls the message, processes it, advances the simulation, and sets m_ReturnValue.
 *  2. TryApplyMessage notifies the RL thread that it can carry on and unlocks m_MsgLock. The main thread carries on frame rendering and goes back to 0.
- *  3. The RL thread locks m_MsgLock, reads m_GameState, unlocks m_MsgLock, and sends the gamestate as HTTP Response to the RL client.
+ *  3. The RL thread locks m_MsgLock, reads m_ReturnValue, unlocks m_MsgLock, and sends it (the gamestate or, for Evaluate messages, the evaluation result) as HTTP Response to the RL client.
 *	4. The client processes the response and ultimately sends a new HTTP message to the RL Interface.
 *  5. The RL thread locks m_MsgLock, pushes the message, and starts waiting on the game/main thread to notify it (step 2).
 *   - GOTO 0.
@ -86,7 +87,7 @@ public:

 	/**
 	 * Non-blocking call to process any pending messages from the RL client.
-	 * Updates m_GameState to the gamestate after messages have been processed.
+	 * Updates m_ReturnValue to the gamestate after messages have been processed.
 	 */
 	void TryApplyMessage();

@ -106,6 +107,12 @@ private:
 	 */
 	std::string Reset(ScenarioConfig&& scenario);

+	/**
+	 * Evaluate JS code in the simulation's script context, e.g. to apply
+	 * arbitrary modifiers. Blocks until the game/main thread has applied
+	 * the request.
+	 * @param code the JavaScript source to evaluate.
+	 * @return the value the script evaluated to, stringified as JSON
+	 *         (or EMPTY_STATE when no game exists).
+	 */
+	std::string Evaluate(std::string&& code);
+
 	/**
 	 * @return template data for all templates of @param names.
 	 */
@ -131,7 +138,7 @@ private:

 	/**
 	 * Process any pending messages from the RL client.
-	 * Updates m_GameState to the gamestate after messages have been processed.
+	 * Updates m_ReturnValue to the gamestate after messages have been processed, or to the JSON-stringified evaluation result for Evaluate messages.
 	 */
 	void ApplyMessage(const GameMessage& msg);

@ -144,12 +151,13 @@ private:
 private:
 	GameMessage m_GameMessage;
 	ScenarioConfig m_ScenarioConfig;
-	std::string m_GameState;
+	std::string m_ReturnValue;
 	bool m_NeedsGameState = false;

 	mutable std::mutex m_Lock;
 	std::mutex m_MsgLock;
 	std::condition_variable m_MsgApplied;
+	std::string m_Code;
 };

 }
--- a/source/tools/rlclient/python/tests/fastactions.js
+++ b/source/tools/rlclient/python/tests/fastactions.js
@ -0,0 +1,10 @@
+// Script sent through the RL interface's evaluate endpoint by the Python
+// tests: it registers a "cheat/fastactions" set of modifiers so that
+// construction and similar timed actions finish almost immediately.
+let cmpPlayerManager = Engine.QueryInterface(SYSTEM_ENTITY, IID_PlayerManager);
+// NOTE(review): the player ID is passed as the string '1' rather than a
+// number - confirm GetPlayerByID treats both the same way.
+let playerEnt = cmpPlayerManager.GetPlayerByID('1');
+let cmpModifiersManager = Engine.QueryInterface(SYSTEM_ENTITY, IID_ModifiersManager);
+// Time-like values are multiplied by 0.01 and the gatherer base speed by 1000.
+// Every entry lists [["Structure"], ["Unit"]] under "affects" - presumably
+// "any entity having either class"; confirm against the modifiers code.
+// The last argument is the player entity looked up above.
+cmpModifiersManager.AddModifiers("cheat/fastactions", {
+	"Cost/BuildTime": [{ "affects": [["Structure"], ["Unit"]], "multiply": 0.01 }],
+	"ResourceGatherer/BaseSpeed": [{ "affects": [["Structure"], ["Unit"]], "multiply": 1000 }],
+	"Pack/Time": [{ "affects": [["Structure"], ["Unit"]], "multiply": 0.01 }],
+	"Upgrade/Time": [{ "affects": [["Structure"], ["Unit"]], "multiply": 0.01 }],
+	"ProductionQueue/TechCostMultiplier/time": [{ "affects": [["Structure"], ["Unit"]], "multiply": 0.01 }]
+}, playerEnt);
--- a/source/tools/rlclient/python/tests/test_actions.py
+++ b/source/tools/rlclient/python/tests/test_actions.py
@ -78,15 +78,17 @@ def test_walk():

 def test_attack():
    state = game.reset(config)
-    units = state.units(owner=1, type='cavalry')
+    unit = state.units(owner=1, type='cavalry')[0]
    target = state.units(owner=2, type='female_citizen')[0]
-    initial_health = target.health()
+    initial_health_target = target.health()
+    initial_health_unit = unit.health()

    state = game.step([zero_ad.actions.reveal_map()])

-    attack = zero_ad.actions.attack(units, target)
+    attack = zero_ad.actions.attack([unit], target)
    state = game.step([attack])
-    while state.unit(target.id()).health() >= initial_health:
+    while (state.unit(target.id()).health() >= initial_health_target
+        ) and (state.unit(unit.id()).health() >= initial_health_unit):
        state = game.step()

 def test_chat():
--- a/source/tools/rlclient/python/tests/test_evaluate.py
+++ b/source/tools/rlclient/python/tests/test_evaluate.py
@ -0,0 +1,46 @@
+import zero_ad
+import json
+import math
+from os import path
+
+# Shared client for all tests in this module, pointed at http://localhost:6000.
+game = zero_ad.ZeroAD('http://localhost:6000')
+scriptdir = path.dirname(path.realpath(__file__))
+# Scenario configuration that the tests pass to game.reset().
+with open(path.join(scriptdir, '..', 'samples', 'arcadia.json'), 'r') as f:
+    config = f.read()
+
+# JavaScript source that test_fastactions sends through game.evaluate().
+with open(path.join(scriptdir, 'fastactions.js'), 'r') as f:
+    fastactions = f.read()
+
+def test_return_object():
+    """An object literal evaluated in the game comes back as a Python dict."""
+    game.reset(config)
+    payload = game.evaluate('({"hello": "world"})')
+    assert type(payload) is dict
+    assert payload['hello'] == 'world'
+
+def test_return_null():
+    """JavaScript null evaluated in the game comes back as Python None."""
+    # Start a fresh game first, like the other tests do, so this test does not
+    # rely on an earlier test having left a game running.
+    game.reset(config)
+    result = game.evaluate('null')
+    assert result is None
+
+def test_return_string():
+    """A string literal evaluated in the game comes back as a Python str."""
+    game.reset(config)
+    assert game.evaluate('"cat"') == 'cat'
+
+def test_fastactions():
+    """With fastactions.js applied, a house is complete as soon as its health changes."""
+    state = game.reset(config)
+    game.evaluate(fastactions)
+    female_citizens = state.units(owner=1, type='female_citizen')
+    house_tpl = 'structures/spart/house'
+    x = 680
+    z = 640
+    build_house = zero_ad.actions.construct(female_citizens, house_tpl, x, z, autocontinue=True)
+    # Check that they start building the house
+    state = game.step([build_house])
+
+    def new_house():
+        # Reads `state` at call time, so it always reflects the latest step.
+        return state.units(owner=1, type=house_tpl)[0]
+
+    initial_health = new_house().health(ratio=True)
+    # Step until the health ratio moves away from its initial value.
+    while new_house().health(ratio=True) == initial_health:
+        state = game.step()
+
+    assert new_house().health(ratio=True) >= 1.0
--- a/source/tools/rlclient/python/zero_ad/api.py
+++ b/source/tools/rlclient/python/zero_ad/api.py
@ -27,3 +27,7 @@ class RLAPI():
        post_data = '\n'.join(names)
        response = self.post('templates', post_data)
        return zip(names, response.decode().split('\n'))
+
+    def evaluate(self, code):
+        """POST `code` to the 'evaluate' endpoint and return the JSON-decoded reply."""
+        raw_reply = self.post('evaluate', code)
+        reply_text = raw_reply.decode()
+        return json.loads(reply_text)
--- a/source/tools/rlclient/python/zero_ad/environment.py
+++ b/source/tools/rlclient/python/zero_ad/environment.py
@ -25,6 +25,9 @@ class ZeroAD():
        self.current_state = GameState(json.loads(state_json), self)
        return self.current_state

+    def evaluate(self, code):
+        """Forward `code` to the API's evaluate call and return its result unchanged."""
+        result = self.api.evaluate(code)
+        return result
+
    def get_template(self, name):
        return self.get_templates([name])[0]