Thread the pathfinder computations using the task manager.

The pathfinder computations are run asynchronously (and potentially on
the main thread) in-between simulation turns, thus reducing
pathfinder-related lag considerably in common cases.

To make this most efficient, the maximum number of paths computed during a
turn (MaxSameTurnMoves) is reduced from 64 to 20.

This has a hard dependency on the obstruction manager (via the vertex
pathfinder) not being modified in-between simulation turns (or, to put it
more generally, on the simulation state not changing outside of turn
computation); otherwise results will be non-deterministic and the game
will go out of sync (OOS).
This is currently entirely safe (that is, the simulation state does not
change in-between turns), but future work towards improving simulation
sandboxing would be good.

Thanks to Kuba386 for maintaining & improving the patch in 2020.
Thanks to everyone who tested the various iterations of this patch.

Fixes #4324

Differential Revision: https://code.wildfiregames.com/D14
This was SVN commit r25657.
This commit is contained in:
wraitii 2021-06-03 16:21:28 +00:00
parent 1b35d36daa
commit 0ebc08b13c
5 changed files with 47 additions and 12 deletions

View File

@ -144,6 +144,7 @@
{ "nick": "kingadami", "name": "Adam Winsor" },
{ "nick": "kingbasil", "name": "Giannis Fafalios" },
{ "nick": "Krinkle", "name": "Timo Tijhof" },
{ "nick": "Kuba386", "name": "Jakub Kośmicki" },
{ "nick": "lafferjm", "name": "Justin Lafferty" },
{ "nick": "Langbart" },
{ "nick": "LeanderH", "name": "Leander Hemelhof" },

View File

@ -2,7 +2,7 @@
<Pathfinder>
<!-- Sets limit on the number of same turns moves we will process -->
<!-- Setting the value to 0 disable this functionality -->
<MaxSameTurnMoves>64</MaxSameTurnMoves>
<MaxSameTurnMoves>20</MaxSameTurnMoves>
<!-- Multiplier for the distance at which units push each other. -->
<!-- Setting the value to 0 disables unit pushing entirely. -->

View File

@ -59,10 +59,16 @@ void CCmpPathfinder::Init(const CParamNode& UNUSED(paramNode))
m_AtlasOverlay = NULL;
m_VertexPathfinder = std::make_unique<VertexPathfinder>(m_GridSize, m_TerrainOnlyGrid);
size_t workerThreads = Threading::TaskManager::Instance().GetNumberOfWorkers();
// Store one vertex pathfinder for each thread (including the main thread).
while (m_VertexPathfinders.size() < workerThreads + 1)
m_VertexPathfinders.emplace_back(m_GridSize, m_TerrainOnlyGrid);
m_LongPathfinder = std::make_unique<LongPathfinder>();
m_PathfinderHier = std::make_unique<HierarchicalPathfinder>();
// Set up one future for each worker thread.
m_Futures.resize(workerThreads);
// Register Relax NG validator
CXeromyces::AddValidator(g_VFS, "pathfinder", "simulation/data/pathfinder.rng");
@ -75,14 +81,13 @@ void CCmpPathfinder::Init(const CParamNode& UNUSED(paramNode))
// Paths are computed:
// - Before MT_Update
// - Before MT_MotionUnitFormation
// - 'in-between' turns (effectively at the start until threading is implemented).
// - asynchronously between turn end and turn start.
// The latter of these must compute all outstanding requests, but the former two are capped
// to avoid spending too much time there (since the latter are designed to be threaded and thus not block the GUI).
// to avoid spending too much time there (since the latter are threaded and thus much 'cheaper').
// This loads that maximum number (note that it's per computation call, not per turn for now).
const CParamNode pathingSettings = externalParamNode.GetChild("Pathfinder");
m_MaxSameTurnMoves = (u16)pathingSettings.GetChild("MaxSameTurnMoves").ToInt();
const CParamNode::ChildrenMap& passClasses = externalParamNode.GetChild("Pathfinder").GetChild("PassabilityClasses").GetChildren();
for (CParamNode::ChildrenMap::const_iterator it = passClasses.begin(); it != passClasses.end(); ++it)
{
@ -99,6 +104,12 @@ CCmpPathfinder::~CCmpPathfinder() {};
void CCmpPathfinder::Deinit()
{
SetDebugOverlay(false); // cleans up memory
// Wait on all pathfinding tasks.
for (Future<void>& future : m_Futures)
future.Cancel();
m_Futures.clear();
SAFE_DELETE(m_AtlasOverlay);
SAFE_DELETE(m_Grid);
@ -749,7 +760,7 @@ void CCmpPathfinder::ComputePathImmediate(entity_pos_t x0, entity_pos_t z0, cons
WaypointPath CCmpPathfinder::ComputeShortPathImmediate(const ShortPathRequest& request) const
{
return m_VertexPathfinder->ComputeShortPath(request, CmpPtr<ICmpObstructionManager>(GetSystemEntity()));
return m_VertexPathfinders.front().ComputeShortPath(request, CmpPtr<ICmpObstructionManager>(GetSystemEntity()));
}
template<typename T>
@ -785,9 +796,14 @@ void CCmpPathfinder::SendRequestedPaths()
if (!m_LongPathRequests.m_ComputeDone || !m_ShortPathRequests.m_ComputeDone)
{
m_ShortPathRequests.Compute(*this, *m_VertexPathfinder);
// Also start computing on the main thread to finish faster.
m_ShortPathRequests.Compute(*this, m_VertexPathfinders.front());
m_LongPathRequests.Compute(*this, *m_LongPathfinder);
}
// We're done, clear futures.
// Use CancelOrWait instead of just Cancel to ensure determinism.
for (Future<void>& future : m_Futures)
future.CancelOrWait();
{
PROFILE2("PostMessages");
@ -811,8 +827,22 @@ void CCmpPathfinder::StartProcessingMoves(bool useMax)
{
m_ShortPathRequests.PrepareForComputation(useMax ? m_MaxSameTurnMoves : 0);
m_LongPathRequests.PrepareForComputation(useMax ? m_MaxSameTurnMoves : 0);
Threading::TaskManager& taskManager = Threading::TaskManager::Instance();
for (size_t i = 0; i < m_Futures.size(); ++i)
{
ENSURE(!m_Futures[i].Valid());
// Pass the i+1th vertex pathfinder to keep the first for the main thread,
// each thread get its own instance to avoid conflicts in cached data.
m_Futures[i] = taskManager.PushTask([&pathfinder=*this, &vertexPfr=m_VertexPathfinders[i + 1]]() {
PROFILE2("Async pathfinding");
pathfinder.m_ShortPathRequests.Compute(pathfinder, vertexPfr);
pathfinder.m_LongPathRequests.Compute(pathfinder, *pathfinder.m_LongPathfinder);
});
}
}
//////////////////////////////////////////////////////////
bool CCmpPathfinder::IsGoalReachable(entity_pos_t x0, entity_pos_t z0, const PathGoal& goal, pass_class_t passClass)

View File

@ -35,6 +35,7 @@
#include "graphics/Terrain.h"
#include "maths/MathUtil.h"
#include "ps/CLogger.h"
#include "ps/TaskManager.h"
#include "renderer/TerrainOverlay.h"
#include "simulation2/components/ICmpObstructionManager.h"
#include "simulation2/helpers/Grid.h"
@ -94,19 +95,22 @@ public:
GridUpdateInformation m_AIPathfinderDirtinessInformation;
bool m_TerrainDirty;
std::unique_ptr<VertexPathfinder> m_VertexPathfinder;
std::vector<VertexPathfinder> m_VertexPathfinders;
std::unique_ptr<HierarchicalPathfinder> m_PathfinderHier;
std::unique_ptr<LongPathfinder> m_LongPathfinder;
// One per live asynchronous path computing task.
std::vector<Future<void>> m_Futures;
template<typename T>
class PathRequests {
public:
std::vector<T> m_Requests;
std::vector<PathResult> m_Results;
// This is the array index of the next path to compute.
size_t m_NextPathToCompute = 0;
std::atomic<size_t> m_NextPathToCompute = 0;
// This is false until all scheduled paths have been computed.
bool m_ComputeDone = true;
std::atomic<bool> m_ComputeDone = true;
void ClearComputed()
{

View File

@ -113,8 +113,8 @@ private:
};
/**
* If there are several vertex pathfinders running asynchronously, their debug output might conflict.
* To remain thread-safe, this single class will handle the debug data.
* There are several vertex pathfinders running asynchronously, so their debug output
* might conflict. To remain thread-safe, this single class will handle the debug data.
* NB: though threadsafe, the way things are setup means you can have a few
* more graphs and edges than you'd expect showing up in the rendered graph.
*/