mirror of https://github.com/dapr/dotnet-sdk.git
Added extension for supporting parallel processing with maximum concurrency without the developer having to figure this out themselves
Signed-off-by: Whit Waldo <whit.waldo@innovian.net>
This commit is contained in:
parent
384c6aee4e
commit
77ec6ec48c
|
@ -0,0 +1,148 @@
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// Copyright 2025 The Dapr Authors
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
|
||||||
|
using System;
|
||||||
|
using System.Collections.Generic;
|
||||||
|
using System.Linq;
|
||||||
|
using System.Threading.Tasks;
|
||||||
|
|
||||||
|
namespace Dapr.Workflow;
|
||||||
|
|
||||||
|
/// <summary>
|
||||||
|
/// Extension methods for <see cref="WorkflowContext"/> that provide high-level parallel processing primitives
|
||||||
|
/// with controlled concurrency.
|
||||||
|
/// </summary>
|
||||||
|
public static class ParallelExtensions
|
||||||
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Processes a collection of inputs in parallel with controlled concurrency using a streaming execution model.
|
||||||
|
/// </summary>
|
||||||
|
/// <typeparam name="TInput">The type of input items to process.</typeparam>
|
||||||
|
/// <typeparam name="TResult">The type of result items returned by the task factory.</typeparam>
|
||||||
|
/// <param name="context">The orchestration context.</param>
|
||||||
|
/// <param name="inputs">The collection of inputs to process in parallel.</param>
|
||||||
|
/// <param name="taskFactory">
|
||||||
|
/// A function that creates a task for each input item. This function is called in the orchestration context
|
||||||
|
/// to ensure all tasks are properly tracked by the durable task framework.
|
||||||
|
/// </param>
|
||||||
|
/// <param name="maxConcurrency">
|
||||||
|
/// The maximum number of tasks to execute concurrently. Defaults to 5 if not specified.
|
||||||
|
/// Must be greater than 0.
|
||||||
|
/// </param>
|
||||||
|
/// <returns>
|
||||||
|
/// A task that completes when all input items have been processed. The result is an array containing
|
||||||
|
/// the results in the same order as the input collection.
|
||||||
|
/// </returns>
|
||||||
|
/// <exception cref="ArgumentNullException">
|
||||||
|
/// Thrown when <paramref name="context"/>, <paramref name="inputs"/>, or <paramref name="taskFactory"/> is null.
|
||||||
|
/// </exception>
|
||||||
|
/// <exception cref="ArgumentOutOfRangeException">
|
||||||
|
/// Thrown when <paramref name="maxConcurrency"/> is less than or equal to 0.
|
||||||
|
/// </exception>
|
||||||
|
/// <exception cref="AggregateException">
|
||||||
|
/// Thrown when one or more tasks fail during execution. All task exceptions are collected and wrapped.
|
||||||
|
/// </exception>
|
||||||
|
/// <remarks>
|
||||||
|
/// <para>
|
||||||
|
/// This method uses a streaming execution model that maintains constant memory usage regardless of input size.
|
||||||
|
/// Only <paramref name="maxConcurrency"/> tasks are active at any given time, with new tasks started as
|
||||||
|
/// existing ones complete. This provides optimal resource utilization and prevents memory issues with large datasets.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// The method is fully deterministic for durable task orchestrations. All tasks are created in the orchestration
|
||||||
|
/// context before any coordination logic begins, ensuring proper replay behavior. The framework records history
|
||||||
|
/// events for each task creation, and during replay, all tasks complete immediately with their recorded results.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// If any task fails, the method will wait for all currently executing tasks to complete before throwing an
|
||||||
|
/// <see cref="AggregateException"/> containing all failures.
|
||||||
|
/// </para>
|
||||||
|
/// <para>
|
||||||
|
/// Example usage:
|
||||||
|
/// <code>
|
||||||
|
/// var orderIds = new[] { "order1", "order2", "order3", "order4", "order5" };
|
||||||
|
/// var results = await context.ProcessInParallelAsync(
|
||||||
|
/// orderIds,
|
||||||
|
/// orderId => context.CallActivityAsync<OrderResult>("ProcessOrder", orderId),
|
||||||
|
/// maxConcurrency: 3);
|
||||||
|
/// </code>
|
||||||
|
/// </para>
|
||||||
|
/// </remarks>
|
||||||
|
public static async Task<TResult[]> ProcessInParallelAsync<TInput, TResult>(
|
||||||
|
this WorkflowContext context,
|
||||||
|
IEnumerable<TInput> inputs,
|
||||||
|
Func<TInput, Task<TResult>> taskFactory,
|
||||||
|
int maxConcurrency = 5)
|
||||||
|
{
|
||||||
|
ArgumentNullException.ThrowIfNull(context);
|
||||||
|
ArgumentNullException.ThrowIfNull(inputs);
|
||||||
|
ArgumentNullException.ThrowIfNull(taskFactory);
|
||||||
|
if (maxConcurrency <= 0)
|
||||||
|
throw new ArgumentOutOfRangeException(nameof(maxConcurrency), "Max concurrency must be greater than 0.");
|
||||||
|
|
||||||
|
var inputList = inputs.ToList();
|
||||||
|
if (inputList.Count == 0)
|
||||||
|
return [];
|
||||||
|
|
||||||
|
var results = new TResult[inputList.Count];
|
||||||
|
var inFlightTasks = new Dictionary<Task<TResult>, int>(); // Task -> result index
|
||||||
|
var inputIndex = 0;
|
||||||
|
var completedCount = 0;
|
||||||
|
var exceptions = new List<Exception>();
|
||||||
|
|
||||||
|
// Start initial batch up to maxConcurrency
|
||||||
|
while (inputIndex < inputList.Count && inFlightTasks.Count < maxConcurrency)
|
||||||
|
{
|
||||||
|
var task = taskFactory(inputList[inputIndex]);
|
||||||
|
inFlightTasks[task] = inputIndex;
|
||||||
|
inputIndex++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process remaining items with streaming execution
|
||||||
|
while (completedCount < inputList.Count)
|
||||||
|
{
|
||||||
|
var completedTask = await Task.WhenAny(inFlightTasks.Keys);
|
||||||
|
var resultIndex = inFlightTasks[completedTask];
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
results[resultIndex] = await completedTask;
|
||||||
|
}
|
||||||
|
catch (Exception ex)
|
||||||
|
{
|
||||||
|
exceptions.Add(ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
inFlightTasks.Remove(completedTask);
|
||||||
|
completedCount++;
|
||||||
|
|
||||||
|
// Start next task if more work remains
|
||||||
|
if (inputIndex < inputList.Count)
|
||||||
|
{
|
||||||
|
var nextTask = taskFactory(inputList[inputIndex]);
|
||||||
|
inFlightTasks[nextTask] = inputIndex;
|
||||||
|
inputIndex++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// If any exceptions occurred, throw them as an aggregate
|
||||||
|
if (exceptions.Count > 0)
|
||||||
|
{
|
||||||
|
throw new AggregateException(
|
||||||
|
$"One or more tasks failed during parallel processing. {exceptions.Count} out of {inputList.Count} tasks failed.",
|
||||||
|
exceptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue