Dataflow Block Basics

Keywords: C# Programming

> Back to C# Concurrent Programming

1. Introduction

The TPL Dataflow library is powerful: it lets you build meshes and pipelines and send data through them asynchronously.

Main namespace: System.Threading.Tasks.Dataflow

2. Linked Data Flow Blocks

When building a mesh, the dataflow blocks need to be linked to one another.

/// <summary>
/// Builds a two-stage pipeline (multiply by 2, then subtract 2), posts the
/// values 0..2 into it and blocks until the whole pipeline has finished.
/// Progress is reported on the console.
/// </summary>
public static void LinkBlockRun()
{
    System.Console.WriteLine("Building Block link.");
    TransformBlock<int, int> multiplyBlock = new TransformBlock<int, int>(item =>
    {
        System.Console.WriteLine("first block.");
        Thread.Sleep(500); // artificial 500 ms delay per item
        return item * 2;
    });
    var subtractBlock = new TransformBlock<int, int>(item =>
    {
        System.Console.WriteLine("last block.");
        Thread.Sleep(500); // artificial 500 ms delay per item
        return item - 2;
    });
    var options = new DataflowLinkOptions
    {
        PropagateCompletion = true
    };
    multiplyBlock.LinkTo(subtractBlock, options);

    // Nobody reads the final results in this demo. A TransformBlock does not
    // complete while it still holds unconsumed output, so the results are
    // discarded through a null target; otherwise subtractBlock.Completion
    // below would never finish.
    subtractBlock.LinkTo(DataflowBlock.NullTarget<int>());

    System.Console.WriteLine("Builded Block link.");

    var task = Task.Run(async () =>
    {
        System.Console.WriteLine("Posting");

        for (int i = 0; i < 3; i++)
        {
            multiplyBlock.Post(i);
        }

        System.Console.WriteLine("Posted");

        // The completion of the first block is automatically passed on to the
        // second block (PropagateCompletion = true).
        // It is not valid to Post after Complete.
        multiplyBlock.Complete();

        // Wait for the LAST block of the pipeline. Waiting only for
        // multiplyBlock would let "Block link Ended." race with the final
        // "last block." line and return while subtractBlock is still working.
        await subtractBlock.Completion;
        System.Console.WriteLine("Block link Ended.");
    });

    // The method is synchronous by contract, so block until the pipeline is done.
    task.Wait();
}

The output is:

Building Block link.
Builded Block link.
Posting
Posted
first block.
first block.
last block.
first block.
last block.
last block.
Block link Ended.

3. Propagating Errors

/// <summary>
/// Shows which exception type surfaces when awaiting the completion of a
/// faulted block: first for a single block, then for the last block of a
/// linked pair. The name of each caught exception type is written to the console.
/// </summary>
public static void BlockErrorRun()
{
    Func<Task> scenarios = async () =>
    {
        // Scenario 1: a single block. Awaiting its Completion is expected to
        // surface the exception thrown by the delegate itself.
        try
        {
            var standalone = new TransformBlock<int, int>(value =>
            {
                if (value != 1)
                {
                    return value * 2;
                }

                throw new InvalidOperationException("Blech.");
            });

            standalone.Post(1);
            await standalone.Completion;
        }
        catch (InvalidOperationException error)
        {
            Console.WriteLine(error.GetType().Name);
        }

        // Scenario 2: two linked blocks with completion propagation. The fault
        // of the first block is forwarded to the second one, and awaiting the
        // second block is expected to surface an AggregateException.
        try
        {
            var propagate = new DataflowLinkOptions { PropagateCompletion = true };

            var head = new TransformBlock<int, int>(value =>
            {
                if (value != 1)
                {
                    return value * 2;
                }

                throw new InvalidOperationException("Blech.");
            });
            var tail = new TransformBlock<int, int>(value => value - 2);

            head.LinkTo(tail, propagate);
            head.Post(1);
            await tail.Completion;
        }
        catch (AggregateException error)
        {
            Console.WriteLine(error.GetType().Name);
        }
    };

    // Run both scenarios on the thread pool and block until they are done.
    Task.Run(scenarios).Wait();
}

The output is:

InvalidOperationException
AggregateException
  • In the simplest cases, it is best to let errors propagate through the links and handle them once, at the end of the pipeline.
  • In more complex meshes, each dataflow block needs to be checked for errors once the dataflow has completed.

4. Disconnect Links

/// <summary>
/// Links two transform blocks, posts two items into the first one and then
/// disposes the link straight away, demonstrating how a link is removed.
/// </summary>
public static void BlockDisposeRun()
{
    // Stage delegates: each one logs, waits 500 ms and transforms the value.
    Func<int, int> doubleValue = value =>
    {
        Console.WriteLine("first block.");
        Thread.Sleep(500);
        return value * 2;
    };
    Func<int, int> subtractTwo = value =>
    {
        Console.WriteLine("last block.");
        Thread.Sleep(500);
        return value - 2;
    };

    var firstStage = new TransformBlock<int, int>(doubleValue);
    var lastStage = new TransformBlock<int, int>(subtractTwo);

    // LinkTo returns a handle whose disposal removes the link again.
    IDisposable connection = firstStage.LinkTo(lastStage);

    foreach (int value in new[] { 1, 2 })
    {
        firstStage.Post(value);
    }

    // Unlink the blocks. At this point the posted data may or may not already
    // have travelled across the link.
    // In real code, consider a using block rather than calling Dispose directly.
    connection.Dispose();

    // Keep the method alive for 1.2 s so the posted items (500 ms each in the
    // first stage) can be processed before it returns.
    Thread.Sleep(1200);
}

The output is:

first block.
first block.

5. Limit traffic

Use the BoundedCapacity option of a dataflow block to throttle the target block. The default value of BoundedCapacity is DataflowBlockOptions.Unbounded.

Solve the following issues:

  • Prevent the source from producing data too quickly, which would make the first target block buffer all of the data before it has had time to process any of it.
/// <summary>
/// Demonstrates throttling with BoundedCapacity: one unbounded source block
/// feeds two target blocks that can each hold at most 10 items. 31 items are
/// posted, then target A is drained until it is empty, followed by target B.
/// Every post and every received batch is logged with a timestamp.
/// </summary>
public static void BlockBoundedCapacityRun()
{
    var boundedOptions = new DataflowBlockOptions
    {
        // Use DataflowBlockOptions.Unbounded here instead to compare with the
        // unthrottled behaviour.
        BoundedCapacity = 10
    };

    var sourceBlock = new BufferBlock<int>();
    var targetBlockA = new BufferBlock<int>(boundedOptions);
    var targetBlockB = new BufferBlock<int>(boundedOptions);
    sourceBlock.LinkTo(targetBlockA);
    sourceBlock.LinkTo(targetBlockB);

    // Pour 31 "droplets" (the values 0..9, repeating) into the pipe.
    int posted = 0;
    while (posted < 31)
    {
        int droplet = posted % 10;
        string stamp = DateTime.Now.ToString("mm:ss.fff");
        Console.WriteLine($"{stamp} Post:{droplet}");
        sourceBlock.Post(droplet);
        posted++;
    }

    // Because both branches are bounded, targetBlockA and targetBlockB can
    // each buffer only 10 droplets at a time.
    var drainTask = Task.Run(() =>
    {
        IList<int> batch;
        int receivedTotal = 0;

        Console.WriteLine("Pre-processing targetBlockA Water droplets, which are recycled here to drain, but cannot be picked up targetBlockB Water droplets in");
        // Keep draining A until a TryReceiveAll call finds nothing buffered;
        // the 100 ms pause comes after every successful batch.
        while (targetBlockA.TryReceiveAll(out batch))
        {
            receivedTotal += batch.Count;
            Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} RevcA:{string.Join(",", batch)} {receivedTotal}");
            Thread.Sleep(100);
        }

        receivedTotal = 0;

        Console.WriteLine("Handle targetBlockB Water droplets, only buffered water droplets left");
        // Same procedure for B.
        while (targetBlockB.TryReceiveAll(out batch))
        {
            receivedTotal += batch.Count;
            Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} RevcB:{string.Join(",", batch)} {receivedTotal}");
            Thread.Sleep(100);
        }
    });

    drainTask.Wait();
}

The output is:

40:28.026 Post:0
40:28.038 Post:1
40:28.038 Post:2
40:28.038 Post:3
40:28.038 Post:4
40:28.038 Post:5
40:28.038 Post:6
40:28.038 Post:7
40:28.038 Post:8
40:28.038 Post:9
40:28.038 Post:0
40:28.038 Post:1
40:28.038 Post:2
40:28.038 Post:3
40:28.038 Post:4
40:28.038 Post:5
40:28.038 Post:6
40:28.038 Post:7
40:28.038 Post:8
40:28.038 Post:9
40:28.038 Post:0
40:28.038 Post:1
40:28.038 Post:2
40:28.038 Post:3
40:28.038 Post:4
40:28.038 Post:5
40:28.038 Post:6
40:28.038 Post:7
40:28.038 Post:8
40:28.038 Post:9
40:28.038 Post:0
Pre-processing targetBlockA Water droplets, which are recycled here to drain, but cannot be picked up targetBlockB Water droplets in
40:28.043 RevcA:0,1,2,3,4,5,6,7,8,9 10
40:28.149 RevcA:0,1,2,3,4,5,6,7,8,9 20
40:28.249 RevcA:0 21
Handle targetBlockB Water droplets, only buffered water droplets left
40:28.350 RevcB:0,1,2,3,4,5,6,7,8,9 10

Throttling example: when a dataflow mesh is fed with data that comes from I/O operations, you can set the BoundedCapacity option on its blocks. That way the mesh will not read more I/O data than it can handle, and it will not buffer all of the data before it has had a chance to process it.

6. Parallel processing of data stream blocks

/// <summary>
/// Demonstrates parallel processing inside a block: the first stage runs with
/// an unbounded degree of parallelism, the second stage with the default
/// options. Seven items (0..6) are pushed through, every result is collected
/// and printed on one line, and the method blocks until the pipeline is done.
/// </summary>
public static void BlockParalleRun()
{
    var multiplyBlock = new TransformBlock<int, int>(
    item =>
    {
        System.Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} first block.");
        Thread.Sleep(100); // artificial 100 ms delay per item
        return item * 2;
    },
    new ExecutionDataflowBlockOptions
    {
        // Let this block process as many items concurrently as it can.
        MaxDegreeOfParallelism = DataflowBlockOptions.Unbounded
    }
    );
    var subtractBlock = new TransformBlock<int, int>(item =>
    {
        System.Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} last block.");
        Thread.Sleep(100); // artificial 100 ms delay per item
        return item - 2;
    });
    multiplyBlock.LinkTo(subtractBlock, new DataflowLinkOptions { PropagateCompletion = true });

    var task = Task.Run(async () =>
    {
        for (int i = 0; i < 7; i++)
        {
            multiplyBlock.Post(i);
        }

        multiplyBlock.Complete();
        await multiplyBlock.Completion;

        // Collect results until subtractBlock reports that no further output
        // will ever arrive. This replaces a fixed delay followed by a single
        // TryReceiveAll: if the pipeline had needed longer than that delay,
        // results would have been missed and, because a block with
        // unconsumed output never completes, the wait below would have hung.
        var received = new List<int>();
        while (await subtractBlock.OutputAvailableAsync())
        {
            IList<int> batch;
            if (subtractBlock.TryReceiveAll(out batch))
            {
                received.AddRange(batch);
            }
        }

        if (received.Count > 0)
        {
            System.Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} Revc {string.Join(",", received)}.");
        }
        else
        {
            System.Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} Revc null.");
        }

        // multiplyBlock has already completed; subtractBlock completes because
        // the link was created with PropagateCompletion = true and all of its
        // output has been consumed above.
        await subtractBlock.Completion;
    });

    // The method is synchronous by contract, so block until the pipeline is done.
    task.Wait();
}

The output is:

44:16.023 first block.
44:16.023 first block.
44:16.023 first block.
44:16.023 first block.
44:16.023 first block.
44:16.023 first block.
44:16.023 first block.
44:16.146 last block.
44:16.250 last block.
44:16.351 last block.
44:16.452 last block.
44:16.552 last block.
44:16.652 last block.
44:16.753 last block.
44:17.656 Revc -2,0,2,4,6,8,10.

The real challenge is working out which dataflow blocks actually need parallel processing.

7. Create a custom data flow block

/// <summary>
/// Exercises the encapsulated pipeline returned by CreateMyCustomBlock: posts
/// the values 0..6 into its input side, collects everything that comes out of
/// its output side and prints the results on a single line.
/// </summary>
public static void BlockCustomRun()
{
    var block = CreateMyCustomBlock();
    for (int i = 0; i < 7; i++)
    {
        block.Post(i); // input (target) side of the encapsulated block
    }

    var task = Task.Run(async () =>
    {
        // Input (target) side: signal that no more items will be posted.
        block.Complete();

        // Output (source) side: keep receiving until the block reports that no
        // further output will arrive. This avoids using the
        // InvalidOperationException thrown by Receive() on a completed block
        // as the loop's exit condition.
        List<int> recvResList = new List<int>();
        while (await block.OutputAvailableAsync())
        {
            recvResList.Add(await block.ReceiveAsync());
        }

        Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} Revc {string.Join(",", recvResList)}.");

        // Output (source) side: surfaces any fault raised inside the pipeline.
        await block.Completion;
    });

    // The method is synchronous by contract, so block until everything is done.
    task.Wait();
}

/// <summary>
/// Builds a three-stage pipeline (multiply by 2, add 2, integer-divide by 2)
/// and exposes it as a single propagator block whose input is the first stage
/// and whose output is the last stage.
/// </summary>
/// <returns>A block that accepts an int and produces ((value * 2) + 2) / 2.</returns>
static IPropagatorBlock<int, int> CreateMyCustomBlock()
{
    // Stage delegates: each one computes its result, logs it with a timestamp
    // and then waits 100 ms before handing the value on.
    Func<int, int> multiplyByTwo = value =>
    {
        int doubled = value * 2;
        Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} first block {doubled}.");
        Thread.Sleep(100);
        return doubled;
    };
    Func<int, int> addTwo = value =>
    {
        int increased = value + 2;
        Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} next block {increased}.");
        Thread.Sleep(100);
        return increased;
    };
    Func<int, int> halve = value =>
    {
        int halved = value / 2;
        Console.WriteLine($"{DateTime.Now.ToString("mm:ss.fff")} last block {halved}.");
        Thread.Sleep(100);
        return halved;
    };

    var head = new TransformBlock<int, int>(multiplyByTwo);
    var middle = new TransformBlock<int, int>(addTwo);
    var tail = new TransformBlock<int, int>(halve);

    // Completion (and faults) travel down the chain together with the data.
    var propagate = new DataflowLinkOptions { PropagateCompletion = true };
    head.LinkTo(middle, propagate);
    middle.LinkTo(tail, propagate);

    // Callers see one block: head is the target side, tail is the source side.
    return DataflowBlock.Encapsulate(head, tail);
}

The output is:

45:00.528 first block 0.
45:00.639 first block 2.
45:00.641 next block 2.
45:00.739 first block 4.
45:00.746 next block 4.
45:00.747 last block 1.
45:00.844 first block 6.
45:00.847 next block 6.
45:00.848 last block 2.
45:00.947 first block 8.
45:00.951 next block 8.
45:00.951 last block 3.
45:01.049 first block 10.
45:01.055 next block 10.
45:01.056 last block 4.
45:01.152 first block 12.
45:01.159 next block 12.
45:01.160 last block 5.
45:01.264 next block 14.
45:01.265 last block 6.
45:01.365 last block 7.
45:01.472 Revc 1,2,3,4,5,6,7.

DataflowBlock.Encapsulate only encapsulates a mesh with exactly one input block and one output block. If a reusable mesh has multiple inputs or outputs, it should be encapsulated in a custom object instead.

Posted by animedls on Fri, 31 Jan 2020 19:38:50 -0800