Disabled external gits

2022-04-07 18:46:57 +02:00
parent 88cb3426ad
commit 15e7120d6d
5316 changed files with 4563444 additions and 6 deletions

@@ -0,0 +1,25 @@
<HTML>
<BODY>
<H2>Overview</H2>
Include files for Intel&reg; Threading Building Blocks (Intel&reg; TBB).
<H2>Directories</H2>
<DL>
<DT><A HREF="tbb/index.html">tbb</A>
<DD>Include files for Intel TBB classes and functions.
<DT><A HREF="serial/tbb/">serial/tbb</A>
<DD>Include files for a sequential implementation of the parallel_for algorithm.
</DL>
<HR>
<A HREF="../index.html">Up to parent directory</A>
<p></p>
Copyright &copy; 2005-2020 Intel Corporation. All Rights Reserved.
<P></P>
Intel is a registered trademark or trademark of Intel Corporation
or its subsidiaries in the United States and other countries.
<p></p>
* Other names and brands may be claimed as the property of others.
</BODY>
</HTML>

@@ -0,0 +1,226 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../../tbb/internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_parallel_for_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_parallel_for_H
#pragma message("TBB Warning: serial/tbb/parallel_for.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_SERIAL_parallel_for_H
#define __TBB_SERIAL_parallel_for_H
#include "tbb_annotate.h"
#ifndef __TBB_NORMAL_EXECUTION
#include "tbb/blocked_range.h"
#include "tbb/partitioner.h"
#endif
#if TBB_USE_EXCEPTIONS
#include <stdexcept>
#include <string> // required to construct std exception classes
#else
#include <cstdlib>
#include <iostream>
#endif
namespace tbb {
namespace serial {
namespace interface9 {
// parallel_for serial annotated implementation
template< typename Range, typename Body, typename Partitioner >
class start_for : tbb::internal::no_copy {
Range my_range;
const Body my_body;
typename Partitioner::task_partition_type my_partition;
void execute();
//! Constructor for root task.
start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
my_range( range ),
my_body( body ),
my_partition( partitioner )
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes the left child; the newly constructed object is the right child. */
start_for( start_for& parent_, typename Partitioner::split_type& split_obj ) :
my_range( parent_.my_range, split_obj ),
my_body( parent_.my_body ),
my_partition( parent_.my_partition, split_obj )
{
}
public:
static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
ANNOTATE_SITE_BEGIN( tbb_parallel_for );
{
start_for a( range, body, partitioner );
a.execute();
}
ANNOTATE_SITE_END( tbb_parallel_for );
}
}
};
template< typename Range, typename Body, typename Partitioner >
void start_for< Range, Body, Partitioner >::execute() {
if( !my_range.is_divisible() || !my_partition.is_divisible() ) {
ANNOTATE_TASK_BEGIN( tbb_parallel_for_range );
{
my_body( my_range );
}
ANNOTATE_TASK_END( tbb_parallel_for_range );
} else {
typename Partitioner::split_type split_obj;
start_for b( *this, split_obj );
this->execute(); // Execute the left interval first to keep the serial order.
b.execute(); // Execute the right interval then.
}
}
//! Parallel iteration over range with default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for( const Range& range, const Body& body ) {
serial::interface9::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
//! Parallel iteration over range with simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
serial::interface9::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with auto_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
serial::interface9::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with static_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
serial::interface9::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with affinity_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
serial::interface9::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
}
//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner (ignored)
template <typename Index, typename Function, typename Partitioner>
void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& ) {
if (step <= 0 ) {
#if TBB_USE_EXCEPTIONS
throw std::invalid_argument( "nonpositive_step" );
#else
std::cerr << "nonpositive step in a call to parallel_for" << std::endl;
std::abort();
#endif
} else if (last > first) {
// Above "else" avoids "potential divide by zero" warning on some platforms
ANNOTATE_SITE_BEGIN( tbb_parallel_for );
for( Index i = first; i < last; i = i + step ) {
ANNOTATE_TASK_BEGIN( tbb_parallel_for_iteration );
{ f( i ); }
ANNOTATE_TASK_END( tbb_parallel_for_iteration );
}
ANNOTATE_SITE_END( tbb_parallel_for );
}
}
//! Parallel iteration over a range of integers with explicit step and default partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, Index step, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
}
//! Parallel iteration over a range of integers with explicit step and simple partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& p) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, p);
}
//! Parallel iteration over a range of integers with explicit step and auto partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& p) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, p);
}
//! Parallel iteration over a range of integers with explicit step and static partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& p) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, p);
}
//! Parallel iteration over a range of integers with explicit step and affinity partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& p) {
parallel_for_impl(first, last, step, f, p);
}
//! Parallel iteration over a range of integers with default step and default partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
}
//! Parallel iteration over a range of integers with default step and simple partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& p) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, p);
}
//! Parallel iteration over a range of integers with default step and auto partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& p) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, p);
}
//! Parallel iteration over a range of integers with default step and static partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, const Function& f, const static_partitioner& p) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, p);
}
//! Parallel iteration over a range of integers with default step and affinity_partitioner
template <typename Index, typename Function>
__TBB_DEPRECATED_IN_VERBOSE_MODE void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& p) {
parallel_for_impl(first, last, static_cast<Index>(1), f, p);
}
} // namespace interface9
using interface9::parallel_for;
} // namespace serial
#ifndef __TBB_NORMAL_EXECUTION
using serial::interface9::parallel_for;
#endif
} // namespace tbb
#endif /* __TBB_SERIAL_parallel_for_H */
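
A minimal usage sketch for the serial parallel_for header above, assuming the commit's include directory is on the include path and that __TBB_NORMAL_EXECUTION is defined so the ANNOTATE_* macros compile away; the vector workload and the lambda body are hypothetical:

#define __TBB_NORMAL_EXECUTION            // make the annotation macros expand to nothing
#include "tbb/blocked_range.h"            // not pulled in by the serial header when the macro is set
#include "tbb/partitioner.h"              // declares the partitioner types named by the overloads
#include "serial/tbb/parallel_for.h"
#include <cstddef>
#include <vector>

int main() {
    std::vector<int> data(100, 1);
    // Index overload: the body runs serially, in order, with step 1.
    tbb::serial::parallel_for(std::size_t(0), data.size(), std::size_t(1),
                              [&](std::size_t i) { data[i] *= 2; });
    return data.front() == 2 && data.back() == 2 ? 0 : 1;
}

The range-based overloads work the same way but additionally exercise the partitioner machinery declared in tbb/partitioner.h.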

@@ -0,0 +1,32 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_annotate_H
#define __TBB_annotate_H
// Macros used by the Intel(R) Parallel Advisor.
#ifdef __TBB_NORMAL_EXECUTION
#define ANNOTATE_SITE_BEGIN( site )
#define ANNOTATE_SITE_END( site )
#define ANNOTATE_TASK_BEGIN( task )
#define ANNOTATE_TASK_END( task )
#define ANNOTATE_LOCK_ACQUIRE( lock )
#define ANNOTATE_LOCK_RELEASE( lock )
#else
#include <advisor-annotate.h>
#endif
#endif /* __TBB_annotate_H */
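
A hedged sketch of the annotation layer above, assuming the header lives at serial/tbb/tbb_annotate.h: with __TBB_NORMAL_EXECUTION defined the macros vanish and the function compiles as plain serial code; without it, <advisor-annotate.h> must be on the include path.

#define __TBB_NORMAL_EXECUTION
#include "serial/tbb/tbb_annotate.h"

void scale(double* a, int n, double k) {
    ANNOTATE_SITE_BEGIN(scale_site);           // no-op under __TBB_NORMAL_EXECUTION
    for (int i = 0; i < n; ++i) {
        ANNOTATE_TASK_BEGIN(scale_iteration);  // no-op under __TBB_NORMAL_EXECUTION
        a[i] *= k;
        ANNOTATE_TASK_END(scale_iteration);
    }
    ANNOTATE_SITE_END(scale_site);
}

int main() {
    double a[4] = {1, 2, 3, 4};
    scale(a, 4, 2.0);
    return a[3] == 8.0 ? 0 : 1;
}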

@@ -0,0 +1,204 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__aggregator_H
#define __TBB__aggregator_H
#define __TBB_aggregator_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if !TBB_PREVIEW_AGGREGATOR
#error Set TBB_PREVIEW_AGGREGATOR before including aggregator.h
#endif
#include "atomic.h"
#include "tbb_profiling.h"
namespace tbb {
namespace interface6 {
using namespace tbb::internal;
class aggregator_operation {
template<typename handler_type> friend class aggregator_ext;
uintptr_t status;
aggregator_operation* my_next;
public:
enum aggregator_operation_status { agg_waiting=0, agg_finished };
aggregator_operation() : status(agg_waiting), my_next(NULL) {}
/// Call start before handling this operation
void start() { call_itt_notify(acquired, &status); }
/// Call finish when done handling this operation
/** The operation will be released to its originating thread, and possibly deleted. */
void finish() { itt_store_word_with_release(status, uintptr_t(agg_finished)); }
aggregator_operation* next() { return itt_hide_load_word(my_next);}
void set_next(aggregator_operation* n) { itt_hide_store_word(my_next, n); }
};
namespace internal {
class basic_operation_base : public aggregator_operation {
friend class basic_handler;
virtual void apply_body() = 0;
public:
basic_operation_base() : aggregator_operation() {}
virtual ~basic_operation_base() {}
};
template<typename Body>
class basic_operation : public basic_operation_base, no_assign {
const Body& my_body;
void apply_body() __TBB_override { my_body(); }
public:
basic_operation(const Body& b) : basic_operation_base(), my_body(b) {}
};
class basic_handler {
public:
basic_handler() {}
void operator()(aggregator_operation* op_list) const {
while (op_list) {
// ITT note: &(op_list->status) tag is used to cover accesses to the operation data.
// The executing thread "acquires" the tag (see start()) and then performs
// the associated operation w/o triggering a race condition diagnostics.
// A thread that created the operation is waiting for its status (see execute_impl()),
// so when this thread is done with the operation, it will "release" the tag
// and update the status (see finish()) to give control back to the waiting thread.
basic_operation_base& request = static_cast<basic_operation_base&>(*op_list);
// IMPORTANT: need to advance op_list to op_list->next() before calling request.finish()
op_list = op_list->next();
request.start();
request.apply_body();
request.finish();
}
}
};
} // namespace internal
//! Aggregator base class and expert interface
/** An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. */
template <typename handler_type>
class aggregator_ext : tbb::internal::no_copy {
public:
aggregator_ext(const handler_type& h) : handler_busy(0), handle_operations(h) { mailbox = NULL; }
//! EXPERT INTERFACE: Enter a user-made operation into the aggregator's mailbox.
/** Details of user-made operations must be handled by user-provided handler */
void process(aggregator_operation *op) { execute_impl(*op); }
protected:
/** Place operation in mailbox, then either handle mailbox or wait for the operation
to be completed by a different thread. */
void execute_impl(aggregator_operation& op) {
aggregator_operation* res;
// ITT note: &(op.status) tag is used to cover accesses to this operation. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op.status));
// insert the operation into the list
do {
// ITT may flag the following line as a race; it is a false positive:
// This is an atomic read; we don't provide itt_hide_load_word for atomics
op.my_next = res = mailbox; // NOT A RACE
} while (mailbox.compare_and_swap(&op, res) != res);
if (!res) { // first in the list; handle the operations
// ITT note: &mailbox tag covers access to the handler_busy flag, which this
// waiting handler thread will try to set before entering handle_operations.
call_itt_notify(acquired, &mailbox);
start_handle_operations();
__TBB_ASSERT(op.status, NULL);
}
else { // not first; wait for op to be ready
call_itt_notify(prepare, &(op.status));
spin_wait_while_eq(op.status, uintptr_t(aggregator_operation::agg_waiting));
itt_load_word_with_acquire(op.status);
}
}
private:
//! An atomically updated list (aka mailbox) of aggregator_operations
atomic<aggregator_operation *> mailbox;
//! Controls thread access to handle_operations
/** Behaves as boolean flag where 0=false, 1=true */
uintptr_t handler_busy;
handler_type handle_operations;
//! Trigger the handling of operations when the handler is free
void start_handle_operations() {
aggregator_operation *pending_operations;
// ITT note: &handler_busy tag covers access to mailbox as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in mailbox have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get handler_busy: only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
__TBB_store_with_release(handler_busy, uintptr_t(1));
// ITT note: &mailbox tag covers access to the handler_busy flag itself.
// Capturing the state of the mailbox signifies that handler_busy has been
// set and a new active handler will now process that list's operations.
call_itt_notify(releasing, &mailbox);
// grab pending_operations
pending_operations = mailbox.fetch_and_store(NULL);
// handle all the operations
handle_operations(pending_operations);
// release the handler
itt_store_word_with_release(handler_busy, uintptr_t(0));
}
};
//! Basic aggregator interface
class aggregator : private aggregator_ext<internal::basic_handler> {
public:
aggregator() : aggregator_ext<internal::basic_handler>(internal::basic_handler()) {}
//! BASIC INTERFACE: Enter a function for exclusive execution by the aggregator.
/** The calling thread stores the function object in a basic_operation and
places the operation in the aggregator's mailbox */
template<typename Body>
void execute(const Body& b) {
internal::basic_operation<Body> op(b);
this->execute_impl(op);
}
};
} // namespace interface6
using interface6::aggregator;
using interface6::aggregator_ext;
using interface6::aggregator_operation;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_aggregator_H_include_area
#endif // __TBB__aggregator_H
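
A hedged usage sketch for the preview aggregator above: two threads funnel mutations of a shared std::vector through one aggregator, which applies them one at a time without an explicit lock. The header path tbb/aggregator.h and the 1000-iteration workload are assumptions.

#define TBB_PREVIEW_AGGREGATOR 1
#include "tbb/aggregator.h"
#include <thread>
#include <vector>

int main() {
    std::vector<int> log;                  // touched only from inside the aggregator
    tbb::aggregator agg;
    auto worker = [&](int id) {
        for (int i = 0; i < 1000; ++i)
            agg.execute([&] { log.push_back(id); });   // serialized by agg
    };
    std::thread t1(worker, 1), t2(worker, 2);
    t1.join();
    t2.join();
    return log.size() == 2000 ? 0 : 1;
}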

@@ -0,0 +1,60 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_aligned_space_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_aligned_space_H
#pragma message("TBB Warning: tbb/aligned_space.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_aligned_space_H
#define __TBB_aligned_space_H
#define __TBB_aligned_space_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "tbb_stddef.h"
#include "tbb_machine.h"
namespace tbb {
//! Block of space aligned sufficiently to construct an array T with N elements.
/** The elements are not constructed or destroyed by this class.
@ingroup memory_allocation */
template<typename T,size_t N=1>
class __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::aligned_space is deprecated, use std::aligned_storage") aligned_space {
private:
typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
element_type array[(sizeof(T)*N+sizeof(element_type)-1)/sizeof(element_type)];
public:
//! Pointer to beginning of array
T* begin() const {return internal::punned_cast<T*>(this);}
//! Pointer to one past last element in array.
T* end() const {return begin()+N;}
};
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_aligned_space_H_include_area
#endif /* __TBB_aligned_space_H */
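
A hedged sketch of aligned_space as deferred, suitably aligned storage; the Widget type is hypothetical. Since the class never constructs or destroys elements, placement-new and manual destruction are the caller's job.

#include "tbb/aligned_space.h"
#include <new>

struct Widget { int v; explicit Widget(int x) : v(x) {} };

int main() {
    tbb::aligned_space<Widget, 4> buf;                      // room for 4 Widgets, none constructed
    for (int i = 0; i < 4; ++i)
        new (buf.begin() + i) Widget(i);                    // placement-new each slot
    int sum = 0;
    for (Widget* p = buf.begin(); p != buf.end(); ++p)
        sum += p->v;
    for (Widget* p = buf.begin(); p != buf.end(); ++p)
        p->~Widget();                                       // manual destruction
    return sum == 6 ? 0 : 1;                                // 0+1+2+3
}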

@@ -0,0 +1,586 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_atomic_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_atomic_H
#pragma message("TBB Warning: tbb/atomic.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_atomic_H
#define __TBB_atomic_H
#define __TBB_atomic_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include <cstddef>
#if _MSC_VER
#define __TBB_LONG_LONG __int64
#else
#define __TBB_LONG_LONG long long
#endif /* _MSC_VER */
#include "tbb_machine.h"
#if _MSC_VER && !__INTEL_COMPILER
// Suppress overzealous compiler warnings till the end of the file
#pragma warning (push)
#pragma warning (disable: 4244 4267 4512)
#endif
namespace tbb {
//! Specifies memory semantics.
enum memory_semantics {
//! Sequential consistency
full_fence,
//! Acquire
acquire,
//! Release
release,
//! No ordering
relaxed
};
//! @cond INTERNAL
namespace internal {
#if __TBB_ALIGNAS_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) alignas(a) t f;
#elif __TBB_ATTRIBUTE_ALIGNED_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) t f __attribute__ ((aligned(a)));
#elif __TBB_DECLSPEC_ALIGN_PRESENT
#define __TBB_DECL_ATOMIC_FIELD(t,f,a) __declspec(align(a)) t f;
#else
#error Do not know syntax for forcing alignment.
#endif
template<size_t S>
struct atomic_rep; // Primary template declared, but never defined.
template<>
struct atomic_rep<1> { // Specialization
typedef int8_t word;
};
template<>
struct atomic_rep<2> { // Specialization
typedef int16_t word;
};
template<>
struct atomic_rep<4> { // Specialization
#if _MSC_VER && !_WIN64
// Work-around that avoids spurious /Wp64 warnings
typedef intptr_t word;
#else
typedef int32_t word;
#endif
};
#if __TBB_64BIT_ATOMICS
template<>
struct atomic_rep<8> { // Specialization
typedef int64_t word;
};
#endif
template<typename value_type, size_t size>
struct aligned_storage;
// The specializations are needed to please MSVC's __declspec(align()) syntax, which accepts _literal_ constants only
#if __TBB_ATOMIC_CTORS
#define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \
template<typename value_type> \
struct aligned_storage<value_type,S> { \
__TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \
aligned_storage() = default ; \
constexpr aligned_storage(value_type value):my_value(value){} \
};
#else
#define ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(S) \
template<typename value_type> \
struct aligned_storage<value_type,S> { \
__TBB_DECL_ATOMIC_FIELD(value_type,my_value,S) \
};
#endif
template<typename value_type>
struct aligned_storage<value_type,1> {
value_type my_value;
#if __TBB_ATOMIC_CTORS
aligned_storage() = default ;
constexpr aligned_storage(value_type value):my_value(value){}
#endif
};
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(2)
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(4)
#if __TBB_64BIT_ATOMICS
ATOMIC_STORAGE_PARTIAL_SPECIALIZATION(8)
#endif
template<size_t Size, memory_semantics M>
struct atomic_traits; // Primary template declared, but not defined.
#define __TBB_DECL_FENCED_ATOMIC_PRIMITIVES(S,M) \
template<> struct atomic_traits<S,M> { \
typedef atomic_rep<S>::word word; \
inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
return __TBB_machine_cmpswp##S##M(location,new_value,comparand); \
} \
inline static word fetch_and_add( volatile void* location, word addend ) { \
return __TBB_machine_fetchadd##S##M(location,addend); \
} \
inline static word fetch_and_store( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S##M(location,value); \
} \
};
#define __TBB_DECL_ATOMIC_PRIMITIVES(S) \
template<memory_semantics M> \
struct atomic_traits<S,M> { \
typedef atomic_rep<S>::word word; \
inline static word compare_and_swap( volatile void* location, word new_value, word comparand ) { \
return __TBB_machine_cmpswp##S(location,new_value,comparand); \
} \
inline static word fetch_and_add( volatile void* location, word addend ) { \
return __TBB_machine_fetchadd##S(location,addend); \
} \
inline static word fetch_and_store( volatile void* location, word value ) { \
return __TBB_machine_fetchstore##S(location,value); \
} \
};
template<memory_semantics M>
struct atomic_load_store_traits; // Primary template declaration
#define __TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(M) \
template<> struct atomic_load_store_traits<M> { \
template <typename T> \
inline static T load( const volatile T& location ) { \
return __TBB_load_##M( location ); \
} \
template <typename T> \
inline static void store( volatile T& location, T value ) { \
__TBB_store_##M( location, value ); \
} \
}
#if __TBB_USE_FENCED_ATOMICS
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(1,relaxed)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(2,relaxed)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(4,relaxed)
#if __TBB_64BIT_ATOMICS
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,full_fence)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,acquire)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,release)
__TBB_DECL_FENCED_ATOMIC_PRIMITIVES(8,relaxed)
#endif
#else /* !__TBB_USE_FENCED_ATOMICS */
__TBB_DECL_ATOMIC_PRIMITIVES(1)
__TBB_DECL_ATOMIC_PRIMITIVES(2)
__TBB_DECL_ATOMIC_PRIMITIVES(4)
#if __TBB_64BIT_ATOMICS
__TBB_DECL_ATOMIC_PRIMITIVES(8)
#endif
#endif /* !__TBB_USE_FENCED_ATOMICS */
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(full_fence);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(acquire);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(release);
__TBB_DECL_ATOMIC_LOAD_STORE_PRIMITIVES(relaxed);
//! Additive inverse of 1 for type T.
/** Various compilers issue various warnings if -1 is used with various integer types.
The baroque expression below avoids all the warnings (we hope). */
#define __TBB_MINUS_ONE(T) (T(T(0)-T(1)))
//! Base class that provides basic functionality for atomic<T> without fetch_and_add.
/** Works for any type T that has the same size as an integral type, has a trivial constructor/destructor,
and can be copied/compared by memcpy/memcmp. */
template<typename T>
struct atomic_impl {
protected:
aligned_storage<T,sizeof(T)> my_storage;
private:
//TODO: recheck on recent versions of gcc whether a union is still the _only_ way to do a conversion without warnings
//! Union type used to convert type T to underlying integral type.
template<typename value_type>
union converter {
typedef typename atomic_rep<sizeof(value_type)>::word bits_type;
converter(){}
converter(value_type a_value) : value(a_value) {}
value_type value;
bits_type bits;
};
template<typename value_t>
static typename converter<value_t>::bits_type to_bits(value_t value){
return converter<value_t>(value).bits;
}
template<typename value_t>
static value_t to_value(typename converter<value_t>::bits_type bits){
converter<value_t> u;
u.bits = bits;
return u.value;
}
template<typename value_t>
union ptr_converter; //Primary template declared, but never defined.
template<typename value_t>
union ptr_converter<value_t *> {
ptr_converter(){}
ptr_converter(value_t* a_value) : value(a_value) {}
value_t* value;
uintptr_t bits;
};
//TODO: check that making to_bits accept a reference (thus unifying it with to_bits_ref)
//does not hurt performance
template<typename value_t>
static typename converter<value_t>::bits_type & to_bits_ref(value_t& value){
//TODO: this #ifdef is temporary workaround, as union conversion seems to fail
//on suncc for 64 bit types for 32 bit target
#if !__SUNPRO_CC
return *(typename converter<value_t>::bits_type*)ptr_converter<value_t*>(&value).bits;
#else
return *(typename converter<value_t>::bits_type*)(&value);
#endif
}
public:
typedef T value_type;
#if __TBB_ATOMIC_CTORS
atomic_impl() = default ;
constexpr atomic_impl(value_type value):my_storage(value){}
#endif
template<memory_semantics M>
value_type fetch_and_store( value_type value ) {
return to_value<value_type>(
internal::atomic_traits<sizeof(value_type),M>::fetch_and_store( &my_storage.my_value, to_bits(value) )
);
}
value_type fetch_and_store( value_type value ) {
return fetch_and_store<full_fence>(value);
}
template<memory_semantics M>
value_type compare_and_swap( value_type value, value_type comparand ) {
return to_value<value_type>(
internal::atomic_traits<sizeof(value_type),M>::compare_and_swap( &my_storage.my_value, to_bits(value), to_bits(comparand) )
);
}
value_type compare_and_swap( value_type value, value_type comparand ) {
return compare_and_swap<full_fence>(value,comparand);
}
operator value_type() const volatile { // volatile qualifier here for backwards compatibility
return to_value<value_type>(
__TBB_load_with_acquire( to_bits_ref(my_storage.my_value) )
);
}
template<memory_semantics M>
value_type load () const {
return to_value<value_type>(
internal::atomic_load_store_traits<M>::load( to_bits_ref(my_storage.my_value) )
);
}
value_type load () const {
return load<acquire>();
}
template<memory_semantics M>
void store ( value_type value ) {
internal::atomic_load_store_traits<M>::store( to_bits_ref(my_storage.my_value), to_bits(value));
}
void store ( value_type value ) {
store<release>( value );
}
protected:
value_type store_with_release( value_type rhs ) {
//TODO: unify with store<release>
__TBB_store_with_release( to_bits_ref(my_storage.my_value), to_bits(rhs) );
return rhs;
}
};
//! Base class that provides basic functionality for atomic<T> with fetch_and_add.
/** I is the underlying type.
D is the difference type.
StepType should be char if I is an integral type, and T if I is a T*. */
template<typename I, typename D, typename StepType>
struct atomic_impl_with_arithmetic: atomic_impl<I> {
public:
typedef I value_type;
#if __TBB_ATOMIC_CTORS
atomic_impl_with_arithmetic() = default ;
constexpr atomic_impl_with_arithmetic(value_type value): atomic_impl<I>(value){}
#endif
template<memory_semantics M>
value_type fetch_and_add( D addend ) {
return value_type(internal::atomic_traits<sizeof(value_type),M>::fetch_and_add( &this->my_storage.my_value, addend*sizeof(StepType) ));
}
value_type fetch_and_add( D addend ) {
return fetch_and_add<full_fence>(addend);
}
template<memory_semantics M>
value_type fetch_and_increment() {
return fetch_and_add<M>(1);
}
value_type fetch_and_increment() {
return fetch_and_add(1);
}
template<memory_semantics M>
value_type fetch_and_decrement() {
return fetch_and_add<M>(__TBB_MINUS_ONE(D));
}
value_type fetch_and_decrement() {
return fetch_and_add(__TBB_MINUS_ONE(D));
}
public:
value_type operator+=( D value ) {
return fetch_and_add(value)+value;
}
value_type operator-=( D value ) {
// Additive inverse of value computed using binary minus,
// instead of unary minus, for sake of avoiding compiler warnings.
return operator+=(D(0)-value);
}
value_type operator++() {
return fetch_and_add(1)+1;
}
value_type operator--() {
return fetch_and_add(__TBB_MINUS_ONE(D))-1;
}
value_type operator++(int) {
return fetch_and_add(1);
}
value_type operator--(int) {
return fetch_and_add(__TBB_MINUS_ONE(D));
}
};
} // namespace internal
//! @endcond
//! Primary template for atomic.
/** See the Reference for details.
@ingroup synchronization */
template<typename T>
struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic")
atomic: internal::atomic_impl<T> {
#if __TBB_ATOMIC_CTORS
atomic() = default;
constexpr atomic(T arg): internal::atomic_impl<T>(arg) {}
constexpr atomic<T>(const atomic<T>& rhs): internal::atomic_impl<T>(rhs) {}
#endif
T operator=( T rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<T>& operator=( const atomic<T>& rhs ) {this->store_with_release(rhs); return *this;}
};
#if __TBB_ATOMIC_CTORS
#define __TBB_DECL_ATOMIC(T) \
template<> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic") \
atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
atomic() = default; \
constexpr atomic(T arg): internal::atomic_impl_with_arithmetic<T,T,char>(arg) {} \
constexpr atomic<T>(const atomic<T>& rhs): \
internal::atomic_impl_with_arithmetic<T,T,char>(rhs) {} \
\
T operator=( T rhs ) {return store_with_release(rhs);} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#else
#define __TBB_DECL_ATOMIC(T) \
template<> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic") \
atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
T operator=( T rhs ) {return store_with_release(rhs);} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#endif
#if __TBB_64BIT_ATOMICS
//TODO: consider adding non-default (and atomic) copy constructor for 32bit platform
__TBB_DECL_ATOMIC(__TBB_LONG_LONG)
__TBB_DECL_ATOMIC(unsigned __TBB_LONG_LONG)
#else
// test_atomic will verify that sizeof(long long)==8
#endif
__TBB_DECL_ATOMIC(long)
__TBB_DECL_ATOMIC(unsigned long)
#if _MSC_VER && !_WIN64
#if __TBB_ATOMIC_CTORS
/* Special version of __TBB_DECL_ATOMIC that avoids gratuitous warnings from cl /Wp64 option.
It is identical to __TBB_DECL_ATOMIC(unsigned) except that it replaces operator=(T)
with an operator=(U) that explicitly converts the U to a T. Types T and U should be
type synonyms on the platform. Type U should be the wider variant of T from the
perspective of /Wp64. */
#define __TBB_DECL_ATOMIC_ALT(T,U) \
template<> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic") \
atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
atomic() = default ; \
constexpr atomic(T arg): internal::atomic_impl_with_arithmetic<T,T,char>(arg) {} \
constexpr atomic<T>(const atomic<T>& rhs): \
internal::atomic_impl_with_arithmetic<T,T,char>(rhs) {} \
\
T operator=( U rhs ) {return store_with_release(T(rhs));} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#else
#define __TBB_DECL_ATOMIC_ALT(T,U) \
template<> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic") \
atomic<T>: internal::atomic_impl_with_arithmetic<T,T,char> { \
T operator=( U rhs ) {return store_with_release(T(rhs));} \
atomic<T>& operator=( const atomic<T>& rhs ) {store_with_release(rhs); return *this;} \
};
#endif
__TBB_DECL_ATOMIC_ALT(unsigned,size_t)
__TBB_DECL_ATOMIC_ALT(int,ptrdiff_t)
#else
__TBB_DECL_ATOMIC(unsigned)
__TBB_DECL_ATOMIC(int)
#endif /* _MSC_VER && !_WIN64 */
__TBB_DECL_ATOMIC(unsigned short)
__TBB_DECL_ATOMIC(short)
__TBB_DECL_ATOMIC(char)
__TBB_DECL_ATOMIC(signed char)
__TBB_DECL_ATOMIC(unsigned char)
#if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
__TBB_DECL_ATOMIC(wchar_t)
#endif /* !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED) */
//! Specialization for atomic<T*> with arithmetic and operator->.
template<typename T> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic")
atomic<T*>: internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T> {
#if __TBB_ATOMIC_CTORS
atomic() = default ;
constexpr atomic(T* arg): internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T>(arg) {}
constexpr atomic(const atomic<T*>& rhs): internal::atomic_impl_with_arithmetic<T*,ptrdiff_t,T>(rhs) {}
#endif
T* operator=( T* rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<T*>& operator=( const atomic<T*>& rhs ) {
this->store_with_release(rhs); return *this;
}
T* operator->() const {
return (*this);
}
};
//! Specialization for atomic<void*>, for sake of not allowing arithmetic or operator->.
template<> struct __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::atomic is deprecated, use std::atomic")
atomic<void*>: internal::atomic_impl<void*> {
#if __TBB_ATOMIC_CTORS
atomic() = default ;
constexpr atomic(void* arg): internal::atomic_impl<void*>(arg) {}
constexpr atomic(const atomic<void*>& rhs): internal::atomic_impl<void*>(rhs) {}
#endif
void* operator=( void* rhs ) {
// "this" required here in strict ISO C++ because store_with_release is a dependent name
return this->store_with_release(rhs);
}
atomic<void*>& operator=( const atomic<void*>& rhs ) {
this->store_with_release(rhs); return *this;
}
};
// Helpers to workaround ugly syntax of calling template member function of a
// template class with template argument dependent on template parameters.
template <memory_semantics M, typename T>
T load ( const atomic<T>& a ) { return a.template load<M>(); }
template <memory_semantics M, typename T>
void store ( atomic<T>& a, T value ) { a.template store<M>(value); }
namespace interface6{
//! Make an atomic for use in an initialization (list), as an alternative to zero-initialization or normal assignment.
template<typename T>
atomic<T> make_atomic(T t) {
atomic<T> a;
store<relaxed>(a,t);
return a;
}
}
using interface6::make_atomic;
namespace internal {
template<memory_semantics M, typename T >
void swap(atomic<T> & lhs, atomic<T> & rhs){
T tmp = load<M>(lhs);
store<M>(lhs,load<M>(rhs));
store<M>(rhs,tmp);
}
// only to aid in the gradual conversion of ordinary variables to proper atomics
template<typename T>
inline atomic<T>& as_atomic( T& t ) {
return (atomic<T>&)t;
}
} // namespace tbb::internal
} // namespace tbb
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif // warnings are restored
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_atomic_H_include_area
#endif /* __TBB_atomic_H */
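
A hedged sketch of the deprecated tbb::atomic interface declared above (a new program would use std::atomic, as the deprecation message says); the header path matches the warning text, the values are arbitrary.

#include "tbb/atomic.h"

int main() {
    tbb::atomic<int> counter;
    counter = 0;                                   // operator= stores with release semantics
    counter.fetch_and_add(5);                      // full-fence read-modify-write, returns the old value
    ++counter;                                     // atomic pre-increment -> 6
    int seen = counter.compare_and_swap(10, 6);    // write 10 if the current value is 6
    int now  = tbb::load<tbb::relaxed>(counter);   // free-function helper for explicit memory semantics
    return (seen == 6 && now == 10) ? 0 : 1;
}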

@@ -0,0 +1,168 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range_H
#define __TBB_blocked_range_H
#include "tbb_stddef.h"
namespace tbb {
namespace internal {
// blocked_rangeNd_impl forward declaration in tbb::internal namespace to
// name it as a friend for a tbb::blocked_range.
template<typename Value, unsigned int N, typename>
class blocked_rangeNd_impl;
} // namespace internal
/** \page range_req Requirements on range concept
Class \c R implementing the concept of range must define:
- \code R::R( const R& ); \endcode Copy constructor
- \code R::~R(); \endcode Destructor
- \code bool R::is_divisible() const; \endcode True if range can be partitioned into two subranges
- \code bool R::empty() const; \endcode True if range is empty
- \code R::R( R& r, split ); \endcode Split range \c r into two subranges.
**/
//! A range over which to iterate.
/** @ingroup algorithms */
template<typename Value>
class blocked_range {
public:
//! Type of a value
/** Called a const_iterator for sake of algorithms that need to treat a blocked_range
as an STL container. */
typedef Value const_iterator;
//! Type for size of a range
typedef std::size_t size_type;
#if __TBB_DEPRECATED_BLOCKED_RANGE_DEFAULT_CTOR
//! Construct range with default-constructed values for begin, end, and grainsize.
/** Requires that Value have a default constructor. */
blocked_range() : my_end(), my_begin(), my_grainsize() {}
#endif
//! Construct range over half-open interval [begin,end), with the given grainsize.
blocked_range( Value begin_, Value end_, size_type grainsize_=1 ) :
my_end(end_), my_begin(begin_), my_grainsize(grainsize_)
{
__TBB_ASSERT( my_grainsize>0, "grainsize must be positive" );
}
//! Beginning of range.
const_iterator begin() const {return my_begin;}
//! One past last value in range.
const_iterator end() const {return my_end;}
//! Size of the range
/** Unspecified if end()<begin(). */
size_type size() const {
__TBB_ASSERT( !(end()<begin()), "size() unspecified if end()<begin()" );
return size_type(my_end-my_begin);
}
//! The grain size for this range.
size_type grainsize() const {return my_grainsize;}
//------------------------------------------------------------------------
// Methods that implement Range concept
//------------------------------------------------------------------------
//! True if range is empty.
bool empty() const {return !(my_begin<my_end);}
//! True if range is divisible.
/** Unspecified if end()<begin(). */
bool is_divisible() const {return my_grainsize<size();}
//! Split range.
/** The new Range *this has the second part, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, split ) :
my_end(r.my_end),
my_begin(do_split(r, split())),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
//! Split range.
/** The new Range *this has the second part split according to specified proportion, the old range r has the first part.
Unspecified if end()<begin() or !is_divisible(). */
blocked_range( blocked_range& r, proportional_split& proportion ) :
my_end(r.my_end),
my_begin(do_split(r, proportion)),
my_grainsize(r.my_grainsize)
{
// only comparison 'less than' is required from values of blocked_range objects
__TBB_ASSERT( !(my_begin < r.my_end) && !(r.my_end < my_begin), "blocked_range has been split incorrectly" );
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
private:
/** NOTE: my_end MUST be declared before my_begin, otherwise the splitting constructor will break. */
Value my_end;
Value my_begin;
size_type my_grainsize;
//! Auxiliary function used by the splitting constructor.
static Value do_split( blocked_range& r, split )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
Value middle = r.my_begin + (r.my_end - r.my_begin) / 2u;
r.my_end = middle;
return middle;
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
static Value do_split( blocked_range& r, proportional_split& proportion )
{
__TBB_ASSERT( r.is_divisible(), "cannot split blocked_range that is not divisible" );
// 32-bit floating point arithmetic is not precise enough to handle ranges of
// more than 2^24 iterations exactly. However, even for ranges of 2^64
// iterations the computational error is roughly 0.000001%, which has little
// impact on the uniform distribution of the range's iterations (assuming all
// iterations take equal time to complete). See 'test_partitioner_whitebox'
// for an implementation of an exact split algorithm.
size_type right_part = size_type(float(r.size()) * float(proportion.right())
/ float(proportion.left() + proportion.right()) + 0.5f);
return r.my_end = Value(r.my_end - right_part);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
template<typename RowValue, typename ColValue>
friend class blocked_range2d;
template<typename RowValue, typename ColValue, typename PageValue>
friend class blocked_range3d;
template<typename DimValue, unsigned int N, typename>
friend class internal::blocked_rangeNd_impl;
};
} // namespace tbb
#endif /* __TBB_blocked_range_H */
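
A hedged sketch of the Range concept as implemented by blocked_range above: the splitting constructor halves the interval, leaving the first half in the original object. The bounds and grainsize are arbitrary.

#include "tbb/blocked_range.h"
#include <cstddef>
#include <iostream>

int main() {
    tbb::blocked_range<std::size_t> r(0, 1000, 100);          // [0,1000) with grainsize 100
    tbb::blocked_range<std::size_t> right(r, tbb::split());   // r keeps [0,500), right gets [500,1000)
    std::cout << r.begin() << ".." << r.end() << " | "
              << right.begin() << ".." << right.end() << '\n';
    std::cout << "divisible: " << r.is_divisible()
              << ", size: " << right.size() << '\n';
    return 0;
}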

@@ -0,0 +1,104 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range2d_H
#define __TBB_blocked_range2d_H
#include "tbb_stddef.h"
#include "blocked_range.h"
namespace tbb {
//! A 2-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename RowValue, typename ColValue=RowValue>
class blocked_range2d {
public:
//! Type for size of an iteration range
typedef blocked_range<RowValue> row_range_type;
typedef blocked_range<ColValue> col_range_type;
private:
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range2d( RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{}
blocked_range2d( RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{}
//! True if range is empty
bool empty() const {
// Range is empty if at least one dimension is empty.
return my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range2d( blocked_range2d& r, split ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
split split_obj;
do_split(r, split_obj);
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
blocked_range2d( blocked_range2d& r, proportional_split& proportion ) :
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
//! The rows of the iteration space
const row_range_type& rows() const {return my_rows;}
//! The columns of the iteration space
const col_range_type& cols() const {return my_cols;}
private:
template <typename Split>
void do_split( blocked_range2d& r, Split& split_obj )
{
if( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
}
};
} // namespace tbb
#endif /* __TBB_blocked_range2d_H */
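
A hedged sketch of the 2-D range above, assuming the header installs as tbb/blocked_range2d.h; the grid dimensions are arbitrary. Splitting divides the dimension whose size-to-grainsize ratio is larger, here the columns.

#include "tbb/blocked_range2d.h"
#include <cstddef>
#include <iostream>

int main() {
    tbb::blocked_range2d<std::size_t> grid(0, 512, 64,        // rows, grainsize 64
                                           0, 2048, 64);      // cols, grainsize 64
    tbb::blocked_range2d<std::size_t> right(grid, tbb::split());
    std::cout << "rows " << grid.rows().size() << "/" << right.rows().size()
              << ", cols " << grid.cols().size() << "/" << right.cols().size() << '\n';
    // prints "rows 512/512, cols 1024/1024": only the column dimension was split
    return 0;
}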

@@ -0,0 +1,123 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_range3d_H
#define __TBB_blocked_range3d_H
#include "tbb_stddef.h"
#include "blocked_range.h"
namespace tbb {
//! A 3-dimensional range that models the Range concept.
/** @ingroup algorithms */
template<typename PageValue, typename RowValue=PageValue, typename ColValue=RowValue>
class blocked_range3d {
public:
//! Type for size of an iteration range
typedef blocked_range<PageValue> page_range_type;
typedef blocked_range<RowValue> row_range_type;
typedef blocked_range<ColValue> col_range_type;
private:
page_range_type my_pages;
row_range_type my_rows;
col_range_type my_cols;
public:
blocked_range3d( PageValue page_begin, PageValue page_end,
RowValue row_begin, RowValue row_end,
ColValue col_begin, ColValue col_end ) :
my_pages(page_begin,page_end),
my_rows(row_begin,row_end),
my_cols(col_begin,col_end)
{}
blocked_range3d( PageValue page_begin, PageValue page_end, typename page_range_type::size_type page_grainsize,
RowValue row_begin, RowValue row_end, typename row_range_type::size_type row_grainsize,
ColValue col_begin, ColValue col_end, typename col_range_type::size_type col_grainsize ) :
my_pages(page_begin,page_end,page_grainsize),
my_rows(row_begin,row_end,row_grainsize),
my_cols(col_begin,col_end,col_grainsize)
{}
//! True if range is empty
bool empty() const {
// Range is empty if at least one dimension is empty.
return my_pages.empty() || my_rows.empty() || my_cols.empty();
}
//! True if range is divisible into two pieces.
bool is_divisible() const {
return my_pages.is_divisible() || my_rows.is_divisible() || my_cols.is_divisible();
}
blocked_range3d( blocked_range3d& r, split ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
split split_obj;
do_split(r, split_obj);
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split
static const bool is_splittable_in_proportion = true;
blocked_range3d( blocked_range3d& r, proportional_split& proportion ) :
my_pages(r.my_pages),
my_rows(r.my_rows),
my_cols(r.my_cols)
{
do_split(r, proportion);
}
#endif /* __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES */
//! The pages of the iteration space
const page_range_type& pages() const {return my_pages;}
//! The rows of the iteration space
const row_range_type& rows() const {return my_rows;}
//! The columns of the iteration space
const col_range_type& cols() const {return my_cols;}
private:
template <typename Split>
void do_split( blocked_range3d& r, Split& split_obj)
{
if ( my_pages.size()*double(my_rows.grainsize()) < my_rows.size()*double(my_pages.grainsize()) ) {
if ( my_rows.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_rows.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_rows.my_begin = row_range_type::do_split(r.my_rows, split_obj);
}
} else {
if ( my_pages.size()*double(my_cols.grainsize()) < my_cols.size()*double(my_pages.grainsize()) ) {
my_cols.my_begin = col_range_type::do_split(r.my_cols, split_obj);
} else {
my_pages.my_begin = page_range_type::do_split(r.my_pages, split_obj);
}
}
}
};
} // namespace tbb
#endif /* __TBB_blocked_range3d_H */
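
A brief hedged sketch for the 3-D variant, assuming the header installs as tbb/blocked_range3d.h; the values are arbitrary. As in 2-D, empty() is true as soon as any one dimension is empty, while each dimension keeps its own bounds and grainsize.

#include "tbb/blocked_range3d.h"

int main() {
    tbb::blocked_range3d<int> volume(0, 8,     // pages, default grainsize 1
                                     0, 16,    // rows
                                     0, 0);    // cols: an empty interval
    bool ok = volume.empty()                   // one empty dimension empties the whole range
              && volume.pages().size() == 8
              && volume.cols().empty();
    return ok ? 0 : 1;
}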

@@ -0,0 +1,150 @@
/*
Copyright (c) 2017-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_blocked_rangeNd_H
#define __TBB_blocked_rangeNd_H
#if ! TBB_PREVIEW_BLOCKED_RANGE_ND
#error Set TBB_PREVIEW_BLOCKED_RANGE_ND to include blocked_rangeNd.h
#endif
#include "tbb_config.h"
// tbb::blocked_rangeNd requires C++11 support
#if __TBB_CPP11_PRESENT && __TBB_CPP11_ARRAY_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT
#include "internal/_template_helpers.h" // index_sequence, make_index_sequence
#include <array>
#include <algorithm> // std::any_of
#include <type_traits> // std::is_same, std::enable_if
#include "tbb/blocked_range.h"
namespace tbb {
namespace internal {
/*
The blocked_rangeNd_impl uses make_index_sequence<N> to automatically generate a ctor with
exactly N arguments of the type tbb::blocked_range<Value>. Such ctor provides an opportunity
to use braced-init-list parameters to initialize each dimension.
A parameter whose argument is a braced-init-list, but whose type is neither
std::initializer_list nor a reference to one, is a non-deduced context in
template argument deduction.
NOTE: blocked_rangeNd must be exactly a templated alias to the blocked_rangeNd_impl
(and not e.g. a derived class), otherwise it would need to declare its own ctor
facing the same problem that the impl class solves.
*/
template<typename Value, unsigned int N, typename = make_index_sequence<N>>
class blocked_rangeNd_impl;
template<typename Value, unsigned int N, std::size_t... Is>
class blocked_rangeNd_impl<Value, N, index_sequence<Is...>> {
public:
//! Type of a value.
using value_type = Value;
private:
//! Helper type to construct range with N tbb::blocked_range<value_type> objects.
template<std::size_t>
using dim_type_helper = tbb::blocked_range<value_type>;
public:
blocked_rangeNd_impl() = delete;
//! Constructs N-dimensional range over N half-open intervals each represented as tbb::blocked_range<Value>.
blocked_rangeNd_impl(const dim_type_helper<Is>&... args) : my_dims{ {args...} } {}
//! Dimensionality of a range.
static constexpr unsigned int ndims() { return N; }
//! Range in certain dimension.
const tbb::blocked_range<value_type>& dim(unsigned int dimension) const {
__TBB_ASSERT(dimension < N, "out of bound");
return my_dims[dimension];
}
//------------------------------------------------------------------------
// Methods that implement Range concept
//------------------------------------------------------------------------
//! True if at least one dimension is empty.
bool empty() const {
return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) {
return d.empty();
});
}
//! True if at least one dimension is divisible.
bool is_divisible() const {
return std::any_of(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& d) {
return d.is_divisible();
});
}
#if __TBB_USE_PROPORTIONAL_SPLIT_IN_BLOCKED_RANGES
//! Static field to support proportional split.
static const bool is_splittable_in_proportion = true;
blocked_rangeNd_impl(blocked_rangeNd_impl& r, proportional_split proportion) : my_dims(r.my_dims) {
do_split(r, proportion);
}
#endif
blocked_rangeNd_impl(blocked_rangeNd_impl& r, split proportion) : my_dims(r.my_dims) {
do_split(r, proportion);
}
private:
__TBB_STATIC_ASSERT(N != 0, "zero dimensional blocked_rangeNd can't be constructed");
//! Ranges in each dimension.
std::array<tbb::blocked_range<value_type>, N> my_dims;
template<typename split_type>
void do_split(blocked_rangeNd_impl& r, split_type proportion) {
__TBB_STATIC_ASSERT((is_same_type<split_type, split>::value
|| is_same_type<split_type, proportional_split>::value),
"type of split object is incorrect");
__TBB_ASSERT(r.is_divisible(), "can't split not divisible range");
auto my_it = std::max_element(my_dims.begin(), my_dims.end(), [](const tbb::blocked_range<value_type>& first, const tbb::blocked_range<value_type>& second) {
return (first.size() * second.grainsize() < second.size() * first.grainsize());
});
auto r_it = r.my_dims.begin() + (my_it - my_dims.begin());
my_it->my_begin = tbb::blocked_range<value_type>::do_split(*r_it, proportion);
// (!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin)) equals to
// (my_it->my_begin == r_it->my_end), but we can't use operator== due to Value concept
__TBB_ASSERT(!(my_it->my_begin < r_it->my_end) && !(r_it->my_end < my_it->my_begin),
"blocked_range has been split incorrectly");
}
};
} // namespace internal
template<typename Value, unsigned int N>
using blocked_rangeNd = internal::blocked_rangeNd_impl<Value, N>;
} // namespace tbb
#endif /* __TBB_CPP11_PRESENT && __TBB_CPP11_ARRAY_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT */
#endif /* __TBB_blocked_rangeNd_H */
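A minimal usage sketch, not part of the commit: it assumes a TBB 2020 installation on the include path and that the header is gated by the preview macro TBB_PREVIEW_BLOCKED_RANGE_ND, as with other preview features.

#define TBB_PREVIEW_BLOCKED_RANGE_ND 1
#include "tbb/blocked_rangeNd.h"
#include <iostream>

int main() {
    // Two half-open intervals, each with an explicit grainsize of 8.
    tbb::blocked_rangeNd<int, 2> r({0, 100, 8}, {0, 200, 8});
    std::cout << "ndims = " << r.ndims()
              << ", dim0 size = " << r.dim(0).size()
              << ", dim1 size = " << r.dim(1).size()
              << ", divisible = " << r.is_divisible() << "\n";
    return 0;
}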

View File

@@ -0,0 +1,209 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_cache_aligned_allocator_H
#define __TBB_cache_aligned_allocator_H
#include <new>
#include "tbb_stddef.h"
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
#include <memory_resource>
#endif
namespace tbb {
//! @cond INTERNAL
namespace internal {
//! Cache/sector line size.
/** @ingroup memory_allocation */
size_t __TBB_EXPORTED_FUNC NFS_GetLineSize();
//! Allocate memory on cache/sector line boundary.
/** @ingroup memory_allocation */
void* __TBB_EXPORTED_FUNC NFS_Allocate( size_t n_element, size_t element_size, void* hint );
//! Free memory allocated by NFS_Allocate.
/** Freeing a NULL pointer is allowed, but has no effect.
@ingroup memory_allocation */
void __TBB_EXPORTED_FUNC NFS_Free( void* );
}
//! @endcond
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** The members are ordered the same way they are in section 20.4.1
of the ISO C++ standard.
@ingroup memory_allocation */
template<typename T>
class cache_aligned_allocator {
public:
typedef typename internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef cache_aligned_allocator<U> other;
};
cache_aligned_allocator() throw() {}
cache_aligned_allocator( const cache_aligned_allocator& ) throw() {}
template<typename U> cache_aligned_allocator(const cache_aligned_allocator<U>&) throw() {}
pointer address(reference x) const {return &x;}
const_pointer address(const_reference x) const {return &x;}
//! Allocate space for n objects, starting on a cache/sector line.
pointer allocate( size_type n, const void* hint=0 ) {
// The "hint" argument is always ignored in NFS_Allocate thus const_cast shouldn't hurt
return pointer(internal::NFS_Allocate( n, sizeof(value_type), const_cast<void*>(hint) ));
}
//! Free block of memory that starts on a cache line
void deallocate( pointer p, size_type ) {
internal::NFS_Free(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
return (~size_t(0)-internal::NFS_MaxLineSize)/sizeof(value_type);
}
//! Copy-construct value at location pointed to by p.
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
#endif
void construct( pointer p, const value_type& value ) {::new((void*)(p)) value_type(value);}
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
//! Destroy value at location pointed to by p.
void destroy( pointer p ) {p->~value_type();}
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<>
class cache_aligned_allocator<void> {
public:
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef cache_aligned_allocator<U> other;
};
};
template<typename T, typename U>
inline bool operator==( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return true;}
template<typename T, typename U>
inline bool operator!=( const cache_aligned_allocator<T>&, const cache_aligned_allocator<U>& ) {return false;}
#if __TBB_CPP17_MEMORY_RESOURCE_PRESENT
//! C++17 memory resource wrapper to ensure cache line size alignment
class cache_aligned_resource : public std::pmr::memory_resource {
public:
cache_aligned_resource() : cache_aligned_resource(std::pmr::get_default_resource()) {}
explicit cache_aligned_resource(std::pmr::memory_resource* upstream) : m_upstream(upstream) {}
std::pmr::memory_resource* upstream_resource() const {
return m_upstream;
}
private:
//! We don't know what memory resource is set upstream, so use padding to guarantee the required alignment.
void* do_allocate(size_t bytes, size_t alignment) override {
size_t cache_line_alignment = correct_alignment(alignment);
uintptr_t base = (uintptr_t)m_upstream->allocate(correct_size(bytes) + cache_line_alignment);
__TBB_ASSERT(base != 0, "Upstream resource returned NULL.");
#if _MSC_VER && !defined(__INTEL_COMPILER)
// unary minus operator applied to unsigned type, result still unsigned
#pragma warning(push)
#pragma warning(disable: 4146 4706)
#endif
// Round up to the next cache line (align the base address)
uintptr_t result = (base + cache_line_alignment) & -cache_line_alignment;
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning(pop)
#endif
// Record where block actually starts.
((uintptr_t*)result)[-1] = base;
return (void*)result;
}
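// Worked example (illustrative numbers only, assuming a 64-byte cache line): if the
// upstream block starts at base = 0x1010, then result = (0x1010 + 0x40) & ~0x3F = 0x1040,
// which leaves 0x30 bytes of padding in front of result; the last sizeof(uintptr_t) of
// those bytes hold base, which do_deallocate below reads back to free the whole block.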
void do_deallocate(void* ptr, size_t bytes, size_t alignment) override {
if (ptr) {
// Recover where block actually starts
uintptr_t base = ((uintptr_t*)ptr)[-1];
m_upstream->deallocate((void*)base, correct_size(bytes) + correct_alignment(alignment));
}
}
bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
if (this == &other) { return true; }
#if __TBB_USE_OPTIONAL_RTTI
const cache_aligned_resource* other_res = dynamic_cast<const cache_aligned_resource*>(&other);
return other_res && (this->upstream_resource() == other_res->upstream_resource());
#else
return false;
#endif
}
size_t correct_alignment(size_t alignment) {
__TBB_ASSERT(tbb::internal::is_power_of_two(alignment), "Alignment is not a power of 2");
#if __TBB_CPP17_HW_INTERFERENCE_SIZE_PRESENT
size_t cache_line_size = std::hardware_destructive_interference_size;
#else
size_t cache_line_size = internal::NFS_GetLineSize();
#endif
return alignment < cache_line_size ? cache_line_size : alignment;
}
size_t correct_size(size_t bytes) {
// Handles the case when a small size is requested: otherwise there might not be
// enough space to store the original base pointer.
return bytes < sizeof(uintptr_t) ? sizeof(uintptr_t) : bytes;
}
std::pmr::memory_resource* m_upstream;
};
#endif /* __TBB_CPP17_MEMORY_RESOURCE_PRESENT */
} // namespace tbb
#endif /* __TBB_cache_aligned_allocator_H */
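A minimal usage sketch, not part of the commit; it assumes the TBB 2020 headers are on the include path, that the program links against the tbb library (NFS_Allocate lives there), and that the cache line is 64 bytes, which is typical but ultimately determined by NFS_GetLineSize().

#include "tbb/cache_aligned_allocator.h"
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
    // The vector's buffer starts on a cache-line boundary, which helps avoid false
    // sharing when several threads update adjacent per-thread slots.
    std::vector<int, tbb::cache_aligned_allocator<int>> counters(8, 0);
    bool aligned = reinterpret_cast<std::uintptr_t>(counters.data()) % 64 == 0;
    std::printf("buffer %p aligned to 64 bytes: %s\n",
                static_cast<void*>(counters.data()), aligned ? "yes" : "no");
    return 0;
}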

View File

@@ -0,0 +1,88 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_combinable_H
#define __TBB_combinable_H
#define __TBB_combinable_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "enumerable_thread_specific.h"
#include "cache_aligned_allocator.h"
namespace tbb {
/** \name combinable
**/
//@{
//! Thread-local storage with optional reduction
/** @ingroup containers */
template <typename T>
class combinable {
private:
typedef typename tbb::cache_aligned_allocator<T> my_alloc;
typedef typename tbb::enumerable_thread_specific<T, my_alloc, ets_no_key> my_ets_type;
my_ets_type my_ets;
public:
combinable() { }
template <typename finit>
explicit combinable( finit _finit) : my_ets(_finit) { }
//! destructor
~combinable() { }
combinable( const combinable& other) : my_ets(other.my_ets) { }
#if __TBB_ETS_USE_CPP11
combinable( combinable&& other) : my_ets( std::move(other.my_ets)) { }
#endif
combinable & operator=( const combinable & other) {
my_ets = other.my_ets;
return *this;
}
#if __TBB_ETS_USE_CPP11
combinable & operator=( combinable && other) {
my_ets=std::move(other.my_ets);
return *this;
}
#endif
void clear() { my_ets.clear(); }
T& local() { return my_ets.local(); }
T& local(bool & exists) { return my_ets.local(exists); }
// combine_func_t has signature T(T,T) or T(const T&, const T&)
template <typename combine_func_t>
T combine(combine_func_t f_combine) { return my_ets.combine(f_combine); }
// combine_func_t has signature void(T) or void(const T&)
template <typename combine_func_t>
void combine_each(combine_func_t f_combine) { my_ets.combine_each(f_combine); }
};
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_combinable_H_include_area
#endif /* __TBB_combinable_H */
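A minimal usage sketch, not part of the commit; it assumes the TBB 2020 headers plus the tbb library and a C++11 compiler (for the lambdas).

#include "tbb/combinable.h"
#include "tbb/parallel_for.h"
#include <iostream>

int main() {
    // One zero-initialized long per thread; the finit functor runs lazily on first local() access.
    tbb::combinable<long> partial_sums([] { return 0L; });
    tbb::parallel_for(0, 1000, [&](int i) {
        partial_sums.local() += i;          // no contention on a shared counter: each thread updates its own copy
    });
    long total = partial_sums.combine([](long a, long b) { return a + b; });
    std::cout << "sum = " << total << "\n"; // 499500
    return 0;
}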

View File

@@ -0,0 +1,489 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_condition_variable_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_condition_variable_H
#pragma message("TBB Warning: tbb/compat/condition_variable is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_condition_variable_H
#define __TBB_condition_variable_H
#define __TBB_condition_variable_H_include_area
#include "../internal/_warning_suppress_enable_notice.h"
#if _WIN32||_WIN64
#include "../machine/windows_api.h"
namespace tbb {
namespace interface5 {
namespace internal {
struct condition_variable_using_event
{
//! Event for blocking waiting threads.
HANDLE event;
//! Protects invariants involving n_waiters, release_count, and epoch.
CRITICAL_SECTION mutex;
//! Number of threads waiting on this condition variable
int n_waiters;
//! Number of threads remaining that should no longer wait on this condition variable.
int release_count;
//! To keep threads from waking up prematurely with earlier signals.
unsigned epoch;
};
}}} // namespace tbb::interface5::internal
#ifndef CONDITION_VARIABLE_INIT
typedef void* CONDITION_VARIABLE;
typedef CONDITION_VARIABLE* PCONDITION_VARIABLE;
#endif
#else /* if not _WIN32||_WIN64 */
#include <errno.h> // some systems need it for ETIMEDOUT
#include <pthread.h>
#if __linux__
#include <ctime>
#else /* generic Unix */
#include <sys/time.h>
#endif
#endif /* _WIN32||_WIN64 */
#include "../tbb_stddef.h"
#include "../mutex.h"
#include "../tbb_thread.h"
#include "../tbb_exception.h"
#include "../tbb_profiling.h"
namespace tbb {
namespace interface5 {
// C++0x standard working draft 30.4.3
// Lock tag types
struct __TBB_DEPRECATED_IN_VERBOSE_MODE defer_lock_t { }; //! do not acquire ownership of the mutex
struct __TBB_DEPRECATED_IN_VERBOSE_MODE try_to_lock_t { }; //! try to acquire ownership of the mutex without blocking
struct __TBB_DEPRECATED_IN_VERBOSE_MODE adopt_lock_t { }; //! assume the calling thread has already obtained ownership of the mutex
__TBB_DEPRECATED_IN_VERBOSE_MODE const defer_lock_t defer_lock = {};
__TBB_DEPRECATED_IN_VERBOSE_MODE const try_to_lock_t try_to_lock = {};
__TBB_DEPRECATED_IN_VERBOSE_MODE const adopt_lock_t adopt_lock = {};
// C++0x standard working draft 30.4.3.1
//! lock_guard
template<typename M>
class __TBB_DEPRECATED_IN_VERBOSE_MODE lock_guard : tbb::internal::no_copy {
public:
//! mutex type
typedef M mutex_type;
//! Constructor
/** precondition: If mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. */
explicit lock_guard(mutex_type& m) : pm(m) {m.lock();}
//! Adopt_lock constructor
/** precondition: the calling thread owns the mutex m. */
lock_guard(mutex_type& m, adopt_lock_t) : pm(m) {}
//! Destructor
~lock_guard() { pm.unlock(); }
private:
mutex_type& pm;
};
// C++0x standard working draft 30.4.3.2
//! unique_lock
template<typename M>
class __TBB_DEPRECATED_IN_VERBOSE_MODE unique_lock : tbb::internal::no_copy {
friend class condition_variable;
public:
typedef M mutex_type;
// 30.4.3.2.1 construct/copy/destroy
// NB: Without constructors that take an r-value reference to a unique_lock, the following constructor is of little use.
//! Constructor
/** postcondition: pm==0 && owns==false */
unique_lock() : pm(NULL), owns(false) {}
//! Constructor
/** precondition: if mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. If the precondition is not met, a deadlock occurs.
postcondition: pm==&m and owns==true */
explicit unique_lock(mutex_type& m) : pm(&m) {m.lock(); owns=true;}
//! Defer_lock constructor
/** postcondition: pm==&m and owns==false */
unique_lock(mutex_type& m, defer_lock_t) : pm(&m), owns(false) {}
//! Try_to_lock constructor
/** precondition: if mutex_type is not a recursive mutex, the calling thread
does not own the mutex m. If the precondition is not met, a deadlock occurs.
postcondition: pm==&m and owns==res where res is the value returned by
the call to m.try_lock(). */
unique_lock(mutex_type& m, try_to_lock_t) : pm(&m) {owns = m.try_lock();}
//! Adopt_lock constructor
/** precondition: the calling thread owns the mutex. If it does not, mutex->unlock() would fail.
postcondition: pm==&m and owns==true */
unique_lock(mutex_type& m, adopt_lock_t) : pm(&m), owns(true) {}
//! Timed unique_lock acquisition.
/** To avoid requiring support for namespace chrono, this method deviates from the working draft in that
it uses tbb::tick_count::interval_t to specify the time duration. */
unique_lock(mutex_type& m, const tick_count::interval_t &i) : pm(&m) {owns = try_lock_for( i );}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructor
/** postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this
construction), src.pm == 0 and src.owns == false. */
unique_lock(unique_lock && src): pm(NULL), owns(false) {this->swap(src);}
//! Move assignment
/** effects: If owns calls pm->unlock().
Postconditions: pm == src_p.pm and owns == src_p.owns (where src_p is the state of src just prior to this
assignment), src.pm == 0 and src.owns == false. */
unique_lock& operator=(unique_lock && src) {
if (owns)
this->unlock();
pm = NULL;
this->swap(src);
return *this;
}
#endif // __TBB_CPP11_RVALUE_REF_PRESENT
//! Destructor
~unique_lock() { if( owns ) pm->unlock(); }
// 30.4.3.2.2 locking
//! Lock the mutex and own it.
void lock() {
if( pm ) {
if( !owns ) {
pm->lock();
owns = true;
} else
throw_exception_v4( tbb::internal::eid_possible_deadlock );
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
__TBB_ASSERT( owns, NULL );
}
//! Try to lock the mutex.
/** If successful, record that this lock now owns the mutex; otherwise, owns remains false. */
bool try_lock() {
if( pm ) {
if( !owns )
owns = pm->try_lock();
else
throw_exception_v4( tbb::internal::eid_possible_deadlock );
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
return owns;
}
//! Try to lock the mutex.
bool try_lock_for( const tick_count::interval_t &i );
//! Unlock the mutex
/** And note that this lock no longer owns it. */
void unlock() {
if( owns ) {
pm->unlock();
owns = false;
} else
throw_exception_v4( tbb::internal::eid_operation_not_permitted );
__TBB_ASSERT( !owns, NULL );
}
// 30.4.3.2.3 modifiers
//! Swap the two unique locks
void swap(unique_lock& u) {
mutex_type* t_pm = u.pm; u.pm = pm; pm = t_pm;
bool t_owns = u.owns; u.owns = owns; owns = t_owns;
}
//! Release control over the mutex.
mutex_type* release() {
mutex_type* o_pm = pm;
pm = NULL;
owns = false;
return o_pm;
}
// 30.4.3.2.4 observers
//! Does this lock own the mutex?
bool owns_lock() const { return owns; }
// TODO: Un-comment 'explicit' when the last non-C++0x compiler support is dropped
//! Does this lock own the mutex?
/*explicit*/ operator bool() const { return owns; }
//! Return the mutex that this lock currently has.
mutex_type* mutex() const { return pm; }
private:
mutex_type* pm;
bool owns;
};
template<typename M>
__TBB_DEPRECATED_IN_VERBOSE_MODE bool unique_lock<M>::try_lock_for( const tick_count::interval_t &i)
{
const int unique_lock_tick = 100; /* microseconds; 0.1 milliseconds */
// the smallest wait-time is 0.1 milliseconds.
bool res = pm->try_lock();
int duration_in_micro;
if( !res && (duration_in_micro=int(i.seconds()*1e6))>unique_lock_tick ) {
tick_count::interval_t i_100( double(unique_lock_tick)/1e6 /* seconds */); // 100 microseconds = 0.1e-3 seconds
do {
this_tbb_thread::sleep(i_100); // sleep for 100 micro seconds
duration_in_micro -= unique_lock_tick;
res = pm->try_lock();
} while( !res && duration_in_micro>unique_lock_tick );
}
return (owns=res);
}
//! Swap the two unique locks that have the mutexes of same type
template<typename M>
void swap(unique_lock<M>& x, unique_lock<M>& y) { x.swap( y ); }
namespace internal {
#if _WIN32||_WIN64
union condvar_impl_t {
condition_variable_using_event cv_event;
CONDITION_VARIABLE cv_native;
};
void __TBB_EXPORTED_FUNC internal_initialize_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_destroy_condition_variable( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_one( condvar_impl_t& cv );
void __TBB_EXPORTED_FUNC internal_condition_variable_notify_all( condvar_impl_t& cv );
bool __TBB_EXPORTED_FUNC internal_condition_variable_wait( condvar_impl_t& cv, mutex* mtx, const tick_count::interval_t* i = NULL );
#else /* if !(_WIN32||_WIN64), i.e., POSIX threads */
typedef pthread_cond_t condvar_impl_t;
#endif
} // namespace internal
//! cv_status
/** C++0x standard working draft 30.5 */
enum cv_status { no_timeout, timeout };
//! condition variable
/** C++0x standard working draft 30.5.1
@ingroup synchronization */
class __TBB_DEPRECATED_IN_VERBOSE_MODE condition_variable : tbb::internal::no_copy {
public:
//! Constructor
condition_variable() {
#if _WIN32||_WIN64
internal_initialize_condition_variable( my_cv );
#else
pthread_cond_init( &my_cv, NULL );
#endif
}
//! Destructor
~condition_variable() {
//precondition: There shall be no thread blocked on *this.
#if _WIN32||_WIN64
internal_destroy_condition_variable( my_cv );
#else
pthread_cond_destroy( &my_cv );
#endif
}
//! Notify one thread and wake it up
void notify_one() {
#if _WIN32||_WIN64
internal_condition_variable_notify_one( my_cv );
#else
pthread_cond_signal( &my_cv );
#endif
}
//! Notify all threads
void notify_all() {
#if _WIN32||_WIN64
internal_condition_variable_notify_all( my_cv );
#else
pthread_cond_broadcast( &my_cv );
#endif
}
//! Release the mutex associated with the lock and wait on this condition variable
void wait(unique_lock<mutex>& lock);
//! Wait on this condition variable while pred is false
template <class Predicate>
void wait(unique_lock<mutex>& lock, Predicate pred) {
while( !pred() )
wait( lock );
}
//! Timed version of wait()
cv_status wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i );
//! Timed version of the predicated wait
/** The loop terminates when pred() returns true or when the time duration specified by rel_time (i) has elapsed. */
template<typename Predicate>
bool wait_for(unique_lock<mutex>& lock, const tick_count::interval_t &i, Predicate pred)
{
while( !pred() ) {
cv_status st = wait_for( lock, i );
if( st==timeout )
return pred();
}
return true;
}
// C++0x standard working draft. 30.2.3
typedef internal::condvar_impl_t* native_handle_type;
native_handle_type native_handle() { return (native_handle_type) &my_cv; }
private:
internal::condvar_impl_t my_cv;
};
#if _WIN32||_WIN64
inline void condition_variable::wait( unique_lock<mutex>& lock )
{
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( !internal_condition_variable_wait( my_cv, lock.mutex() ) ) {
int ec = GetLastError();
// on Windows 7, SleepConditionVariableCS() may return ERROR_TIMEOUT while the doc says it returns WAIT_TIMEOUT
__TBB_ASSERT_EX( ec!=WAIT_TIMEOUT&&ec!=ERROR_TIMEOUT, NULL );
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
lock.owns = true;
}
inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
{
cv_status rc = no_timeout;
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
// condvar_wait could be SleepConditionVariableCS (or SleepConditionVariableSRW) or our own pre-vista cond_var_wait()
if( !internal_condition_variable_wait( my_cv, lock.mutex(), &i ) ) {
int ec = GetLastError();
if( ec==WAIT_TIMEOUT || ec==ERROR_TIMEOUT )
rc = timeout;
else {
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
}
lock.owns = true;
return rc;
}
#else /* !(_WIN32||_WIN64) */
inline void condition_variable::wait( unique_lock<mutex>& lock )
{
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( pthread_cond_wait( &my_cv, lock.mutex()->native_handle() ) ) {
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
// upon successful return, the mutex has been locked and is owned by the calling thread.
lock.owns = true;
}
inline cv_status condition_variable::wait_for( unique_lock<mutex>& lock, const tick_count::interval_t& i )
{
#if __linux__
struct timespec req;
double sec = i.seconds();
clock_gettime( CLOCK_REALTIME, &req );
req.tv_sec += static_cast<long>(sec);
req.tv_nsec += static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
#else /* generic Unix */
struct timeval tv;
struct timespec req;
double sec = i.seconds();
int status = gettimeofday(&tv, NULL);
__TBB_ASSERT_EX( status==0, "gettimeofday failed" );
req.tv_sec = tv.tv_sec + static_cast<long>(sec);
req.tv_nsec = tv.tv_usec*1000 + static_cast<long>( (sec - static_cast<long>(sec))*1e9 );
#endif /*(choice of OS) */
if( req.tv_nsec>=1e9 ) {
req.tv_sec += 1;
req.tv_nsec -= static_cast<long int>(1e9);
}
__TBB_ASSERT( 0<=req.tv_nsec && req.tv_nsec<1e9, NULL );
int ec;
cv_status rc = no_timeout;
__TBB_ASSERT( lock.owns, NULL );
lock.owns = false;
if( ( ec=pthread_cond_timedwait( &my_cv, lock.mutex()->native_handle(), &req ) ) ) {
if( ec==ETIMEDOUT )
rc = timeout;
else {
__TBB_ASSERT( lock.try_lock()==false, NULL );
lock.owns = true;
throw_exception_v4( tbb::internal::eid_condvar_wait_failed );
}
}
lock.owns = true;
return rc;
}
#endif /* !(_WIN32||_WIN64) */
} // namespace interface5
__TBB_DEFINE_PROFILING_SET_NAME(interface5::condition_variable)
} // namespace tbb
#if TBB_IMPLEMENT_CPP0X
namespace std {
using tbb::interface5::defer_lock_t;
using tbb::interface5::try_to_lock_t;
using tbb::interface5::adopt_lock_t;
using tbb::interface5::defer_lock;
using tbb::interface5::try_to_lock;
using tbb::interface5::adopt_lock;
using tbb::interface5::lock_guard;
using tbb::interface5::unique_lock;
using tbb::interface5::swap; /* this is for void std::swap(unique_lock<M>&,unique_lock<M>&) */
using tbb::interface5::condition_variable;
using tbb::interface5::cv_status;
using tbb::interface5::timeout;
using tbb::interface5::no_timeout;
} // namespace std
#endif /* TBB_IMPLEMENT_CPP0X */
#include "../internal/_warning_suppress_disable_notice.h"
#undef __TBB_condition_variable_H_include_area
#endif /* __TBB_condition_variable_H */
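A minimal usage sketch, not part of the commit; it assumes the tbb library is linked and a C++11 compiler, and uses the fully qualified tbb::interface5 names exposed by this deprecated compat header.

#include "tbb/compat/condition_variable"
#include "tbb/mutex.h"
#include "tbb/tbb_thread.h"
#include <iostream>

tbb::mutex m;
tbb::interface5::condition_variable cv;
bool ready = false;

int main() {
    tbb::tbb_thread worker([] {
        tbb::interface5::unique_lock<tbb::mutex> lk(m);
        ready = true;
        cv.notify_one();                               // wake the waiter
    });
    {
        tbb::interface5::unique_lock<tbb::mutex> lk(m);
        cv.wait(lk, [] { return ready; });             // predicated wait: loops until ready is true
    }
    worker.join();
    std::cout << "signalled\n";
    return 0;
}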

View File

@@ -0,0 +1,75 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_ppl_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_ppl_H
#pragma message("TBB Warning: tbb/compat/ppl.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_compat_ppl_H
#define __TBB_compat_ppl_H
#define __TBB_ppl_H_include_area
#include "../internal/_warning_suppress_enable_notice.h"
#include "../task_group.h"
#include "../parallel_invoke.h"
#include "../parallel_for_each.h"
#include "../parallel_for.h"
#include "../tbb_exception.h"
#include "../critical_section.h"
#include "../reader_writer_lock.h"
#include "../combinable.h"
namespace Concurrency {
#if __TBB_TASK_GROUP_CONTEXT
using tbb::task_handle;
using tbb::task_group_status;
using tbb::task_group;
using tbb::structured_task_group;
using tbb::invalid_multiple_scheduling;
using tbb::missing_wait;
using tbb::make_task;
using tbb::not_complete;
using tbb::complete;
using tbb::canceled;
using tbb::is_current_task_group_canceling;
#endif /* __TBB_TASK_GROUP_CONTEXT */
using tbb::parallel_invoke;
using tbb::strict_ppl::parallel_for;
using tbb::parallel_for_each;
using tbb::critical_section;
using tbb::reader_writer_lock;
using tbb::combinable;
using tbb::improper_lock;
} // namespace Concurrency
#include "../internal/_warning_suppress_disable_notice.h"
#undef __TBB_ppl_H_include_area
#endif /* __TBB_compat_ppl_H */
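A minimal usage sketch, not part of the commit; it assumes the tbb library is linked and built with task-group-context support (the default), so the task_group names are re-exported.

#include "tbb/compat/ppl.h"
#include <atomic>
#include <iostream>

int main() {
    std::atomic<int> hits(0);
    // PPL-style spellings, backed by the TBB entry points re-exported above.
    Concurrency::parallel_for(0, 100, [&](int) { ++hits; });
    Concurrency::task_group tg;
    tg.run([&] { hits += 1000; });
    tg.wait();
    std::cout << hits << "\n";   // 1100
    return 0;
}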

View File

@@ -0,0 +1,73 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_thread_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_thread_H
#pragma message("TBB Warning: tbb/compat/thread is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_thread_H
#define __TBB_thread_H
#define __TBB_thread_H_include_area
#include "../internal/_warning_suppress_enable_notice.h"
#include "../tbb_config.h"
#if TBB_IMPLEMENT_CPP0X
#include "../tbb_thread.h"
namespace std {
typedef tbb::tbb_thread thread;
namespace this_thread {
using tbb::this_tbb_thread::get_id;
using tbb::this_tbb_thread::yield;
__TBB_DEPRECATED_IN_VERBOSE_MODE inline void sleep_for(const tbb::tick_count::interval_t& rel_time) {
tbb::internal::thread_sleep_v3( rel_time );
}
}
} // namespace std
#else /* TBB_IMPLEMENT_CPP0X */
#define __TBB_COMPAT_THREAD_RECURSION_PROTECTOR 1
#include <thread>
#undef __TBB_COMPAT_THREAD_RECURSION_PROTECTOR
#endif /* TBB_IMPLEMENT_CPP0X */
#include "../internal/_warning_suppress_disable_notice.h"
#undef __TBB_thread_H_include_area
#else /* __TBB_thread_H */
#if __TBB_COMPAT_THREAD_RECURSION_PROTECTOR
#error The tbb/compat/thread header attempts to include itself. \
Please make sure that {TBBROOT}/include/tbb/compat is NOT in include paths.
#endif
#endif /* __TBB_thread_H */
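A minimal usage sketch, not part of the commit; it only compiles on toolchains where tbb_config.h enables TBB_IMPLEMENT_CPP0X (no native C++11 library), in which case std::thread below is the tbb::tbb_thread typedef and sleep_for takes a tick_count interval. On a C++11 library the header simply forwards to the native <thread>.

#include "tbb/compat/thread"
#include "tbb/tick_count.h"
#include <iostream>

int main() {
    std::thread t([] {                                                   // tbb::tbb_thread here
        std::this_thread::sleep_for(tbb::tick_count::interval_t(0.05));  // ~50 ms
        std::cout << "worker finished\n";
    });
    std::cout << "main: waiting for the worker\n";
    t.join();
    return 0;
}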

View File

@@ -0,0 +1,501 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_tuple_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_tuple_H
#pragma message("TBB Warning: tbb/compat/tuple is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_tuple_H
#define __TBB_tuple_H
#define __TBB_tuple_H_include_area
#include "../internal/_warning_suppress_enable_notice.h"
#include <utility>
#include "../tbb_stddef.h"
// build preprocessor variables for varying number of arguments
// Need the leading comma so the empty __TBB_T_PACK will not cause a syntax error.
#if __TBB_VARIADIC_MAX <= 5
#define __TBB_T_PACK
#define __TBB_U_PACK
#define __TBB_TYPENAME_T_PACK
#define __TBB_TYPENAME_U_PACK
#define __TBB_NULL_TYPE_PACK
#define __TBB_REF_T_PARAM_PACK
#define __TBB_CONST_REF_T_PARAM_PACK
#define __TBB_T_PARAM_LIST_PACK
#define __TBB_CONST_NULL_REF_PACK
//
#elif __TBB_VARIADIC_MAX == 6
#define __TBB_T_PACK ,__T5
#define __TBB_U_PACK ,__U5
#define __TBB_TYPENAME_T_PACK , typename __T5
#define __TBB_TYPENAME_U_PACK , typename __U5
#define __TBB_NULL_TYPE_PACK , null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5
#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5
#define __TBB_T_PARAM_LIST_PACK ,t5
#define __TBB_CONST_NULL_REF_PACK , const null_type&
//
#elif __TBB_VARIADIC_MAX == 7
#define __TBB_T_PACK ,__T5, __T6
#define __TBB_U_PACK ,__U5, __U6
#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6
#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6
#define __TBB_NULL_TYPE_PACK , null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6
#define __TBB_CONST_REF_T_PARAM_PACK ,const __T5& t5, const __T6& t6
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX == 8
#define __TBB_T_PACK ,__T5, __T6, __T7
#define __TBB_U_PACK ,__U5, __U6, __U7
#define __TBB_TYPENAME_T_PACK , typename __T5 , typename __T6, typename __T7
#define __TBB_TYPENAME_U_PACK , typename __U5 , typename __U6, typename __U7
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX == 9
#define __TBB_T_PACK ,__T5, __T6, __T7, __T8
#define __TBB_U_PACK ,__U5, __U6, __U7, __U8
#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8
#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&
//
#elif __TBB_VARIADIC_MAX >= 10
#define __TBB_T_PACK ,__T5, __T6, __T7, __T8, __T9
#define __TBB_U_PACK ,__U5, __U6, __U7, __U8, __U9
#define __TBB_TYPENAME_T_PACK , typename __T5, typename __T6, typename __T7, typename __T8, typename __T9
#define __TBB_TYPENAME_U_PACK , typename __U5, typename __U6, typename __U7, typename __U8, typename __U9
#define __TBB_NULL_TYPE_PACK , null_type, null_type, null_type, null_type, null_type
#define __TBB_REF_T_PARAM_PACK ,__T5& t5, __T6& t6, __T7& t7, __T8& t8, __T9& t9
#define __TBB_CONST_REF_T_PARAM_PACK , const __T5& t5, const __T6& t6, const __T7& t7, const __T8& t8, const __T9& t9
#define __TBB_T_PARAM_LIST_PACK ,t5 ,t6 ,t7 ,t8 ,t9
#define __TBB_CONST_NULL_REF_PACK , const null_type&, const null_type&, const null_type&, const null_type&, const null_type&
#endif
namespace tbb {
namespace interface5 {
namespace internal {
struct null_type { };
}
using internal::null_type;
// tuple forward declaration
template <typename __T0=null_type, typename __T1=null_type, typename __T2=null_type,
typename __T3=null_type, typename __T4=null_type
#if __TBB_VARIADIC_MAX >= 6
, typename __T5=null_type
#if __TBB_VARIADIC_MAX >= 7
, typename __T6=null_type
#if __TBB_VARIADIC_MAX >= 8
, typename __T7=null_type
#if __TBB_VARIADIC_MAX >= 9
, typename __T8=null_type
#if __TBB_VARIADIC_MAX >= 10
, typename __T9=null_type
#endif
#endif
#endif
#endif
#endif
>
class tuple;
namespace internal {
// const null_type temp
inline const null_type cnull() { return null_type(); }
// cons forward declaration
template <typename __HT, typename __TT> struct cons;
// type of a component of the cons
template<int __N, typename __T>
struct component {
typedef typename __T::tail_type next;
typedef typename component<__N-1,next>::type type;
};
template<typename __T>
struct component<0,__T> {
typedef typename __T::head_type type;
};
template<>
struct component<0,null_type> {
typedef null_type type;
};
// const version of component
template<int __N, typename __T>
struct component<__N, const __T>
{
typedef typename __T::tail_type next;
typedef const typename component<__N-1,next>::type type;
};
template<typename __T>
struct component<0, const __T>
{
typedef const typename __T::head_type type;
};
// helper class for getting components of cons
template< int __N>
struct get_helper {
template<typename __HT, typename __TT>
inline static typename component<__N, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
return get_helper<__N-1>::get(ti.tail);
}
template<typename __HT, typename __TT>
inline static typename component<__N, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
return get_helper<__N-1>::get(ti.tail);
}
};
template<>
struct get_helper<0> {
template<typename __HT, typename __TT>
inline static typename component<0, cons<__HT,__TT> >::type& get(cons<__HT,__TT>& ti) {
return ti.head;
}
template<typename __HT, typename __TT>
inline static typename component<0, cons<__HT,__TT> >::type const& get(const cons<__HT,__TT>& ti) {
return ti.head;
}
};
// traits adaptor
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK>
struct tuple_traits {
typedef cons <__T0, typename tuple_traits<__T1, __T2, __T3, __T4 __TBB_T_PACK , null_type>::U > U;
};
template <typename __T0>
struct tuple_traits<__T0, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
typedef cons<__T0, null_type> U;
};
template<>
struct tuple_traits<null_type, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > {
typedef null_type U;
};
// core cons defs
template <typename __HT, typename __TT>
struct cons{
typedef __HT head_type;
typedef __TT tail_type;
head_type head;
tail_type tail;
static const int length = 1 + tail_type::length;
// default constructors
explicit cons() : head(), tail() { }
// non-default constructors
cons(head_type& h, const tail_type& t) : head(h), tail(t) { }
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
cons(const __T0& t0, const __T1& t1, const __T2& t2, const __T3& t3, const __T4& t4 __TBB_CONST_REF_T_PARAM_PACK) :
head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK, cnull()) { }
template <typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
cons(__T0& t0, __T1& t1, __T2& t2, __T3& t3, __T4& t4 __TBB_REF_T_PARAM_PACK) :
head(t0), tail(t1, t2, t3, t4 __TBB_T_PARAM_LIST_PACK , cnull()) { }
template <typename __HT1, typename __TT1>
cons(const cons<__HT1,__TT1>& other) : head(other.head), tail(other.tail) { }
cons& operator=(const cons& other) { head = other.head; tail = other.tail; return *this; }
friend bool operator==(const cons& me, const cons& other) {
return me.head == other.head && me.tail == other.tail;
}
friend bool operator<(const cons& me, const cons& other) {
return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
}
friend bool operator>(const cons& me, const cons& other) { return other<me; }
friend bool operator!=(const cons& me, const cons& other) { return !(me==other); }
friend bool operator>=(const cons& me, const cons& other) { return !(me<other); }
friend bool operator<=(const cons& me, const cons& other) { return !(me>other); }
template<typename __HT1, typename __TT1>
friend bool operator==(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
return me.head == other.head && me.tail == other.tail;
}
template<typename __HT1, typename __TT1>
friend bool operator<(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) {
return me.head < other.head || (!(other.head < me.head) && me.tail < other.tail);
}
template<typename __HT1, typename __TT1>
friend bool operator>(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return other<me; }
template<typename __HT1, typename __TT1>
friend bool operator!=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me==other); }
template<typename __HT1, typename __TT1>
friend bool operator>=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me<other); }
template<typename __HT1, typename __TT1>
friend bool operator<=(const cons<__HT,__TT>& me, const cons<__HT1,__TT1>& other) { return !(me>other); }
}; // cons
template <typename __HT>
struct cons<__HT,null_type> {
typedef __HT head_type;
typedef null_type tail_type;
head_type head;
static const int length = 1;
// default constructor
cons() : head() { /*std::cout << "default constructor 1\n";*/ }
cons(const null_type&, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head() { /*std::cout << "default constructor 2\n";*/ }
// non-default constructor
template<typename __T1>
cons(__T1& t1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t1) { /*std::cout << "non-default a1, t1== " << t1 << "\n";*/}
cons(head_type& h, const null_type& = null_type() ) : head(h) { }
cons(const head_type& t0, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(t0) { }
// converting constructor
template<typename __HT1>
cons(__HT1 h1, const null_type&, const null_type&, const null_type&, const null_type& __TBB_CONST_NULL_REF_PACK) : head(h1) { }
// copy constructor
template<typename __HT1>
cons( const cons<__HT1, null_type>& other) : head(other.head) { }
// assignment operator
cons& operator=(const cons& other) { head = other.head; return *this; }
friend bool operator==(const cons& me, const cons& other) { return me.head == other.head; }
friend bool operator<(const cons& me, const cons& other) { return me.head < other.head; }
friend bool operator>(const cons& me, const cons& other) { return other<me; }
friend bool operator!=(const cons& me, const cons& other) {return !(me==other); }
friend bool operator<=(const cons& me, const cons& other) {return !(me>other); }
friend bool operator>=(const cons& me, const cons& other) {return !(me<other); }
template<typename __HT1>
friend bool operator==(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
return me.head == other.head;
}
template<typename __HT1>
friend bool operator<(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) {
return me.head < other.head;
}
template<typename __HT1>
friend bool operator>(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return other<me; }
template<typename __HT1>
friend bool operator!=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me==other); }
template<typename __HT1>
friend bool operator<=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me>other); }
template<typename __HT1>
friend bool operator>=(const cons<__HT,null_type>& me, const cons<__HT1,null_type>& other) { return !(me<other); }
}; // cons
template <>
struct cons<null_type,null_type> { typedef null_type tail_type; static const int length = 0; };
// wrapper for default constructor
template<typename __T>
inline const __T wrap_dcons(__T*) { return __T(); }
} // namespace internal
// tuple definition
template<typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
class __TBB_DEPRECATED_IN_VERBOSE_MODE tuple : public internal::tuple_traits<__T0, __T1, __T2, __T3, __T4 __TBB_T_PACK >::U {
// friends
template <typename __T> friend class tuple_size;
template<int __N, typename __T> friend struct tuple_element;
// stl components
typedef tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > value_type;
typedef value_type *pointer;
typedef const value_type *const_pointer;
typedef value_type &reference;
typedef const value_type &const_reference;
typedef size_t size_type;
typedef typename internal::tuple_traits<__T0,__T1,__T2,__T3, __T4 __TBB_T_PACK >::U my_cons;
public:
__TBB_DEPRECATED_IN_VERBOSE_MODE tuple(const __T0& t0=internal::wrap_dcons((__T0*)NULL)
,const __T1& t1=internal::wrap_dcons((__T1*)NULL)
,const __T2& t2=internal::wrap_dcons((__T2*)NULL)
,const __T3& t3=internal::wrap_dcons((__T3*)NULL)
,const __T4& t4=internal::wrap_dcons((__T4*)NULL)
#if __TBB_VARIADIC_MAX >= 6
,const __T5& t5=internal::wrap_dcons((__T5*)NULL)
#if __TBB_VARIADIC_MAX >= 7
,const __T6& t6=internal::wrap_dcons((__T6*)NULL)
#if __TBB_VARIADIC_MAX >= 8
,const __T7& t7=internal::wrap_dcons((__T7*)NULL)
#if __TBB_VARIADIC_MAX >= 9
,const __T8& t8=internal::wrap_dcons((__T8*)NULL)
#if __TBB_VARIADIC_MAX >= 10
,const __T9& t9=internal::wrap_dcons((__T9*)NULL)
#endif
#endif
#endif
#endif
#endif
) :
my_cons(t0,t1,t2,t3,t4 __TBB_T_PARAM_LIST_PACK) { }
template<int __N>
struct internal_tuple_element {
typedef typename internal::component<__N,my_cons>::type type;
};
template<int __N>
typename internal_tuple_element<__N>::type& get() { return internal::get_helper<__N>::get(*this); }
template<int __N>
typename internal_tuple_element<__N>::type const& get() const { return internal::get_helper<__N>::get(*this); }
template<typename __U1, typename __U2>
tuple& operator=(const internal::cons<__U1,__U2>& other) {
my_cons::operator=(other);
return *this;
}
template<typename __U1, typename __U2>
tuple& operator=(const std::pair<__U1,__U2>& other) {
// __TBB_ASSERT(tuple_size<value_type>::value == 2, "Invalid size for pair to tuple assignment");
this->head = other.first;
this->tail.head = other.second;
return *this;
}
friend bool operator==(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)==(other);}
friend bool operator<(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)<(other);}
friend bool operator>(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)>(other);}
friend bool operator!=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)!=(other);}
friend bool operator>=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)>=(other);}
friend bool operator<=(const tuple& me, const tuple& other) {return static_cast<const my_cons &>(me)<=(other);}
}; // tuple
// empty tuple
template<>
class __TBB_DEPRECATED_IN_VERBOSE_MODE tuple<null_type, null_type, null_type, null_type, null_type __TBB_NULL_TYPE_PACK > : public null_type {
};
// helper classes
template < typename __T>
class tuple_size {
public:
static const size_t value = 1 + tuple_size<typename __T::tail_type>::value;
};
template <>
class tuple_size<tuple<> > {
public:
static const size_t value = 0;
};
template <>
class tuple_size<null_type> {
public:
static const size_t value = 0;
};
template<int __N, typename __T>
struct tuple_element {
typedef typename internal::component<__N, typename __T::my_cons>::type type;
};
template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type&
get(tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
template<int __N, typename __T0, typename __T1, typename __T2, typename __T3, typename __T4 __TBB_TYPENAME_T_PACK >
inline static typename tuple_element<__N,tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK > >::type const&
get(const tuple<__T0,__T1,__T2,__T3,__T4 __TBB_T_PACK >& t) { return internal::get_helper<__N>::get(t); }
} // interface5
} // tbb
#if !__TBB_CPP11_TUPLE_PRESENT
namespace tbb {
namespace flow {
using tbb::interface5::tuple;
using tbb::interface5::tuple_size;
using tbb::interface5::tuple_element;
using tbb::interface5::get;
}
}
#endif
#undef __TBB_T_PACK
#undef __TBB_U_PACK
#undef __TBB_TYPENAME_T_PACK
#undef __TBB_TYPENAME_U_PACK
#undef __TBB_NULL_TYPE_PACK
#undef __TBB_REF_T_PARAM_PACK
#undef __TBB_CONST_REF_T_PARAM_PACK
#undef __TBB_T_PARAM_LIST_PACK
#undef __TBB_CONST_NULL_REF_PACK
#include "../internal/_warning_suppress_disable_notice.h"
#undef __TBB_tuple_H_include_area
#endif /* __TBB_tuple_H */
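A minimal usage sketch, not part of the commit; it assumes the TBB 2020 headers and uses the fully qualified tbb::interface5 names, which are also re-exported as tbb::flow::tuple when no native <tuple> is available.

#include "tbb/compat/tuple"
#include <iostream>

int main() {
    tbb::interface5::tuple<int, double, const char*> t(1, 2.5, "three");
    std::cout << tbb::interface5::get<0>(t) << ' '
              << tbb::interface5::get<1>(t) << ' '
              << tbb::interface5::get<2>(t) << '\n';
    std::cout << "size = "
              << tbb::interface5::tuple_size<tbb::interface5::tuple<int, double, const char*> >::value
              << '\n';   // 3
    return 0;
}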

File diff suppressed because it is too large

View File

@@ -0,0 +1,290 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_lru_cache_H
#define __TBB_concurrent_lru_cache_H
#define __TBB_concurrent_lru_cache_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if ! TBB_PREVIEW_CONCURRENT_LRU_CACHE
#error Set TBB_PREVIEW_CONCURRENT_LRU_CACHE to include concurrent_lru_cache.h
#endif
#include "tbb_stddef.h"
#include <map>
#include <list>
#include <algorithm> // std::find
#if __TBB_CPP11_RVALUE_REF_PRESENT
#include <utility> // std::move
#endif
#include "atomic.h"
#include "internal/_aggregator_impl.h"
namespace tbb{
namespace interface6 {
template <typename key_type, typename value_type, typename value_functor_type = value_type (*)(key_type) >
class concurrent_lru_cache : internal::no_assign{
private:
typedef concurrent_lru_cache self_type;
typedef value_functor_type value_function_type;
typedef std::size_t ref_counter_type;
struct map_value_type;
typedef std::map<key_type, map_value_type> map_storage_type;
typedef std::list<typename map_storage_type::iterator> lru_list_type;
struct map_value_type {
value_type my_value;
ref_counter_type my_ref_counter;
typename lru_list_type::iterator my_lru_list_iterator;
bool my_is_ready;
map_value_type (value_type const& a_value, ref_counter_type a_ref_counter, typename lru_list_type::iterator a_lru_list_iterator, bool a_is_ready)
: my_value(a_value), my_ref_counter(a_ref_counter), my_lru_list_iterator (a_lru_list_iterator), my_is_ready(a_is_ready)
{}
};
class handle_object;
struct aggregator_operation;
typedef aggregator_operation aggregated_operation_type;
typedef tbb::internal::aggregating_functor<self_type,aggregated_operation_type> aggregator_function_type;
friend class tbb::internal::aggregating_functor<self_type,aggregated_operation_type>;
typedef tbb::internal::aggregator<aggregator_function_type, aggregated_operation_type> aggregator_type;
private:
value_function_type my_value_function;
std::size_t const my_number_of_lru_history_items;
map_storage_type my_map_storage;
lru_list_type my_lru_list;
aggregator_type my_aggregator;
public:
typedef handle_object handle;
public:
concurrent_lru_cache(value_function_type f, std::size_t number_of_lru_history_items)
: my_value_function(f),my_number_of_lru_history_items(number_of_lru_history_items)
{
my_aggregator.initialize_handler(aggregator_function_type(this));
}
handle_object operator[](key_type k){
retrieve_aggregator_operation op(k);
my_aggregator.execute(&op);
if (op.is_new_value_needed()){
op.result().second.my_value = my_value_function(k);
__TBB_store_with_release(op.result().second.my_is_ready, true);
}else{
tbb::internal::spin_wait_while_eq(op.result().second.my_is_ready,false);
}
return handle_object(*this,op.result());
}
private:
void signal_end_of_usage(typename map_storage_type::reference value_ref){
signal_end_of_usage_aggregator_operation op(value_ref);
my_aggregator.execute(&op);
}
private:
#if !__TBB_CPP11_RVALUE_REF_PRESENT
struct handle_move_t:no_assign{
concurrent_lru_cache & my_cache_ref;
typename map_storage_type::reference my_map_record_ref;
handle_move_t(concurrent_lru_cache & cache_ref, typename map_storage_type::reference value_ref):my_cache_ref(cache_ref),my_map_record_ref(value_ref) {};
};
#endif
class handle_object {
concurrent_lru_cache * my_cache_pointer;
typename map_storage_type::pointer my_map_record_ptr;
public:
handle_object() : my_cache_pointer(), my_map_record_ptr() {}
handle_object(concurrent_lru_cache& cache_ref, typename map_storage_type::reference value_ref) : my_cache_pointer(&cache_ref), my_map_record_ptr(&value_ref) {}
operator bool() const {
return (my_cache_pointer && my_map_record_ptr);
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
// TODO: add check for double moved objects by special dedicated field
handle_object(handle_object&& src) : my_cache_pointer(src.my_cache_pointer), my_map_record_ptr(src.my_map_record_ptr) {
__TBB_ASSERT((src.my_cache_pointer && src.my_map_record_ptr) || (!src.my_cache_pointer && !src.my_map_record_ptr), "invalid state of moving object?");
src.my_cache_pointer = NULL;
src.my_map_record_ptr = NULL;
}
handle_object& operator=(handle_object&& src) {
__TBB_ASSERT((src.my_cache_pointer && src.my_map_record_ptr) || (!src.my_cache_pointer && !src.my_map_record_ptr), "invalid state of moving object?");
if (my_cache_pointer) {
my_cache_pointer->signal_end_of_usage(*my_map_record_ptr);
}
my_cache_pointer = src.my_cache_pointer;
my_map_record_ptr = src.my_map_record_ptr;
src.my_cache_pointer = NULL;
src.my_map_record_ptr = NULL;
return *this;
}
#else
handle_object(handle_move_t m) : my_cache_pointer(&m.my_cache_ref), my_map_record_ptr(&m.my_map_record_ref) {}
handle_object& operator=(handle_move_t m) {
if (my_cache_pointer) {
my_cache_pointer->signal_end_of_usage(*my_map_record_ptr);
}
my_cache_pointer = &m.my_cache_ref;
my_map_record_ptr = &m.my_map_record_ref;
return *this;
}
operator handle_move_t(){
return move(*this);
}
#endif // __TBB_CPP11_RVALUE_REF_PRESENT
value_type& value(){
__TBB_ASSERT(my_cache_pointer,"get value from already moved object?");
__TBB_ASSERT(my_map_record_ptr,"get value from an invalid or already moved object?");
return my_map_record_ptr->second.my_value;
}
~handle_object(){
if (my_cache_pointer){
my_cache_pointer->signal_end_of_usage(*my_map_record_ptr);
}
}
private:
#if __TBB_CPP11_RVALUE_REF_PRESENT
// For source compatibility with C++03
friend handle_object&& move(handle_object& h){
return std::move(h);
}
#else
friend handle_move_t move(handle_object& h){
return handle_object::move(h);
}
// TODO: add check for double moved objects by special dedicated field
static handle_move_t move(handle_object& h){
__TBB_ASSERT((h.my_cache_pointer && h.my_map_record_ptr) || (!h.my_cache_pointer && !h.my_map_record_ptr), "invalid state of moving object?");
concurrent_lru_cache * cache_pointer = h.my_cache_pointer;
typename map_storage_type::pointer map_record_ptr = h.my_map_record_ptr;
h.my_cache_pointer = NULL;
h.my_map_record_ptr = NULL;
return handle_move_t(*cache_pointer, *map_record_ptr);
}
#endif // __TBB_CPP11_RVALUE_REF_PRESENT
private:
void operator=(handle_object&);
#if __SUNPRO_CC
// Presumably due to a compiler bug, a private copy constructor
// breaks expressions like handle h = cache[key];
public:
#endif
handle_object(handle_object &);
};
private:
//TODO: looks like aggregator_operation is a perfect match for statically typed variant type
struct aggregator_operation : tbb::internal::aggregated_operation<aggregator_operation>{
enum e_op_type {op_retive, op_signal_end_of_usage};
//TODO: try to use pointer to function apply_visitor here
//TODO: try virtual functions and measure the difference
e_op_type my_operation_type;
aggregator_operation(e_op_type operation_type): my_operation_type(operation_type) {}
void cast_and_handle(self_type& container ){
if (my_operation_type==op_retive){
static_cast<retrieve_aggregator_operation*>(this)->handle(container);
}else{
static_cast<signal_end_of_usage_aggregator_operation*>(this)->handle(container);
}
}
};
struct retrieve_aggregator_operation : aggregator_operation, private internal::no_assign {
key_type my_key;
typename map_storage_type::pointer my_result_map_record_pointer;
bool my_is_new_value_needed;
retrieve_aggregator_operation(key_type key):aggregator_operation(aggregator_operation::op_retive),my_key(key),my_is_new_value_needed(false){}
void handle(self_type& container ){
my_result_map_record_pointer = & container.retrieve_serial(my_key,my_is_new_value_needed);
}
typename map_storage_type::reference result(){ return * my_result_map_record_pointer; }
bool is_new_value_needed(){return my_is_new_value_needed;}
};
struct signal_end_of_usage_aggregator_operation : aggregator_operation, private internal::no_assign {
typename map_storage_type::reference my_map_record_ref;
signal_end_of_usage_aggregator_operation(typename map_storage_type::reference map_record_ref):aggregator_operation(aggregator_operation::op_signal_end_of_usage),my_map_record_ref(map_record_ref){}
void handle(self_type& container ){
container.signal_end_of_usage_serial(my_map_record_ref);
}
};
private:
void handle_operations(aggregator_operation* op_list){
while(op_list){
op_list->cast_and_handle(*this);
aggregator_operation* tmp = op_list;
op_list=op_list->next;
tbb::internal::itt_store_word_with_release(tmp->status, uintptr_t(1));
}
}
private:
typename map_storage_type::reference retrieve_serial(key_type k, bool& is_new_value_needed){
typename map_storage_type::iterator it = my_map_storage.find(k);
if (it == my_map_storage.end()){
it = my_map_storage.insert(it,std::make_pair(k,map_value_type(value_type(),0,my_lru_list.end(),false)));
is_new_value_needed = true;
}else {
typename lru_list_type::iterator list_it = it->second.my_lru_list_iterator;
if (list_it!=my_lru_list.end()) {
__TBB_ASSERT(!it->second.my_ref_counter,"item to be evicted should not have live references");
// The item is about to be used, so it is no longer a candidate for eviction;
// remove it from the LRU history.
my_lru_list.erase(list_it);
it->second.my_lru_list_iterator= my_lru_list.end();
}
}
++(it->second.my_ref_counter);
return *it;
}
void signal_end_of_usage_serial(typename map_storage_type::reference map_record_ref){
typename map_storage_type::iterator it = my_map_storage.find(map_record_ref.first);
__TBB_ASSERT(it!=my_map_storage.end(),"cache should not return past-end iterators to outer world");
__TBB_ASSERT(&(*it) == &map_record_ref,"dangling reference has been returned to outside world? data race ?");
__TBB_ASSERT( my_lru_list.end()== std::find(my_lru_list.begin(),my_lru_list.end(),it),
"object in use should not be in list of unused objects ");
if (! --(it->second.my_ref_counter)){
//this was the last reference, so put the item into the LRU history
if (my_lru_list.size()>=my_number_of_lru_history_items){
//evict items to make room
size_t number_of_elements_to_evict = 1 + my_lru_list.size() - my_number_of_lru_history_items;
for (size_t i=0; i<number_of_elements_to_evict; ++i){
typename map_storage_type::iterator it_to_evict = my_lru_list.back();
__TBB_ASSERT(!it_to_evict->second.my_ref_counter,"item to be evicted should not have live references");
my_lru_list.pop_back();
my_map_storage.erase(it_to_evict);
}
}
my_lru_list.push_front(it);
it->second.my_lru_list_iterator = my_lru_list.begin();
}
}
};
} // namespace interface6
using interface6::concurrent_lru_cache;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_lru_cache_H_include_area
#endif //__TBB_concurrent_lru_cache_H
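
A minimal usage sketch for the cache defined above, assuming the preview macro TBB_PREVIEW_CONCURRENT_LRU_CACHE is defined before the include; the value function expensive_compute, the int key/value types, and the history size of 4 are illustrative placeholders, not anything mandated by the header.

#define TBB_PREVIEW_CONCURRENT_LRU_CACHE 1
#include "tbb/concurrent_lru_cache.h"
#include <iostream>

// Placeholder value function: invoked once for every key that is not cached yet.
static int expensive_compute(int key) { return key * key; }

int main() {
    typedef tbb::concurrent_lru_cache<int, int> cache_t;
    cache_t cache(&expensive_compute, 4);   // keep at most 4 unused entries in the LRU history
    {
        cache_t::handle h = cache[7];       // first access computes and stores 49
        std::cout << h.value() << '\n';     // a live handle pins the entry (ref counter > 0)
    }                                       // handle destruction marks the entry unused again
    return 0;
}

Once the last handle to a key goes away, signal_end_of_usage_serial above pushes the record onto the LRU list and evicts the oldest unused records beyond my_number_of_lru_history_items.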


@@ -0,0 +1,389 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_map_H
#define __TBB_concurrent_map_H
#define __TBB_concurrent_map_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if !TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS
#error Set TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS to include concurrent_map.h
#endif
#include "tbb_config.h"
// concurrent_map requires C++11 support
#if __TBB_CONCURRENT_ORDERED_CONTAINERS_PRESENT
#include "internal/_concurrent_skip_list_impl.h"
namespace tbb {
namespace interface10 {
template<typename Key, typename Value, typename KeyCompare, typename RandomGenerator,
size_t MAX_LEVELS, typename Allocator, bool AllowMultimapping>
class map_traits {
public:
static constexpr size_t MAX_LEVEL = MAX_LEVELS;
using random_level_generator_type = RandomGenerator;
using key_type = Key;
using mapped_type = Value;
using compare_type = KeyCompare;
using value_type = std::pair<const key_type, mapped_type>;
using reference = value_type&;
using const_reference = const value_type&;
using allocator_type = Allocator;
using mutex_type = tbb::spin_mutex;
using node_type = tbb::internal::node_handle<key_type, value_type, internal::skip_list_node<value_type, mutex_type>, allocator_type>;
static const bool allow_multimapping = AllowMultimapping;
class value_compare {
public:
// TODO: these member types are deprecated in C++17; do we need to keep providing them?
using result_type = bool;
using first_argument_type = value_type;
using second_argument_type = value_type;
bool operator()(const value_type& lhs, const value_type& rhs) const {
return comp(lhs.first, rhs.first);
}
protected:
value_compare(compare_type c) : comp(c) {}
friend class map_traits;
compare_type comp;
};
static value_compare value_comp(compare_type comp) { return value_compare(comp); }
static const key_type& get_key(const_reference val) {
return val.first;
}
}; // class map_traits
template <typename Key, typename Value, typename Comp, typename Allocator>
class concurrent_multimap;
template <typename Key, typename Value, typename Comp = std::less<Key>, typename Allocator = tbb_allocator<std::pair<const Key, Value>>>
class concurrent_map
: public internal::concurrent_skip_list<map_traits<Key, Value, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, false>> {
using traits_type = map_traits<Key, Value, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, false>;
using base_type = internal::concurrent_skip_list<traits_type>;
#if __TBB_EXTRA_DEBUG
public:
#endif
using base_type::allow_multimapping;
public:
using key_type = Key;
using mapped_type = Value;
using value_type = typename traits_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Comp;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using reverse_iterator = typename base_type::reverse_iterator;
using const_reverse_iterator = typename base_type::const_reverse_iterator;
using node_type = typename base_type::node_type;
using base_type::end;
using base_type::find;
using base_type::emplace;
using base_type::insert;
concurrent_map() = default;
explicit concurrent_map(const key_compare& comp, const allocator_type& alloc = allocator_type()) : base_type(comp, alloc) {}
explicit concurrent_map(const allocator_type& alloc) : base_type(key_compare(), alloc) {}
template< class InputIt >
concurrent_map(InputIt first, InputIt last, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(first, last, comp, alloc) {}
template< class InputIt >
concurrent_map(InputIt first, InputIt last, const allocator_type& alloc) : base_type(first, last, key_compare(), alloc) {}
/** Copy constructor */
concurrent_map(const concurrent_map&) = default;
concurrent_map(const concurrent_map& other, const allocator_type& alloc) : base_type(other, alloc) {}
concurrent_map(concurrent_map&&) = default;
concurrent_map(concurrent_map&& other, const allocator_type& alloc) : base_type(std::move(other), alloc) {}
concurrent_map(std::initializer_list<value_type> init, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(comp, alloc) {
insert(init);
}
concurrent_map(std::initializer_list<value_type> init, const allocator_type& alloc)
: base_type(key_compare(), alloc) {
insert(init);
}
concurrent_map& operator=(const concurrent_map& other) {
return static_cast<concurrent_map&>(base_type::operator=(other));
}
concurrent_map& operator=(concurrent_map&& other) {
return static_cast<concurrent_map&>(base_type::operator=(std::move(other)));
}
mapped_type& at(const key_type& key) {
iterator it = find(key);
if (it == end()) {
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return it->second;
}
const mapped_type& at(const key_type& key) const {
const_iterator it = find(key);
if (it == end()) {
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return it->second;
}
mapped_type& operator[](const key_type& key) {
iterator it = find(key);
if (it == end()) {
it = emplace(std::piecewise_construct, std::forward_as_tuple(key), std::tuple<>()).first;
}
return it->second;
}
mapped_type& operator[](key_type&& key) {
iterator it = find(key);
if (it == end()) {
it = emplace(std::piecewise_construct, std::forward_as_tuple(std::move(key)), std::tuple<>()).first;
}
return it->second;
}
template<typename P, typename std::enable_if<std::is_constructible<value_type, P&&>::value, int>::type = 0>
std::pair<iterator, bool> insert(P&& value) {
return emplace(std::forward<P>(value));
}
template<typename P, typename std::enable_if<std::is_constructible<value_type, P&&>::value, int>::type = 0>
iterator insert(const_iterator hint, P&& value) {
return emplace_hint(hint, std::forward<P>(value));
}
template<typename C2>
void merge(concurrent_map<key_type, mapped_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_map<key_type, mapped_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename C2>
void merge(concurrent_multimap<key_type, mapped_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_multimap<key_type, mapped_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_map
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace internal {
using namespace tbb::internal;
template<template<typename...> typename Map, typename Key, typename T, typename... Args>
using c_map_t = Map<Key, T,
std::conditional_t< (sizeof...(Args) > 0) && !is_allocator_v<pack_element_t<0, Args...> >,
pack_element_t<0, Args...>, std::less<Key> >,
std::conditional_t< (sizeof...(Args) > 0) && is_allocator_v<pack_element_t<sizeof...(Args)-1, Args...> >,
pack_element_t<sizeof...(Args)-1, Args...>, tbb_allocator<std::pair<const Key, T> > > >;
} // namespace internal
template<typename It, typename... Args>
concurrent_map(It, It, Args...)
-> internal::c_map_t<concurrent_map, internal::iterator_key_t<It>, internal::iterator_mapped_t<It>, Args...>;
template<typename Key, typename T, typename... Args>
concurrent_map(std::initializer_list<std::pair<const Key, T>>, Args...)
-> internal::c_map_t<concurrent_map, Key, T, Args...>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Value, typename Comp = std::less<Key>, typename Allocator = tbb_allocator<std::pair<const Key, Value>>>
class concurrent_multimap
: public internal::concurrent_skip_list<map_traits<Key, Value, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, true>> {
using traits_type = map_traits<Key, Value, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, true>;
using base_type = internal::concurrent_skip_list<traits_type>;
#if __TBB_EXTRA_DEBUG
public:
#endif
using base_type::allow_multimapping;
public:
using key_type = Key;
using mapped_type = Value;
using value_type = typename traits_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Comp;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using reverse_iterator = typename base_type::reverse_iterator;
using const_reverse_iterator = typename base_type::const_reverse_iterator;
using node_type = typename base_type::node_type;
using base_type::end;
using base_type::find;
using base_type::emplace;
using base_type::insert;
concurrent_multimap() = default;
explicit concurrent_multimap(const key_compare& comp, const allocator_type& alloc = allocator_type()) : base_type(comp, alloc) {}
explicit concurrent_multimap(const allocator_type& alloc) : base_type(key_compare(), alloc) {}
template< class InputIt >
concurrent_multimap(InputIt first, InputIt last, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(first, last, comp, alloc) {}
template< class InputIt >
concurrent_multimap(InputIt first, InputIt last, const allocator_type& alloc) : base_type(first, last, key_compare(), alloc) {}
/** Copy constructor */
concurrent_multimap(const concurrent_multimap&) = default;
concurrent_multimap(const concurrent_multimap& other, const allocator_type& alloc) : base_type(other, alloc) {}
concurrent_multimap(concurrent_multimap&&) = default;
concurrent_multimap(concurrent_multimap&& other, const allocator_type& alloc) : base_type(std::move(other), alloc) {}
concurrent_multimap(std::initializer_list<value_type> init, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(comp, alloc) {
insert(init);
}
concurrent_multimap(std::initializer_list<value_type> init, const allocator_type& alloc)
: base_type(key_compare(), alloc) {
insert(init);
}
concurrent_multimap& operator=(const concurrent_multimap& other) {
return static_cast<concurrent_multimap&>(base_type::operator=(other));
}
concurrent_multimap& operator=(concurrent_multimap&& other) {
return static_cast<concurrent_multimap&>(base_type::operator=(std::move(other)));
}
template<typename P, typename std::enable_if<std::is_constructible<value_type, P&&>::value, int>::type = 0>
std::pair<iterator, bool> insert(P&& value) {
return emplace(std::forward<P>(value));
}
template<typename P, typename std::enable_if<std::is_constructible<value_type, P&&>::value, int>::type = 0>
iterator insert(const_iterator hint, P&& value) {
return emplace_hint(hint, std::forward<P>(value));
}
template<typename C2>
void merge(concurrent_multimap<key_type, mapped_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_multimap<key_type, mapped_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename C2>
void merge(concurrent_map<key_type, mapped_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_map<key_type, mapped_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_multimap
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template<typename It, typename... Args>
concurrent_multimap(It, It, Args...)
-> internal::c_map_t<concurrent_multimap, internal::iterator_key_t<It>, internal::iterator_mapped_t<It>, Args...>;
template<typename Key, typename T, typename... Args>
concurrent_multimap(std::initializer_list<std::pair<const Key, T>>, Args...)
-> internal::c_map_t<concurrent_multimap, Key, T, Args...>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
} // namespace interface10
using interface10::concurrent_map;
using interface10::concurrent_multimap;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_map_H_include_area
#endif // __TBB_CONCURRENT_ORDERED_CONTAINERS_PRESENT
#endif // __TBB_concurrent_map_H
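
A brief usage sketch for concurrent_map and concurrent_multimap, assuming TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS is defined and the C++11 support checked by __TBB_CONCURRENT_ORDERED_CONTAINERS_PRESENT is available; the keys and prices are illustrative.

#define TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS 1
#include "tbb/concurrent_map.h"
#include <iostream>
#include <string>

int main() {
    tbb::concurrent_map<std::string, int> prices;
    prices["apple"] = 3;                  // operator[] default-constructs a missing value
    prices.insert({"pear", 5});
    prices.emplace("plum", 7);
    prices.at("apple") = 4;               // at() throws for unknown keys

    // Iteration visits keys in comparator order (std::less<std::string> by default).
    for (const auto& kv : prices)
        std::cout << kv.first << " -> " << kv.second << '\n';

    tbb::concurrent_multimap<std::string, int> offers;
    offers.merge(prices);                 // transfers the nodes out of 'prices'
    std::cout << "moved " << offers.size() << " entries\n";
    return 0;
}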


@@ -0,0 +1,552 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_priority_queue_H
#define __TBB_concurrent_priority_queue_H
#define __TBB_concurrent_priority_queue_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "atomic.h"
#include "cache_aligned_allocator.h"
#include "tbb_exception.h"
#include "tbb_stddef.h"
#include "tbb_profiling.h"
#include "internal/_aggregator_impl.h"
#include "internal/_template_helpers.h"
#include "internal/_allocator_traits.h"
#include <vector>
#include <iterator>
#include <functional>
#include __TBB_STD_SWAP_HEADER
#if __TBB_INITIALIZER_LISTS_PRESENT
#include <initializer_list>
#endif
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
#include <type_traits>
#endif
namespace tbb {
namespace interface5 {
namespace internal {
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
template<typename T, bool C = std::is_copy_constructible<T>::value>
struct use_element_copy_constructor {
typedef tbb::internal::true_type type;
};
template<typename T>
struct use_element_copy_constructor <T,false> {
typedef tbb::internal::false_type type;
};
#else
template<typename>
struct use_element_copy_constructor {
typedef tbb::internal::true_type type;
};
#endif
} // namespace internal
using namespace tbb::internal;
//! Concurrent priority queue
template <typename T, typename Compare=std::less<T>, typename A=cache_aligned_allocator<T> >
class concurrent_priority_queue {
public:
//! Element type in the queue.
typedef T value_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
typedef size_t size_type;
//! Difference type for iterator
typedef ptrdiff_t difference_type;
//! Allocator type
typedef A allocator_type;
//! Constructs a new concurrent_priority_queue with default capacity
explicit concurrent_priority_queue(const allocator_type& a = allocator_type()) : mark(0), my_size(0), compare(), data(a)
{
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Constructs a new concurrent_priority_queue with default capacity
explicit concurrent_priority_queue(const Compare& c, const allocator_type& a = allocator_type()) : mark(0), my_size(0), compare(c), data(a)
{
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Constructs a new concurrent_priority_queue with space reserved for init_capacity elements
explicit concurrent_priority_queue(size_type init_capacity, const allocator_type& a = allocator_type()) :
mark(0), my_size(0), compare(), data(a)
{
data.reserve(init_capacity);
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Constructs a new concurrent_priority_queue with space reserved for init_capacity elements
explicit concurrent_priority_queue(size_type init_capacity, const Compare& c, const allocator_type& a = allocator_type()) :
mark(0), my_size(0), compare(c), data(a)
{
data.reserve(init_capacity);
my_aggregator.initialize_handler(my_functor_t(this));
}
//! [begin,end) constructor
template<typename InputIterator>
concurrent_priority_queue(InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
mark(0), compare(), data(begin, end, a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
//! [begin,end) constructor
template<typename InputIterator>
concurrent_priority_queue(InputIterator begin, InputIterator end, const Compare& c, const allocator_type& a = allocator_type()) :
mark(0), compare(c), data(begin, end, a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from std::initializer_list
concurrent_priority_queue(std::initializer_list<T> init_list, const allocator_type &a = allocator_type()) :
mark(0), compare(), data(init_list.begin(), init_list.end(), a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
//! Constructor from std::initializer_list
concurrent_priority_queue(std::initializer_list<T> init_list, const Compare& c, const allocator_type &a = allocator_type()) :
mark(0), compare(c), data(init_list.begin(), init_list.end(), a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
my_size = data.size();
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
//! Copy constructor
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(const concurrent_priority_queue& src) : mark(src.mark),
my_size(src.my_size), data(src.data.begin(), src.data.end(), src.data.get_allocator())
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
}
//! Copy constructor with specific allocator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(const concurrent_priority_queue& src, const allocator_type& a) : mark(src.mark),
my_size(src.my_size), data(src.data.begin(), src.data.end(), a)
{
my_aggregator.initialize_handler(my_functor_t(this));
heapify();
}
//! Assignment operator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue& operator=(const concurrent_priority_queue& src) {
if (this != &src) {
vector_t(src.data.begin(), src.data.end(), src.data.get_allocator()).swap(data);
mark = src.mark;
my_size = src.my_size;
}
return *this;
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructor
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(concurrent_priority_queue&& src) : mark(src.mark),
my_size(src.my_size), data(std::move(src.data))
{
my_aggregator.initialize_handler(my_functor_t(this));
}
//! Move constructor with specific allocator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue(concurrent_priority_queue&& src, const allocator_type& a) : mark(src.mark),
my_size(src.my_size),
#if __TBB_ALLOCATOR_TRAITS_PRESENT
data(std::move(src.data), a)
#else
// Some early C++11 STL implementations of std::vector lack the vector(vector&&, allocator) constructor.
// The likely reason is missing support for allocator_traits (stateful allocators).
data(a)
#endif //__TBB_ALLOCATOR_TRAITS_PRESENT
{
my_aggregator.initialize_handler(my_functor_t(this));
#if !__TBB_ALLOCATOR_TRAITS_PRESENT
if (a != src.data.get_allocator()){
data.reserve(src.data.size());
data.assign(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end()));
}else{
data = std::move(src.data);
}
#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT
}
//! Move assignment operator
/** This operation is unsafe if there are pending concurrent operations on the src queue. */
concurrent_priority_queue& operator=( concurrent_priority_queue&& src) {
if (this != &src) {
mark = src.mark;
my_size = src.my_size;
#if !__TBB_ALLOCATOR_TRAITS_PRESENT
if (data.get_allocator() != src.data.get_allocator()){
vector_t(std::make_move_iterator(src.data.begin()), std::make_move_iterator(src.data.end()), data.get_allocator()).swap(data);
}else
#endif //!__TBB_ALLOCATOR_TRAITS_PRESENT
{
data = std::move(src.data);
}
}
return *this;
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT
//! Assign the queue from [begin,end) range, not thread-safe
template<typename InputIterator>
void assign(InputIterator begin, InputIterator end) {
vector_t(begin, end, data.get_allocator()).swap(data);
mark = 0;
my_size = data.size();
heapify();
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Assign the queue from std::initializer_list, not thread-safe
void assign(std::initializer_list<T> il) { this->assign(il.begin(), il.end()); }
//! Assign from std::initializer_list, not thread-safe
concurrent_priority_queue& operator=(std::initializer_list<T> il) {
this->assign(il.begin(), il.end());
return *this;
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
//! Returns true if empty, false otherwise
/** Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
bool empty() const { return size()==0; }
//! Returns the current number of elements contained in the queue
/** Returned value may not reflect results of pending operations.
This operation reads shared data and will trigger a race condition. */
size_type size() const { return __TBB_load_with_acquire(my_size); }
//! Pushes elem onto the queue, increasing capacity of queue if necessary
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push(const_reference elem) {
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
__TBB_STATIC_ASSERT( std::is_copy_constructible<value_type>::value, "The type is not copy constructible. Copying push operation is impossible." );
#endif
cpq_operation op_data(elem, PUSH_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED) // exception thrown
throw_exception(eid_bad_alloc);
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Pushes elem onto the queue, increasing capacity of queue if necessary
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
void push(value_type &&elem) {
cpq_operation op_data(elem, PUSH_RVALUE_OP);
my_aggregator.execute(&op_data);
if (op_data.status == FAILED) // exception thrown
throw_exception(eid_bad_alloc);
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
//! Constructs a new element using args as the arguments for its construction and pushes it onto the queue
/** This operation can be safely used concurrently with other push, try_pop or emplace operations. */
template<typename... Args>
void emplace(Args&&... args) {
push(value_type(std::forward<Args>(args)...));
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Gets a reference to and removes highest priority element
/** If a highest priority element was found, sets elem and returns true,
otherwise returns false.
This operation can be safely used concurrently with other push, try_pop or emplace operations. */
bool try_pop(reference elem) {
cpq_operation op_data(POP_OP);
op_data.elem = &elem;
my_aggregator.execute(&op_data);
return op_data.status==SUCCEEDED;
}
//! Clear the queue; not thread-safe
/** This operation is unsafe if there are pending concurrent operations on the queue.
Resets size, effectively emptying queue; does not free space.
May not clear elements added in pending operations. */
void clear() {
data.clear();
mark = 0;
my_size = 0;
}
//! Swap this queue with another; not thread-safe
/** This operation is unsafe if there are pending concurrent operations on the queue. */
void swap(concurrent_priority_queue& q) {
using std::swap;
data.swap(q.data);
swap(mark, q.mark);
swap(my_size, q.my_size);
}
//! Return allocator object
allocator_type get_allocator() const { return data.get_allocator(); }
private:
enum operation_type {INVALID_OP, PUSH_OP, POP_OP, PUSH_RVALUE_OP};
enum operation_status { WAIT=0, SUCCEEDED, FAILED };
class cpq_operation : public aggregated_operation<cpq_operation> {
public:
operation_type type;
union {
value_type *elem;
size_type sz;
};
cpq_operation(const_reference e, operation_type t) :
type(t), elem(const_cast<value_type*>(&e)) {}
cpq_operation(operation_type t) : type(t) {}
};
class my_functor_t {
concurrent_priority_queue<T, Compare, A> *cpq;
public:
my_functor_t() {}
my_functor_t(concurrent_priority_queue<T, Compare, A> *cpq_) : cpq(cpq_) {}
void operator()(cpq_operation* op_list) {
cpq->handle_operations(op_list);
}
};
typedef tbb::internal::aggregator< my_functor_t, cpq_operation > aggregator_t;
aggregator_t my_aggregator;
//! Padding added to avoid false sharing
char padding1[NFS_MaxLineSize - sizeof(aggregator_t)];
//! The point at which unsorted elements begin
size_type mark;
__TBB_atomic size_type my_size;
Compare compare;
//! Padding added to avoid false sharing
char padding2[NFS_MaxLineSize - (2*sizeof(size_type)) - sizeof(Compare)];
//! Storage for the heap of elements in queue, plus unheapified elements
/** data has the following structure:
binary unheapified
heap elements
____|_______|____
| | |
v v v
[_|...|_|_|...|_| |...| ]
0 ^ ^ ^
| | |__capacity
| |__my_size
|__mark
Thus, data stores the binary heap starting at position 0 through
mark-1 (it may be empty). Then there are 0 or more elements
that have not yet been inserted into the heap, in positions
mark through my_size-1. */
typedef std::vector<value_type, allocator_type> vector_t;
vector_t data;
void handle_operations(cpq_operation *op_list) {
cpq_operation *tmp, *pop_list=NULL;
__TBB_ASSERT(mark == data.size(), NULL);
// First pass processes all constant (amortized; reallocation may happen) time pushes and pops.
while (op_list) {
// ITT note: &(op_list->status) tag is used to cover accesses to op_list
// node. This thread is going to handle the operation, and so will acquire it
// and perform the associated operation w/o triggering a race condition; the
// thread that created the operation is waiting on the status field, so when
// this thread is done with the operation, it will perform a
// store_with_release to give control back to the waiting thread in
// aggregator::insert_operation.
call_itt_notify(acquired, &(op_list->status));
__TBB_ASSERT(op_list->type != INVALID_OP, NULL);
tmp = op_list;
op_list = itt_hide_load_word(op_list->next);
if (tmp->type == POP_OP) {
if (mark < data.size() &&
compare(data[0], data[data.size()-1])) {
// there are newly pushed elems and the last one
// is higher than top
*(tmp->elem) = tbb::internal::move(data[data.size()-1]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
data.pop_back();
__TBB_ASSERT(mark<=data.size(), NULL);
}
else { // no convenient item to pop; postpone
itt_hide_store_word(tmp->next, pop_list);
pop_list = tmp;
}
} else { // PUSH_OP or PUSH_RVALUE_OP
__TBB_ASSERT(tmp->type == PUSH_OP || tmp->type == PUSH_RVALUE_OP, "Unknown operation" );
__TBB_TRY{
if (tmp->type == PUSH_OP) {
push_back_helper(*(tmp->elem), typename internal::use_element_copy_constructor<value_type>::type());
} else {
data.push_back(tbb::internal::move(*(tmp->elem)));
}
__TBB_store_with_release(my_size, my_size + 1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
} __TBB_CATCH(...) {
itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
}
}
}
// second pass processes pop operations
while (pop_list) {
tmp = pop_list;
pop_list = itt_hide_load_word(pop_list->next);
__TBB_ASSERT(tmp->type == POP_OP, NULL);
if (data.empty()) {
itt_store_word_with_release(tmp->status, uintptr_t(FAILED));
}
else {
__TBB_ASSERT(mark<=data.size(), NULL);
if (mark < data.size() &&
compare(data[0], data[data.size()-1])) {
// there are newly pushed elems and the last one is
// higher than top
*(tmp->elem) = tbb::internal::move(data[data.size()-1]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
data.pop_back();
}
else { // extract top and push last element down heap
*(tmp->elem) = tbb::internal::move(data[0]);
__TBB_store_with_release(my_size, my_size-1);
itt_store_word_with_release(tmp->status, uintptr_t(SUCCEEDED));
reheap();
}
}
}
// heapify any leftover pushed elements before doing the next
// batch of operations
if (mark<data.size()) heapify();
__TBB_ASSERT(mark == data.size(), NULL);
}
//! Merge unsorted elements into heap
void heapify() {
if (!mark && data.size()>0) mark = 1;
for (; mark<data.size(); ++mark) {
// for each unheapified element under size
size_type cur_pos = mark;
value_type to_place = tbb::internal::move(data[mark]);
do { // push to_place up the heap
size_type parent = (cur_pos-1)>>1;
if (!compare(data[parent], to_place)) break;
data[cur_pos] = tbb::internal::move(data[parent]);
cur_pos = parent;
} while( cur_pos );
data[cur_pos] = tbb::internal::move(to_place);
}
}
//! Re-heapify after an extraction
/** Re-heapify by pushing last element down the heap from the root. */
void reheap() {
size_type cur_pos=0, child=1;
while (child < mark) {
size_type target = child;
if (child+1 < mark && compare(data[child], data[child+1]))
++target;
// target now has the higher priority child
if (compare(data[target], data[data.size()-1])) break;
data[cur_pos] = tbb::internal::move(data[target]);
cur_pos = target;
child = (cur_pos<<1)+1;
}
if (cur_pos != data.size()-1)
data[cur_pos] = tbb::internal::move(data[data.size()-1]);
data.pop_back();
if (mark > data.size()) mark = data.size();
}
void push_back_helper(const T& t, tbb::internal::true_type) {
data.push_back(t);
}
void push_back_helper(const T&, tbb::internal::false_type) {
__TBB_ASSERT( false, "The type is not copy constructible. Copying push operation is impossible." );
}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace internal {
template<typename T, typename... Args>
using priority_queue_t = concurrent_priority_queue<
T,
std::conditional_t< (sizeof...(Args)>0) && !is_allocator_v< pack_element_t<0, Args...> >,
pack_element_t<0, Args...>, std::less<T> >,
std::conditional_t< (sizeof...(Args)>0) && is_allocator_v< pack_element_t<sizeof...(Args)-1, Args...> >,
pack_element_t<sizeof...(Args)-1, Args...>, cache_aligned_allocator<T> >
>;
}
// Deduction guide for the constructor from two iterators
template<typename InputIterator,
typename T = typename std::iterator_traits<InputIterator>::value_type,
typename... Args
> concurrent_priority_queue(InputIterator, InputIterator, Args...)
-> internal::priority_queue_t<T, Args...>;
template<typename T, typename CompareOrAllocator>
concurrent_priority_queue(std::initializer_list<T> init_list, CompareOrAllocator)
-> internal::priority_queue_t<T, CompareOrAllocator>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
} // namespace interface5
using interface5::concurrent_priority_queue;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_priority_queue_H_include_area
#endif /* __TBB_concurrent_priority_queue_H */
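
A short sketch of typical use of the queue above: push, try_pop and emplace are the concurrency-safe operations (they are funnelled through the aggregator), while size, empty, clear and swap are documented as unsafe while other operations are pending. The element count and the use of parallel_for as a producer are illustrative.

#include "tbb/concurrent_priority_queue.h"
#include "tbb/parallel_for.h"
#include <iostream>

int main() {
    // With the default std::less<int> comparator the largest element is popped first.
    tbb::concurrent_priority_queue<int> pq;

    // push() may run concurrently with other push/try_pop/emplace calls.
    tbb::parallel_for(0, 100, [&pq](int i) { pq.push(i); });

    int top;
    while (pq.try_pop(top))       // non-blocking: returns false once the queue is empty
        std::cout << top << ' ';  // drained by a single thread here, so prints 99 98 ... 0
    std::cout << '\n';
    return 0;
}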


@@ -0,0 +1,479 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_queue_H
#define __TBB_concurrent_queue_H
#define __TBB_concurrent_queue_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "internal/_concurrent_queue_impl.h"
#include "internal/_allocator_traits.h"
namespace tbb {
namespace strict_ppl {
//! A high-performance thread-safe non-blocking concurrent queue.
/** Multiple threads may each push and pop concurrently.
Assignment construction is not allowed.
@ingroup containers */
template<typename T, typename A = cache_aligned_allocator<T> >
class concurrent_queue: public internal::concurrent_queue_base_v3<T> {
template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
//! Allocator type
typedef typename tbb::internal::allocator_rebind<A, char>::type page_allocator_type;
page_allocator_type my_allocator;
//! Allocates a block of size n (bytes)
virtual void *allocate_block( size_t n ) __TBB_override {
void *b = reinterpret_cast<void*>(my_allocator.allocate( n ));
if( !b )
internal::throw_exception(internal::eid_bad_alloc);
return b;
}
//! Deallocates block created by allocate_block.
virtual void deallocate_block( void *b, size_t n ) __TBB_override {
my_allocator.deallocate( reinterpret_cast<char*>(b), n );
}
static void copy_construct_item(T* location, const void* src){
new (location) T(*static_cast<const T*>(src));
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
static void move_construct_item(T* location, const void* src) {
new (location) T( std::move(*static_cast<T*>(const_cast<void*>(src))) );
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
public:
//! Element type in the queue.
typedef T value_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
typedef size_t size_type;
//! Difference type for iterator
typedef ptrdiff_t difference_type;
//! Allocator type
typedef A allocator_type;
//! Construct empty queue
explicit concurrent_queue(const allocator_type& a = allocator_type()) :
my_allocator( a )
{
}
//! [begin,end) constructor
template<typename InputIterator>
concurrent_queue( InputIterator begin, InputIterator end, const allocator_type& a = allocator_type()) :
my_allocator( a )
{
for( ; begin != end; ++begin )
this->push(*begin);
}
//! Copy constructor
concurrent_queue( const concurrent_queue& src, const allocator_type& a = allocator_type()) :
internal::concurrent_queue_base_v3<T>(), my_allocator( a )
{
this->assign( src, copy_construct_item );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructors
concurrent_queue( concurrent_queue&& src ) :
internal::concurrent_queue_base_v3<T>(), my_allocator( std::move(src.my_allocator) )
{
this->internal_swap( src );
}
concurrent_queue( concurrent_queue&& src, const allocator_type& a ) :
internal::concurrent_queue_base_v3<T>(), my_allocator( a )
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if( my_allocator == src.my_allocator) {
this->internal_swap( src );
} else {
// allocators are different => performing per-element move
this->assign( src, move_construct_item );
src.clear();
}
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Destroy queue
~concurrent_queue();
//! Enqueue an item at tail of queue.
void push( const T& source ) {
this->internal_push( &source, copy_construct_item );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
void push( T&& source ) {
this->internal_push( &source, move_construct_item );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
void emplace( Arguments&&... args ) {
push( T(std::forward<Arguments>( args )...) );
}
#endif //__TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& result ) {
return this->internal_try_pop( &result );
}
//! Return the number of items in the queue; thread unsafe
size_type unsafe_size() const {return this->internal_size();}
//! Equivalent to size()==0.
bool empty() const {return this->internal_empty();}
//! Clear the queue; not thread-safe.
void clear();
//! Return allocator object
allocator_type get_allocator() const { return this->my_allocator; }
typedef internal::concurrent_queue_iterator<concurrent_queue,T> iterator;
typedef internal::concurrent_queue_iterator<concurrent_queue,const T> const_iterator;
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() {return iterator(*this);}
iterator unsafe_end() {return iterator();}
const_iterator unsafe_begin() const {return const_iterator(*this);}
const_iterator unsafe_end() const {return const_iterator();}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators
template<typename InputIterator,
typename T = typename std::iterator_traits<InputIterator>::value_type,
typename A = cache_aligned_allocator<T>
> concurrent_queue(InputIterator, InputIterator, const A& = A())
-> concurrent_queue<T, A>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
template<typename T, class A>
concurrent_queue<T,A>::~concurrent_queue() {
clear();
this->internal_finish_clear();
}
template<typename T, class A>
void concurrent_queue<T,A>::clear() {
T value;
while( !empty() ) try_pop(value);
}
} // namespace strict_ppl
//! A high-performance thread-safe blocking concurrent bounded queue.
/** This is the pre-PPL TBB concurrent queue which supports boundedness and blocking semantics.
Note that method names agree with the PPL-style concurrent queue.
Multiple threads may each push and pop concurrently.
Assignment construction is not allowed.
@ingroup containers */
template<typename T, class A = cache_aligned_allocator<T> >
class concurrent_bounded_queue: public internal::concurrent_queue_base_v8 {
template<typename Container, typename Value> friend class internal::concurrent_queue_iterator;
typedef typename tbb::internal::allocator_rebind<A, char>::type page_allocator_type;
//! Allocator type
page_allocator_type my_allocator;
typedef typename concurrent_queue_base_v3::padded_page<T> padded_page;
typedef typename concurrent_queue_base_v3::copy_specifics copy_specifics;
//! Class used to ensure exception-safety of method "pop"
class destroyer: internal::no_copy {
T& my_value;
public:
destroyer( T& value ) : my_value(value) {}
~destroyer() {my_value.~T();}
};
T& get_ref( page& p, size_t index ) {
__TBB_ASSERT( index<items_per_page, NULL );
return (&static_cast<padded_page*>(static_cast<void*>(&p))->last)[index];
}
virtual void copy_item( page& dst, size_t index, const void* src ) __TBB_override {
new( &get_ref(dst,index) ) T(*static_cast<const T*>(src));
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
virtual void move_item( page& dst, size_t index, const void* src ) __TBB_override {
new( &get_ref(dst,index) ) T( std::move(*static_cast<T*>(const_cast<void*>(src))) );
}
#else
virtual void move_item( page&, size_t, const void* ) __TBB_override {
__TBB_ASSERT( false, "Unreachable code" );
}
#endif
virtual void copy_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) __TBB_override {
new( &get_ref(dst,dindex) ) T( get_ref( const_cast<page&>(src), sindex ) );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
virtual void move_page_item( page& dst, size_t dindex, const page& src, size_t sindex ) __TBB_override {
new( &get_ref(dst,dindex) ) T( std::move(get_ref( const_cast<page&>(src), sindex )) );
}
#else
virtual void move_page_item( page&, size_t, const page&, size_t ) __TBB_override {
__TBB_ASSERT( false, "Unreachable code" );
}
#endif
virtual void assign_and_destroy_item( void* dst, page& src, size_t index ) __TBB_override {
T& from = get_ref(src,index);
destroyer d(from);
*static_cast<T*>(dst) = tbb::internal::move( from );
}
virtual page *allocate_page() __TBB_override {
size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
page *p = reinterpret_cast<page*>(my_allocator.allocate( n ));
if( !p )
internal::throw_exception(internal::eid_bad_alloc);
return p;
}
virtual void deallocate_page( page *p ) __TBB_override {
size_t n = sizeof(padded_page) + (items_per_page-1)*sizeof(T);
my_allocator.deallocate( reinterpret_cast<char*>(p), n );
}
public:
//! Element type in the queue.
typedef T value_type;
//! Allocator type
typedef A allocator_type;
//! Reference type
typedef T& reference;
//! Const reference type
typedef const T& const_reference;
//! Integral type for representing size of the queue.
/** Note that the size_type is a signed integral type.
This is because the size can be negative if there are pending pops without corresponding pushes. */
typedef std::ptrdiff_t size_type;
//! Difference type for iterator
typedef std::ptrdiff_t difference_type;
//! Construct empty queue
explicit concurrent_bounded_queue(const allocator_type& a = allocator_type()) :
concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
}
//! Copy constructor
concurrent_bounded_queue( const concurrent_bounded_queue& src, const allocator_type& a = allocator_type())
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
assign( src );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move constructors
concurrent_bounded_queue( concurrent_bounded_queue&& src )
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( std::move(src.my_allocator) )
{
internal_swap( src );
}
concurrent_bounded_queue( concurrent_bounded_queue&& src, const allocator_type& a )
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
// checking that memory allocated by one instance of allocator can be deallocated
// with another
if( my_allocator == src.my_allocator) {
this->internal_swap( src );
} else {
// allocators are different => performing per-element move
this->move_content( src );
src.clear();
}
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! [begin,end) constructor
template<typename InputIterator>
concurrent_bounded_queue( InputIterator begin, InputIterator end,
const allocator_type& a = allocator_type())
: concurrent_queue_base_v8( sizeof(T) ), my_allocator( a )
{
for( ; begin != end; ++begin )
internal_push_if_not_full(&*begin);
}
//! Destroy queue
~concurrent_bounded_queue();
//! Enqueue an item at tail of queue.
void push( const T& source ) {
internal_push( &source );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Enqueue an item at tail of queue by moving it.
void push( T&& source ) {
internal_push_move( &source );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
void emplace( Arguments&&... args ) {
push( T(std::forward<Arguments>( args )...) );
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Dequeue item from head of queue.
/** Block until an item becomes available, and then dequeue it. */
void pop( T& destination ) {
internal_pop( &destination );
}
#if TBB_USE_EXCEPTIONS
//! Abort all pending queue operations
void abort() {
internal_abort();
}
#endif
//! Enqueue an item at tail of queue if queue is not already full.
/** Does not wait for queue to become not full.
Returns true if item is pushed; false if queue was already full. */
bool try_push( const T& source ) {
return internal_push_if_not_full( &source );
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Enqueue an item at tail of queue by moving it, if queue is not already full.
/** Does not wait for queue to become not full.
Returns true if item is pushed; false if queue was already full. */
bool try_push( T&& source ) {
return internal_push_move_if_not_full( &source );
}
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Arguments>
bool try_emplace( Arguments&&... args ) {
return try_push( T(std::forward<Arguments>( args )...) );
}
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
//! Attempt to dequeue an item from head of queue.
/** Does not wait for item to become available.
Returns true if successful; false otherwise. */
bool try_pop( T& destination ) {
return internal_pop_if_present( &destination );
}
//! Return number of pushes minus number of pops.
/** Note that the result can be negative if there are pops waiting for the
corresponding pushes. The result can also exceed capacity() if there
are push operations in flight. */
size_type size() const {return internal_size();}
//! Equivalent to size()<=0.
bool empty() const {return internal_empty();}
//! Maximum number of allowed elements
size_type capacity() const {
return my_capacity;
}
//! Set the capacity
/** Setting the capacity to 0 causes subsequent try_push operations to always fail,
and subsequent push operations to block forever. */
void set_capacity( size_type new_capacity ) {
internal_set_capacity( new_capacity, sizeof(T) );
}
//! return allocator object
allocator_type get_allocator() const { return this->my_allocator; }
//! Clear the queue; not thread-safe.
void clear();
typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,T> iterator;
typedef internal::concurrent_queue_iterator<concurrent_bounded_queue,const T> const_iterator;
//------------------------------------------------------------------------
// The iterators are intended only for debugging. They are slow and not thread safe.
//------------------------------------------------------------------------
iterator unsafe_begin() {return iterator(*this);}
iterator unsafe_end() {return iterator();}
const_iterator unsafe_begin() const {return const_iterator(*this);}
const_iterator unsafe_end() const {return const_iterator();}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// guide for concurrent_bounded_queue(InputIterator, InputIterator, ...)
template<typename InputIterator,
typename T = typename std::iterator_traits<InputIterator>::value_type,
typename A = cache_aligned_allocator<T>
> concurrent_bounded_queue(InputIterator, InputIterator, const A& = A())
-> concurrent_bounded_queue<T, A>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
template<typename T, class A>
concurrent_bounded_queue<T,A>::~concurrent_bounded_queue() {
clear();
internal_finish_clear();
}
template<typename T, class A>
void concurrent_bounded_queue<T,A>::clear() {
T value;
while( try_pop(value) ) /*noop*/;
}
using strict_ppl::concurrent_queue;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_queue_H_include_area
#endif /* __TBB_concurrent_queue_H */
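
A sketch contrasting the two queues above: concurrent_queue is unbounded and purely non-blocking, while concurrent_bounded_queue adds a capacity limit together with blocking push and pop. The capacity of 2 and the helper thread are illustrative.

#include "tbb/concurrent_queue.h"
#include <iostream>
#include <thread>

int main() {
    // Unbounded, non-blocking queue: try_pop() simply returns false when empty.
    tbb::concurrent_queue<int> q;
    for (int i = 0; i < 5; ++i)
        q.push(i);
    int x;
    while (q.try_pop(x))
        std::cout << x << ' ';    // FIFO: 0 1 2 3 4
    std::cout << '\n';

    // Bounded, blocking queue: push() waits for room, pop() waits for an item.
    tbb::concurrent_bounded_queue<int> bq;
    bq.set_capacity(2);
    std::thread producer([&bq] {
        bq.push(1);
        bq.push(2);
        bq.push(3);               // blocks until the consumer below pops an item
    });
    int y;
    for (int i = 0; i < 3; ++i) {
        bq.pop(y);                // blocks until an item is available
        std::cout << y << ' ';
    }
    std::cout << '\n';
    producer.join();
    return 0;
}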


@@ -0,0 +1,304 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_concurrent_set_H
#define __TBB_concurrent_set_H
#define __TBB_concurrent_set_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if !TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS
#error Set TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS to include concurrent_set.h
#endif
#include "tbb/tbb_config.h"
// concurrent_set requires C++11 support
#if __TBB_CONCURRENT_ORDERED_CONTAINERS_PRESENT
#include "internal/_concurrent_skip_list_impl.h"
namespace tbb {
namespace interface10 {
// TODO: test this class
template<typename Key, typename KeyCompare, typename RandomGenerator, size_t MAX_LEVELS, typename Allocator, bool AllowMultimapping>
class set_traits {
public:
static constexpr size_t MAX_LEVEL = MAX_LEVELS;
using random_level_generator_type = RandomGenerator;
using key_type = Key;
using value_type = key_type;
using compare_type = KeyCompare;
using value_compare = compare_type;
using reference = value_type&;
using const_reference = const value_type&;
using allocator_type = Allocator;
using mutex_type = tbb::spin_mutex;
using node_type = tbb::internal::node_handle<key_type, value_type, internal::skip_list_node<value_type, mutex_type>, allocator_type>;
static const bool allow_multimapping = AllowMultimapping;
static const key_type& get_key(const_reference val) {
return val;
}
static value_compare value_comp(compare_type comp) { return comp; }
};
template <typename Key, typename Comp, typename Allocator>
class concurrent_multiset;
template <typename Key, typename Comp = std::less<Key>, typename Allocator = tbb_allocator<Key>>
class concurrent_set
: public internal::concurrent_skip_list<set_traits<Key, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, false>> {
using traits_type = set_traits<Key, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, false>;
using base_type = internal::concurrent_skip_list<traits_type>;
#if __TBB_EXTRA_DEBUG
public:
#endif
using base_type::allow_multimapping;
public:
using key_type = Key;
using value_type = typename traits_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Comp;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using reverse_iterator = typename base_type::reverse_iterator;
using const_reverse_iterator = typename base_type::const_reverse_iterator;
using node_type = typename base_type::node_type;
using base_type::insert;
concurrent_set() = default;
explicit concurrent_set(const key_compare& comp, const allocator_type& alloc = allocator_type()) : base_type(comp, alloc) {}
explicit concurrent_set(const allocator_type& alloc) : base_type(key_compare(), alloc) {}
template< class InputIt >
concurrent_set(InputIt first, InputIt last, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(first, last, comp, alloc) {}
template< class InputIt >
concurrent_set(InputIt first, InputIt last, const allocator_type& alloc) : base_type(first, last, key_compare(), alloc) {}
/** Copy constructor */
concurrent_set(const concurrent_set&) = default;
concurrent_set(const concurrent_set& other, const allocator_type& alloc) : base_type(other, alloc) {}
concurrent_set(concurrent_set&&) = default;
concurrent_set(concurrent_set&& other, const allocator_type& alloc) : base_type(std::move(other), alloc) {}
concurrent_set(std::initializer_list<value_type> init, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(comp, alloc) {
insert(init);
}
concurrent_set(std::initializer_list<value_type> init, const allocator_type& alloc)
: base_type(key_compare(), alloc) {
insert(init);
}
concurrent_set& operator=(const concurrent_set& other) {
return static_cast<concurrent_set&>(base_type::operator=(other));
}
concurrent_set& operator=(concurrent_set&& other) {
return static_cast<concurrent_set&>(base_type::operator=(std::move(other)));
}
template<typename C2>
void merge(concurrent_set<key_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_set<key_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename C2>
void merge(concurrent_multiset<key_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_multiset<key_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_set
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace internal {
using namespace tbb::internal;
template<template<typename...> typename Set, typename Key, typename... Args>
using c_set_t = Set<Key,
std::conditional_t< (sizeof...(Args) > 0) && !is_allocator_v<pack_element_t<0, Args...> >,
pack_element_t<0, Args...>, std::less<Key> >,
std::conditional_t< (sizeof...(Args) > 0) && is_allocator_v<pack_element_t<sizeof...(Args)-1, Args...> >,
pack_element_t<sizeof...(Args)-1, Args...>, tbb_allocator<Key> > >;
} // namespace internal
template<typename It, typename... Args>
concurrent_set(It, It, Args...)
-> internal::c_set_t<concurrent_set, internal::iterator_value_t<It>, Args...>;
template<typename Key, typename... Args>
concurrent_set(std::initializer_list<Key>, Args...)
-> internal::c_set_t<concurrent_set, Key, Args...>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template <typename Key, typename Comp = std::less<Key>, typename Allocator = tbb_allocator<Key>>
class concurrent_multiset
: public internal::concurrent_skip_list<set_traits<Key, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, true>> {
using traits_type = set_traits<Key, Comp, internal::concurrent_geometric_level_generator<64>, 64, Allocator, true>;
using base_type = internal::concurrent_skip_list<traits_type>;
#if __TBB_EXTRA_DEBUG
public:
#endif
using base_type::allow_multimapping;
public:
using key_type = Key;
using value_type = typename traits_type::value_type;
using size_type = typename base_type::size_type;
using difference_type = typename base_type::difference_type;
using key_compare = Comp;
using value_compare = typename base_type::value_compare;
using allocator_type = Allocator;
using reference = typename base_type::reference;
using const_reference = typename base_type::const_reference;
using pointer = typename base_type::pointer;
using const_pointer = typename base_type::const_pointer;
using iterator = typename base_type::iterator;
using const_iterator = typename base_type::const_iterator;
using reverse_iterator = typename base_type::reverse_iterator;
using const_reverse_iterator = typename base_type::const_reverse_iterator;
using node_type = typename base_type::node_type;
using base_type::insert;
concurrent_multiset() = default;
explicit concurrent_multiset(const key_compare& comp, const allocator_type& alloc = allocator_type()) : base_type(comp, alloc) {}
explicit concurrent_multiset(const allocator_type& alloc) : base_type(key_compare(), alloc) {}
template< class InputIt >
concurrent_multiset(InputIt first, InputIt last, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(comp, alloc) {
insert(first, last);
}
template< class InputIt >
concurrent_multiset(InputIt first, InputIt last, const allocator_type& alloc) : base_type(key_compare(), alloc) {
insert(first, last);
}
/** Copy constructor */
concurrent_multiset(const concurrent_multiset&) = default;
concurrent_multiset(const concurrent_multiset& other, const allocator_type& alloc) : base_type(other, alloc) {}
concurrent_multiset(concurrent_multiset&&) = default;
concurrent_multiset(concurrent_multiset&& other, const allocator_type& alloc) : base_type(std::move(other), alloc) {}
concurrent_multiset(std::initializer_list<value_type> init, const key_compare& comp = Comp(), const allocator_type& alloc = allocator_type())
: base_type(comp, alloc) {
insert(init);
}
concurrent_multiset(std::initializer_list<value_type> init, const allocator_type& alloc)
: base_type(key_compare(), alloc) {
insert(init);
}
concurrent_multiset& operator=(const concurrent_multiset& other) {
return static_cast<concurrent_multiset&>(base_type::operator=(other));
}
concurrent_multiset& operator=(concurrent_multiset&& other) {
return static_cast<concurrent_multiset&>(base_type::operator=(std::move(other)));
}
template<typename C2>
void merge(concurrent_set<key_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_set<key_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
template<typename C2>
void merge(concurrent_multiset<key_type, C2, Allocator>& source) {
this->internal_merge(source);
}
template<typename C2>
void merge(concurrent_multiset<key_type, C2, Allocator>&& source) {
this->internal_merge(std::move(source));
}
}; // class concurrent_multiset
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
template<typename It, typename... Args>
concurrent_multiset(It, It, Args...)
-> internal::c_set_t<concurrent_multiset, internal::iterator_value_t<It>, Args...>;
template<typename Key, typename... Args>
concurrent_multiset(std::initializer_list<Key>, Args...)
-> internal::c_set_t<concurrent_multiset, Key, Args...>;
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
} // namespace interface10
using interface10::concurrent_set;
using interface10::concurrent_multiset;
} // namespace tbb
#endif // __TBB_CONCURRENT_ORDERED_CONTAINERS_PRESENT
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_set_H_include_area
#endif // __TBB_concurrent_set_H
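A brief usage sketch (not part of the shipped header) may help illustrate the ordered set containers and their C++17 deduction guides; it assumes the preview macro TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS is defined so the feature guard above is active.
#define TBB_PREVIEW_CONCURRENT_ORDERED_CONTAINERS 1
#include "tbb/concurrent_set.h"
#include <iostream>
int main() {
    tbb::concurrent_set<int> s;                    // unique keys
    tbb::concurrent_multiset<int> ms{1, 1, 2};     // duplicates allowed
    s.insert(42);
    s.insert(42);                                  // second insert is a no-op
    ms.merge(s);                                   // transplants the node from s into ms
    tbb::concurrent_set guided{3, 4, 5};           // C++17 CTAD via the deduction guides above
    std::cout << s.size() << ' ' << ms.size() << ' ' << guided.size() << '\n';  // expected: 0 4 3
    return 0;
}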

View File

@@ -0,0 +1,492 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/* Container implementations in this header are based on PPL implementations
provided by Microsoft. */
#ifndef __TBB_concurrent_unordered_map_H
#define __TBB_concurrent_unordered_map_H
#define __TBB_concurrent_unordered_map_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "internal/_concurrent_unordered_impl.h"
namespace tbb
{
namespace interface5 {
// Template class for hash map traits
template<typename Key, typename T, typename Hash_compare, typename Allocator, bool Allow_multimapping>
class concurrent_unordered_map_traits
{
protected:
typedef std::pair<const Key, T> value_type;
typedef Key key_type;
typedef Hash_compare hash_compare;
typedef typename tbb::internal::allocator_rebind<Allocator, value_type>::type allocator_type;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef tbb::internal::node_handle<key_type, value_type,
typename internal::split_ordered_list<value_type, allocator_type>::node,
allocator_type> node_type;
#endif // __TBB_UNORDERED_NODE_HANDLE_PRESENT
enum { allow_multimapping = Allow_multimapping };
concurrent_unordered_map_traits() : my_hash_compare() {}
concurrent_unordered_map_traits(const hash_compare& hc) : my_hash_compare(hc) {}
template<class Type1, class Type2>
static const Key& get_key(const std::pair<Type1, Type2>& value) {
return (value.first);
}
hash_compare my_hash_compare; // the comparator predicate for keys
};
template<typename Key, typename T, typename Hasher, typename Key_equality, typename Allocator>
class concurrent_unordered_multimap;
template <typename Key, typename T, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
class concurrent_unordered_map :
public internal::concurrent_unordered_base< concurrent_unordered_map_traits<Key, T,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, false> traits_type;
typedef internal::concurrent_unordered_base< traits_type > base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::end;
using base_type::find;
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef T mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef typename base_type::node_type node_type;
#endif // __TBB_UNORDERED_NODE_HANDLE_PRESENT
// Construction/destruction/copying
explicit concurrent_unordered_map(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{}
concurrent_unordered_map(size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{}
concurrent_unordered_map(size_type n_of_buckets, const hasher& a_hasher, const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{}
explicit concurrent_unordered_map(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{}
template <typename Iterator>
concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_map(Iterator first, Iterator last, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_map(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(il.begin(),il.end());
}
concurrent_unordered_map(std::initializer_list<value_type> il, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(il.begin(), il.end());
}
concurrent_unordered_map(std::initializer_list<value_type> il, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(il.begin(), il.end());
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
concurrent_unordered_map(const concurrent_unordered_map& table)
: base_type(table)
{}
concurrent_unordered_map& operator=(const concurrent_unordered_map& table)
{
return static_cast<concurrent_unordered_map&>(base_type::operator=(table));
}
concurrent_unordered_map(concurrent_unordered_map&& table)
: base_type(std::move(table))
{}
concurrent_unordered_map& operator=(concurrent_unordered_map&& table)
{
return static_cast<concurrent_unordered_map&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_map(concurrent_unordered_map&& table, const Allocator& a) : base_type(std::move(table), a)
{}
#endif /*__TBB_CPP11_RVALUE_REF_PRESENT*/
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
template<typename Hash, typename Equality>
void merge(concurrent_unordered_map<Key, T, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_map<Key, T, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multimap<Key, T, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multimap<Key, T, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
#endif //__TBB_UNORDERED_NODE_HANDLE_PRESENT
concurrent_unordered_map(const concurrent_unordered_map& table, const Allocator& a)
: base_type(table, a)
{}
// Observers
mapped_type& operator[](const key_type& key)
{
iterator where = find(key);
if (where == end())
{
where = insert(std::pair<key_type, mapped_type>(key, mapped_type())).first;
}
return ((*where).second);
}
mapped_type& at(const key_type& key)
{
iterator where = find(key);
if (where == end())
{
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return ((*where).second);
}
const mapped_type& at(const key_type& key) const
{
const_iterator where = find(key);
if (where == end())
{
tbb::internal::throw_exception(tbb::internal::eid_invalid_key);
}
return ((*where).second);
}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace internal {
using namespace tbb::internal;
template<template<typename...> typename Map, typename Key, typename Element, typename... Args>
using cu_map_t = Map<
Key, Element,
std::conditional_t< (sizeof...(Args)>0) && !is_allocator_v< pack_element_t<0, Args...> >,
pack_element_t<0, Args...>, tbb_hash<Key> >,
std::conditional_t< (sizeof...(Args)>1) && !is_allocator_v< pack_element_t<1, Args...> >,
pack_element_t<1, Args...>, std::equal_to<Key> >,
std::conditional_t< (sizeof...(Args)>0) && is_allocator_v< pack_element_t<sizeof...(Args)-1, Args...> >,
pack_element_t<sizeof...(Args)-1, Args...>, tbb_allocator<std::pair<const Key, Element> > >
>;
}
// Deduction guide for the constructor from two iterators
template<typename I>
concurrent_unordered_map (I, I)
-> internal::cu_map_t<concurrent_unordered_map, internal::iterator_key_t<I>, internal::iterator_mapped_t<I>>;
// Deduction guide for the constructor from two iterators and hasher/equality/allocator
template<typename I, typename... Args>
concurrent_unordered_map(I, I, size_t, Args...)
-> internal::cu_map_t<concurrent_unordered_map, internal::iterator_key_t<I>, internal::iterator_mapped_t<I>, Args...>;
// Deduction guide for the constructor from an initializer_list
template<typename Key, typename Element>
concurrent_unordered_map(std::initializer_list<std::pair<const Key, Element>>)
-> internal::cu_map_t<concurrent_unordered_map, Key, Element>;
// Deduction guide for the constructor from an initializer_list and hasher/equality/allocator
template<typename Key, typename Element, typename... Args>
concurrent_unordered_map(std::initializer_list<std::pair<const Key, Element>>, size_t, Args...)
-> internal::cu_map_t<concurrent_unordered_map, Key, Element, Args...>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
template < typename Key, typename T, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<std::pair<const Key, T> > >
class concurrent_unordered_multimap :
public internal::concurrent_unordered_base< concurrent_unordered_map_traits< Key, T,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, true> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_map_traits<Key, T, hash_compare, Allocator, true> traits_type;
typedef internal::concurrent_unordered_base<traits_type> base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef T mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef typename base_type::node_type node_type;
#endif //__TBB_UNORDERED_NODE_HANDLE_PRESENT
// Construction/destruction/copying
explicit concurrent_unordered_multimap(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{}
concurrent_unordered_multimap(size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{}
concurrent_unordered_multimap(size_type n_of_buckets, const hasher& a_hasher, const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{}
explicit concurrent_unordered_multimap(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{}
template <typename Iterator>
concurrent_unordered_multimap(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets,key_compare(a_hasher,a_keyeq), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_multimap(Iterator first, Iterator last, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_multimap(Iterator first, Iterator last, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_multimap(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(il.begin(),il.end());
}
concurrent_unordered_multimap(std::initializer_list<value_type> il, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(il.begin(), il.end());
}
concurrent_unordered_multimap(std::initializer_list<value_type> il, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(il.begin(), il.end());
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
concurrent_unordered_multimap(const concurrent_unordered_multimap& table)
: base_type(table)
{}
concurrent_unordered_multimap& operator=(const concurrent_unordered_multimap& table)
{
return static_cast<concurrent_unordered_multimap&>(base_type::operator=(table));
}
concurrent_unordered_multimap(concurrent_unordered_multimap&& table)
: base_type(std::move(table))
{}
concurrent_unordered_multimap& operator=(concurrent_unordered_multimap&& table)
{
return static_cast<concurrent_unordered_multimap&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_multimap(concurrent_unordered_multimap&& table, const Allocator& a) : base_type(std::move(table), a)
{}
#endif /*__TBB_CPP11_RVALUE_REF_PRESENT*/
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
template<typename Hash, typename Equality>
void merge(concurrent_unordered_map<Key, T, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_map<Key, T, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multimap<Key, T, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multimap<Key, T, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
#endif //__TBB_UNORDERED_NODE_HANDLE_PRESENT
concurrent_unordered_multimap(const concurrent_unordered_multimap& table, const Allocator& a)
: base_type(table, a)
{}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators
template<typename I>
concurrent_unordered_multimap (I, I)
-> internal::cu_map_t<concurrent_unordered_multimap, internal::iterator_key_t<I>, internal::iterator_mapped_t<I>>;
// Deduction guide for the constructor from two iterators and hasher/equality/allocator
template<typename I, typename... Args>
concurrent_unordered_multimap(I, I, size_t, Args...)
-> internal::cu_map_t<concurrent_unordered_multimap, internal::iterator_key_t<I>, internal::iterator_mapped_t<I>, Args...>;
// Deduction guide for the constructor from an initializer_list
template<typename Key, typename Element>
concurrent_unordered_multimap(std::initializer_list<std::pair<const Key, Element>>)
-> internal::cu_map_t<concurrent_unordered_multimap, Key, Element>;
// Deduction guide for the constructor from an initializer_list and hasher/equality/allocator
template<typename Key, typename Element, typename... Args>
concurrent_unordered_multimap(std::initializer_list<std::pair<const Key, Element>>, size_t, Args...)
-> internal::cu_map_t<concurrent_unordered_multimap, Key, Element, Args...>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
} // namespace interface5
using interface5::concurrent_unordered_map;
using interface5::concurrent_unordered_multimap;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_unordered_map_H_include_area
#endif // __TBB_concurrent_unordered_map_H
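A short usage sketch (not part of the header) showing concurrent insertion through operator[] and checked lookup with at(); it assumes a C++11 compiler so the lambda-based parallel_for overload is available.
#include "tbb/concurrent_unordered_map.h"
#include "tbb/parallel_for.h"
#include <string>
#include <iostream>
int main() {
    tbb::concurrent_unordered_map<int, std::string> m;
    tbb::parallel_for(0, 100, [&](int i) {
        m[i] = std::to_string(i);   // operator[] inserts a default-constructed value if the key is absent
    });
    std::cout << m.at(42) << ' ' << m.size() << '\n';  // expected: 42 100
    return 0;
}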

View File

@@ -0,0 +1,448 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/* Container implementations in this header are based on PPL implementations
provided by Microsoft. */
#ifndef __TBB_concurrent_unordered_set_H
#define __TBB_concurrent_unordered_set_H
#define __TBB_concurrent_unordered_set_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "internal/_concurrent_unordered_impl.h"
namespace tbb
{
namespace interface5 {
// Template class for hash set traits
template<typename Key, typename Hash_compare, typename Allocator, bool Allow_multimapping>
class concurrent_unordered_set_traits
{
protected:
typedef Key value_type;
typedef Key key_type;
typedef Hash_compare hash_compare;
typedef typename tbb::internal::allocator_rebind<Allocator, value_type>::type allocator_type;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef tbb::internal::node_handle<key_type, key_type,
typename internal::split_ordered_list<key_type, allocator_type>::node,
allocator_type> node_type;
#endif // __TBB_UNORDERED_NODE_HANDLE_PRESENT
enum { allow_multimapping = Allow_multimapping };
concurrent_unordered_set_traits() : my_hash_compare() {}
concurrent_unordered_set_traits(const hash_compare& hc) : my_hash_compare(hc) {}
static const Key& get_key(const value_type& value) {
return value;
}
hash_compare my_hash_compare; // the comparator predicate for keys
};
template<typename Key, typename Hasher, typename Key_equality, typename Allocator>
class concurrent_unordered_multiset;
template <typename Key, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>, typename Allocator = tbb::tbb_allocator<Key> >
class concurrent_unordered_set : public internal::concurrent_unordered_base< concurrent_unordered_set_traits<Key, internal::hash_compare<Key, Hasher, Key_equality>, Allocator, false> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_set_traits<Key, hash_compare, Allocator, false> traits_type;
typedef internal::concurrent_unordered_base< traits_type > base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef Key mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef typename base_type::node_type node_type;
#endif /*__TBB_UNORDERED_NODE_HANDLE_PRESENT*/
// Construction/destruction/copying
explicit concurrent_unordered_set(size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{}
concurrent_unordered_set(size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{}
concurrent_unordered_set(size_type n_of_buckets, const hasher& a_hasher, const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{}
explicit concurrent_unordered_set(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{}
template <typename Iterator>
concurrent_unordered_set(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_set(Iterator first, Iterator last, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_set(Iterator first, Iterator last, size_type n_of_buckets, const hasher& a_hasher, const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_set(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number, const hasher& a_hasher = hasher(),
const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(il.begin(),il.end());
}
concurrent_unordered_set(std::initializer_list<value_type> il, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(il.begin(), il.end());
}
concurrent_unordered_set(std::initializer_list<value_type> il, size_type n_of_buckets, const hasher& a_hasher, const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(il.begin(), il.end());
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
concurrent_unordered_set(const concurrent_unordered_set& table)
: base_type(table)
{}
concurrent_unordered_set& operator=(const concurrent_unordered_set& table)
{
return static_cast<concurrent_unordered_set&>(base_type::operator=(table));
}
concurrent_unordered_set(concurrent_unordered_set&& table)
: base_type(std::move(table))
{}
concurrent_unordered_set& operator=(concurrent_unordered_set&& table)
{
return static_cast<concurrent_unordered_set&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_set(concurrent_unordered_set&& table, const Allocator& a)
: base_type(std::move(table), a)
{}
#endif /*__TBB_CPP11_RVALUE_REF_PRESENT*/
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
template<typename Hash, typename Equality>
void merge(concurrent_unordered_set<Key, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_set<Key, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multiset<Key, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multiset<Key, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
#endif //__TBB_UNORDERED_NODE_HANDLE_PRESENT
concurrent_unordered_set(const concurrent_unordered_set& table, const Allocator& a)
: base_type(table, a)
{}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace internal {
using namespace tbb::internal;
template <template<typename...> typename Set, typename T, typename... Args>
using cu_set_t = Set <
T,
std::conditional_t< (sizeof...(Args)>0) && !is_allocator_v< pack_element_t<0, Args...> >,
pack_element_t<0, Args...>, tbb_hash<T> >,
std::conditional_t< (sizeof...(Args)>1) && !is_allocator_v< pack_element_t<1, Args...> >,
pack_element_t<1, Args...>, std::equal_to<T> >,
std::conditional_t< (sizeof...(Args)>0) && is_allocator_v< pack_element_t<sizeof...(Args)-1, Args...> >,
pack_element_t<sizeof...(Args)-1, Args...>, tbb_allocator<T> >
>;
}
// Deduction guide for the constructor from two iterators
template<typename I>
concurrent_unordered_set(I, I)
-> internal::cu_set_t<concurrent_unordered_set, internal::iterator_value_t<I>>;
// Deduction guide for the constructor from two iterators and hasher/equality/allocator
template<typename I, typename... Args>
concurrent_unordered_set(I, I, size_t, Args...)
-> internal::cu_set_t<concurrent_unordered_set, internal::iterator_value_t<I>, Args...>;
// Deduction guide for the constructor from an initializer_list
template<typename T>
concurrent_unordered_set(std::initializer_list<T>)
-> internal::cu_set_t<concurrent_unordered_set, T>;
// Deduction guide for the constructor from an initializer_list and hasher/equality/allocator
template<typename T, typename... Args>
concurrent_unordered_set(std::initializer_list<T>, size_t, Args...)
-> internal::cu_set_t<concurrent_unordered_set, T, Args...>;
#endif /*__TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
template <typename Key, typename Hasher = tbb::tbb_hash<Key>, typename Key_equality = std::equal_to<Key>,
typename Allocator = tbb::tbb_allocator<Key> >
class concurrent_unordered_multiset :
public internal::concurrent_unordered_base< concurrent_unordered_set_traits<Key,
internal::hash_compare<Key, Hasher, Key_equality>, Allocator, true> >
{
// Base type definitions
typedef internal::hash_compare<Key, Hasher, Key_equality> hash_compare;
typedef concurrent_unordered_set_traits<Key, hash_compare, Allocator, true> traits_type;
typedef internal::concurrent_unordered_base< traits_type > base_type;
#if __TBB_EXTRA_DEBUG
public:
#endif
using traits_type::allow_multimapping;
public:
using base_type::insert;
// Type definitions
typedef Key key_type;
typedef typename base_type::value_type value_type;
typedef Key mapped_type;
typedef Hasher hasher;
typedef Key_equality key_equal;
typedef hash_compare key_compare;
typedef typename base_type::allocator_type allocator_type;
typedef typename base_type::pointer pointer;
typedef typename base_type::const_pointer const_pointer;
typedef typename base_type::reference reference;
typedef typename base_type::const_reference const_reference;
typedef typename base_type::size_type size_type;
typedef typename base_type::difference_type difference_type;
typedef typename base_type::iterator iterator;
typedef typename base_type::const_iterator const_iterator;
typedef typename base_type::iterator local_iterator;
typedef typename base_type::const_iterator const_local_iterator;
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
typedef typename base_type::node_type node_type;
#endif // __TBB_UNORDERED_NODE_HANDLE_PRESENT
// Construction/destruction/copying
explicit concurrent_unordered_multiset(size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{}
concurrent_unordered_multiset(size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{}
concurrent_unordered_multiset(size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{}
explicit concurrent_unordered_multiset(const Allocator& a) : base_type(base_type::initial_bucket_number, key_compare(), a)
{}
template <typename Iterator>
concurrent_unordered_multiset(Iterator first, Iterator last, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(),
const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_multiset(Iterator first, Iterator last, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(first, last);
}
template <typename Iterator>
concurrent_unordered_multiset(Iterator first, Iterator last, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(first, last);
}
#if __TBB_INITIALIZER_LISTS_PRESENT
//! Constructor from initializer_list
concurrent_unordered_multiset(std::initializer_list<value_type> il, size_type n_of_buckets = base_type::initial_bucket_number,
const hasher& a_hasher = hasher(), const key_equal& a_keyeq = key_equal(), const allocator_type& a = allocator_type())
: base_type(n_of_buckets, key_compare(a_hasher, a_keyeq), a)
{
insert(il.begin(),il.end());
}
concurrent_unordered_multiset(std::initializer_list<value_type> il, size_type n_of_buckets, const allocator_type& a)
: base_type(n_of_buckets, key_compare(hasher(), key_equal()), a)
{
insert(il.begin(), il.end());
}
concurrent_unordered_multiset(std::initializer_list<value_type> il, size_type n_of_buckets, const hasher& a_hasher,
const allocator_type& a)
: base_type(n_of_buckets, key_compare(a_hasher, key_equal()), a)
{
insert(il.begin(), il.end());
}
#endif // __TBB_INITIALIZER_LISTS_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
concurrent_unordered_multiset(const concurrent_unordered_multiset& table)
: base_type(table)
{}
concurrent_unordered_multiset& operator=(const concurrent_unordered_multiset& table)
{
return static_cast<concurrent_unordered_multiset&>(base_type::operator=(table));
}
concurrent_unordered_multiset(concurrent_unordered_multiset&& table)
: base_type(std::move(table))
{}
concurrent_unordered_multiset& operator=(concurrent_unordered_multiset&& table)
{
return static_cast<concurrent_unordered_multiset&>(base_type::operator=(std::move(table)));
}
#endif //__TBB_CPP11_RVALUE_REF_PRESENT && !__TBB_IMPLICIT_MOVE_PRESENT
#if __TBB_CPP11_RVALUE_REF_PRESENT
concurrent_unordered_multiset(concurrent_unordered_multiset&& table, const Allocator& a)
: base_type(std::move(table), a)
{
}
#endif /*__TBB_CPP11_RVALUE_REF_PRESENT*/
#if __TBB_UNORDERED_NODE_HANDLE_PRESENT
template<typename Hash, typename Equality>
void merge(concurrent_unordered_set<Key, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_set<Key, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multiset<Key, Hash, Equality, Allocator>& source)
{ this->internal_merge(source); }
template<typename Hash, typename Equality>
void merge(concurrent_unordered_multiset<Key, Hash, Equality, Allocator>&& source)
{ this->internal_merge(source); }
#endif //__TBB_UNORDERED_NODE_HANDLE_PRESENT
concurrent_unordered_multiset(const concurrent_unordered_multiset& table, const Allocator& a)
: base_type(table, a)
{}
};
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
// Deduction guide for the constructor from two iterators
template<typename I>
concurrent_unordered_multiset(I, I)
-> internal::cu_set_t<concurrent_unordered_multiset, internal::iterator_value_t<I>>;
// Deduction guide for the constructor from two iterators and hasher/equality/allocator
template<typename I, typename... Args>
concurrent_unordered_multiset(I, I, size_t, Args...)
-> internal::cu_set_t<concurrent_unordered_multiset, internal::iterator_value_t<I>, Args...>;
// Deduction guide for the constructor from an initializer_list
template<typename T>
concurrent_unordered_multiset(std::initializer_list<T>)
-> internal::cu_set_t<concurrent_unordered_multiset, T>;
// Deduction guide for the constructor from an initializer_list and hasher/equality/allocator
template<typename T, typename... Args>
concurrent_unordered_multiset(std::initializer_list<T>, size_t, Args...)
-> internal::cu_set_t<concurrent_unordered_multiset, T, Args...>;
#endif /* __TBB_CPP17_DEDUCTION_GUIDES_PRESENT */
} // namespace interface5
using interface5::concurrent_unordered_set;
using interface5::concurrent_unordered_multiset;
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_concurrent_unordered_set_H_include_area
#endif // __TBB_concurrent_unordered_set_H
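A minimal sketch (not part of the header) of the unordered set containers: insert() reports duplicates through the returned bool, while the multiset keeps them.
#include "tbb/concurrent_unordered_set.h"
#include <string>
#include <iostream>
int main() {
    tbb::concurrent_unordered_set<std::string> visited;
    visited.insert("alpha");
    bool fresh = visited.insert("alpha").second;          // false: "alpha" is already present
    tbb::concurrent_unordered_multiset<std::string> tags{"x", "x", "y"};
    std::cout << fresh << ' ' << visited.size() << ' ' << tags.count("x") << '\n';  // expected: 0 1 2
    return 0;
}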

File diff suppressed because it is too large

View File

@@ -0,0 +1,147 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_critical_section_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_critical_section_H
#pragma message("TBB Warning: tbb/critical_section.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef _TBB_CRITICAL_SECTION_H_
#define _TBB_CRITICAL_SECTION_H_
#define __TBB_critical_section_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#else
#include <pthread.h>
#include <errno.h>
#endif // _WIN32||_WIN64
#include "tbb_stddef.h"
#include "tbb_thread.h"
#include "tbb_exception.h"
#include "tbb_profiling.h"
namespace tbb {
namespace internal {
class critical_section_v4 : internal::no_copy {
#if _WIN32||_WIN64
CRITICAL_SECTION my_impl;
#else
pthread_mutex_t my_impl;
#endif
tbb_thread::id my_tid;
public:
void __TBB_EXPORTED_METHOD internal_construct();
critical_section_v4() {
#if _WIN32||_WIN64
InitializeCriticalSectionEx( &my_impl, 4000, 0 );
#else
pthread_mutex_init(&my_impl, NULL);
#endif
internal_construct();
}
~critical_section_v4() {
__TBB_ASSERT(my_tid == tbb_thread::id(), "Destroying a still-held critical section");
#if _WIN32||_WIN64
DeleteCriticalSection(&my_impl);
#else
pthread_mutex_destroy(&my_impl);
#endif
}
class scoped_lock : internal::no_copy {
private:
critical_section_v4 &my_crit;
public:
scoped_lock( critical_section_v4& lock_me) :my_crit(lock_me) {
my_crit.lock();
}
~scoped_lock() {
my_crit.unlock();
}
};
void lock() {
tbb_thread::id local_tid = this_tbb_thread::get_id();
if(local_tid == my_tid) throw_exception( eid_improper_lock );
#if _WIN32||_WIN64
EnterCriticalSection( &my_impl );
#else
int rval = pthread_mutex_lock(&my_impl);
__TBB_ASSERT_EX(!rval, "critical_section::lock: pthread_mutex_lock failed");
#endif
__TBB_ASSERT(my_tid == tbb_thread::id(), NULL);
my_tid = local_tid;
}
bool try_lock() {
bool gotlock;
tbb_thread::id local_tid = this_tbb_thread::get_id();
if(local_tid == my_tid) return false;
#if _WIN32||_WIN64
gotlock = TryEnterCriticalSection( &my_impl ) != 0;
#else
int rval = pthread_mutex_trylock(&my_impl);
// valid returns are 0 (locked) and [EBUSY]
__TBB_ASSERT(rval == 0 || rval == EBUSY, "critical_section::trylock: pthread_mutex_trylock failed");
gotlock = rval == 0;
#endif
if(gotlock) {
my_tid = local_tid;
}
return gotlock;
}
void unlock() {
__TBB_ASSERT(this_tbb_thread::get_id() == my_tid, "thread unlocking critical_section is not thread that locked it");
my_tid = tbb_thread::id();
#if _WIN32||_WIN64
LeaveCriticalSection( &my_impl );
#else
int rval = pthread_mutex_unlock(&my_impl);
__TBB_ASSERT_EX(!rval, "critical_section::unlock: pthread_mutex_unlock failed");
#endif
}
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true;
}; // critical_section_v4
} // namespace internal
__TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::critical_section is deprecated, use std::mutex") typedef internal::critical_section_v4 critical_section;
__TBB_DEFINE_PROFILING_SET_NAME(critical_section)
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_critical_section_H_include_area
#endif // _TBB_CRITICAL_SECTION_H_
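A minimal sketch of the deprecated critical_section (the header itself points users to std::mutex); scoped_lock releases the section when it leaves scope. This example is not part of the original file.
#include "tbb/critical_section.h"
#include "tbb/parallel_for.h"
static long shared_total = 0;
static tbb::critical_section cs;   // deprecated; std::mutex is the suggested replacement
int main() {
    tbb::parallel_for(0, 1000, [](int i) {
        tbb::critical_section::scoped_lock lock(cs);  // unlocks automatically at the end of the iteration
        shared_total += i;
    });
    return shared_total == 499500 ? 0 : 1;
}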

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,53 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_abstractions_H
#define __TBB_flow_graph_abstractions_H
namespace tbb {
namespace flow {
namespace interface11 {
//! Pure virtual template classes that define interfaces for async communication
class graph_proxy {
public:
//! Inform a graph that messages may come from outside, to prevent premature graph completion
virtual void reserve_wait() = 0;
//! Inform a graph that a previous call to reserve_wait is no longer in effect
virtual void release_wait() = 0;
virtual ~graph_proxy() {}
};
template <typename Input>
class receiver_gateway : public graph_proxy {
public:
//! Type of the data input into the flow graph.
typedef Input input_type;
//! Submit a signal from an asynchronous activity into the flow graph.
virtual bool try_put(const input_type&) = 0;
};
} // namespace interface11
using interface11::graph_proxy;
using interface11::receiver_gateway;
} //flow
} //tbb
#endif
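A hypothetical sketch of how an external activity is expected to drive these interfaces (async_node's gateway_type is one implementation); run_async_work and its joining strategy are illustrative only and not part of TBB.
#include <thread>
template <typename Gateway>   // e.g. a reference to tbb::flow::async_node<int, int>::gateway_type
void run_async_work(Gateway& gw, int payload) {
    gw.reserve_wait();                     // tell the graph a message will arrive from outside
    std::thread worker([&gw, payload]() {
        gw.try_put(payload * 2);           // deliver the result back into the graph
        gw.release_wait();                 // balance the earlier reserve_wait
    });
    worker.join();                         // a real activity would keep the thread and join it later
}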

File diff suppressed because it is too large

View File

@@ -0,0 +1,78 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_global_control_H
#define __TBB_global_control_H
#include "tbb_stddef.h"
namespace tbb {
namespace interface9 {
class global_control {
public:
enum parameter {
max_allowed_parallelism,
thread_stack_size,
parameter_max // insert new parameters above this point
};
global_control(parameter p, size_t value) :
my_value(value), my_next(NULL), my_param(p) {
__TBB_ASSERT(my_param < parameter_max, "Invalid parameter");
#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
// For Windows 8 Store* apps it's impossible to set stack size
if (p==thread_stack_size)
return;
#elif __TBB_x86_64 && (_WIN32 || _WIN64)
if (p==thread_stack_size)
__TBB_ASSERT_RELEASE((unsigned)value == value, "Stack size is limited to unsigned int range");
#endif
if (my_param==max_allowed_parallelism)
__TBB_ASSERT_RELEASE(my_value>0, "max_allowed_parallelism cannot be 0.");
internal_create();
}
~global_control() {
__TBB_ASSERT(my_param < parameter_max, "Invalid parameter. Probably the object was corrupted.");
#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
// For Windows 8 Store* apps it's impossible to set stack size
if (my_param==thread_stack_size)
return;
#endif
internal_destroy();
}
static size_t active_value(parameter p) {
__TBB_ASSERT(p < parameter_max, "Invalid parameter");
return active_value((int)p);
}
private:
size_t my_value;
global_control *my_next;
parameter my_param;
void __TBB_EXPORTED_METHOD internal_create();
void __TBB_EXPORTED_METHOD internal_destroy();
static size_t __TBB_EXPORTED_FUNC active_value(int param);
};
} // namespace interface9
using interface9::global_control;
} // tbb
#endif // __TBB_global_control_H
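A short usage sketch (not part of the header): a global_control instance constrains TBB for its lifetime, and active_value() reports the currently effective, most restrictive setting.
#include "tbb/global_control.h"
#include "tbb/parallel_for.h"
int main() {
    // While these objects are alive, at most 4 worker threads are used and
    // newly created worker threads get a 16 MB stack.
    tbb::global_control parallelism(tbb::global_control::max_allowed_parallelism, 4);
    tbb::global_control stack(tbb::global_control::thread_stack_size, 16 * 1024 * 1024);
    tbb::parallel_for(0, 1000, [](int) { /* work */ });
    size_t active = tbb::global_control::active_value(tbb::global_control::max_allowed_parallelism);
    return active <= 4 ? 0 : 1;
}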

View File

@@ -0,0 +1,29 @@
<HTML>
<BODY>
<H2>Overview</H2>
Include files for Intel&reg; Threading Building Blocks classes and functions.
<BR><A HREF=".">Click here</A> to see all files in the directory.
<H2>Directories</H2>
<DL>
<DT><A HREF="compat">compat</A>
<DD>Include files for source level compatibility with other frameworks.
<DT><A HREF="internal">internal</A>
<DD>Include files with implementation details; not for direct use.
<DT><A HREF="machine">machine</A>
<DD>Include files for low-level architecture specific functionality; not for direct use.
</DL>
<HR>
<A HREF="../index.html">Up to parent directory</A>
<p></p>
Copyright &copy; 2005-2020 Intel Corporation. All Rights Reserved.
<P></P>
Intel is a registered trademark or trademark of Intel Corporation
or its subsidiaries in the United States and other countries.
<p></p>
* Other names and brands may be claimed as the property of others.
</BODY>
</HTML>

View File

@@ -0,0 +1,52 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_info_H
#define __TBB_info_H
#include "tbb_config.h"
#if __TBB_NUMA_SUPPORT
#include <vector>
namespace tbb {
namespace internal {
namespace numa_topology {
unsigned nodes_count();
void fill(int* indexes_array);
int default_concurrency(int node_id);
} //namespace numa_topology
} // namespace internal
typedef int numa_node_id;
namespace info {
inline std::vector<numa_node_id> numa_nodes() {
std::vector<numa_node_id> nodes_indexes(tbb::internal::numa_topology::nodes_count());
internal::numa_topology::fill(&nodes_indexes.front());
return nodes_indexes;
}
inline int default_concurrency(numa_node_id id = -1) {
return internal::numa_topology::default_concurrency(id);
}
} // namespace info
} // namespace tbb
#endif /*__TBB_NUMA_SUPPORT*/
#endif /*__TBB_info_H*/
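A usage sketch, assuming the preview macro TBB_PREVIEW_NUMA_SUPPORT (together with the required HWLOC support) turns __TBB_NUMA_SUPPORT on; otherwise the API above is compiled out.
#define TBB_PREVIEW_NUMA_SUPPORT 1   // assumption: enables __TBB_NUMA_SUPPORT in tbb_config.h
#include "tbb/info.h"
#include <iostream>
#include <vector>
int main() {
#if __TBB_NUMA_SUPPORT
    std::vector<tbb::numa_node_id> nodes = tbb::info::numa_nodes();
    for (tbb::numa_node_id id : nodes)
        std::cout << "NUMA node " << id << ": default concurrency "
                  << tbb::info::default_concurrency(id) << "\n";
#endif
    return 0;
}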

View File

@@ -0,0 +1,180 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__aggregator_impl_H
#define __TBB__aggregator_impl_H
#include "../atomic.h"
#if !__TBBMALLOC_BUILD
#include "../tbb_profiling.h"
#endif
namespace tbb {
namespace interface6 {
namespace internal {
using namespace tbb::internal;
//! aggregated_operation base class
template <typename Derived>
class aggregated_operation {
public:
//! Zero value means "wait" status, all other values are "user" specified values and are defined into the scope of a class which uses "status".
uintptr_t status;
Derived *next;
aggregated_operation() : status(0), next(NULL) {}
};
//! Aggregator base class
/** An aggregator for collecting operations coming from multiple sources and executing
them serially on a single thread. operation_type must be derived from
aggregated_operation. The parameter handler_type is a functor that will be passed the
list of operations and is expected to handle each operation appropriately, setting the
status of each operation to non-zero.*/
template < typename operation_type >
class aggregator_generic {
public:
aggregator_generic() : handler_busy(false) { pending_operations = NULL; }
//! Execute an operation
/** Places an operation into the waitlist (pending_operations), and either handles the list,
or waits for the operation to complete, or returns.
The long_life_time parameter specifies the life time of the given operation object.
Operations with long_life_time == true may be accessed after execution.
A "short" life time operation (long_life_time == false) can be destroyed
during execution, and so any access to it after it was put into the waitlist,
including status check, is invalid. As a consequence, waiting for completion
of such operation causes undefined behavior.
*/
template < typename handler_type >
void execute(operation_type *op, handler_type &handle_operations, bool long_life_time = true) {
operation_type *res;
// op->status should be read before inserting the operation into the
// aggregator waitlist since it can become invalid after executing a
// handler (if the operation has 'short' life time.)
const uintptr_t status = op->status;
// ITT note: &(op->status) tag is used to cover accesses to this op node. This
// thread has created the operation, and now releases it so that the handler
// thread may handle the associated operation w/o triggering a race condition;
// thus this tag will be acquired just before the operation is handled in the
// handle_operations functor.
call_itt_notify(releasing, &(op->status));
// insert the operation in the queue.
do {
// Tools may flag the following line as a race; it is a false positive:
// This is an atomic read; we don't provide itt_hide_load_word for atomics
op->next = res = pending_operations; // NOT A RACE
} while (pending_operations.compare_and_swap(op, res) != res);
if (!res) { // first in the list; handle the operations.
// ITT note: &pending_operations tag covers access to the handler_busy flag,
// which this waiting handler thread will try to set before entering
// handle_operations.
call_itt_notify(acquired, &pending_operations);
start_handle_operations(handle_operations);
// The operation with 'short' life time can already be destroyed.
if (long_life_time)
__TBB_ASSERT(op->status, NULL);
}
// not first; wait for op to be ready.
else if (!status) { // operation is blocking here.
__TBB_ASSERT(long_life_time, "Waiting for an operation object that might be destroyed during processing.");
call_itt_notify(prepare, &(op->status));
spin_wait_while_eq(op->status, uintptr_t(0));
itt_load_word_with_acquire(op->status);
}
}
private:
//! An atomically updated list (aka mailbox) of pending operations
atomic<operation_type *> pending_operations;
//! Controls thread access to handle_operations
uintptr_t handler_busy;
//! Trigger the handling of operations when the handler is free
template < typename handler_type >
void start_handle_operations( handler_type &handle_operations ) {
operation_type *op_list;
// ITT note: &handler_busy tag covers access to pending_operations as it is passed
// between active and waiting handlers. Below, the waiting handler waits until
// the active handler releases, and the waiting handler acquires &handler_busy as
// it becomes the active_handler. The release point is at the end of this
// function, when all operations in pending_operations have been handled by the
// owner of this aggregator.
call_itt_notify(prepare, &handler_busy);
// get the handler_busy:
// only one thread can possibly spin here at a time
spin_wait_until_eq(handler_busy, uintptr_t(0));
call_itt_notify(acquired, &handler_busy);
// acquire fence not necessary here due to causality rule and surrounding atomics
__TBB_store_with_release(handler_busy, uintptr_t(1));
// ITT note: &pending_operations tag covers access to the handler_busy flag
// itself. Capturing the state of the pending_operations signifies that
// handler_busy has been set and a new active handler will now process that list's
// operations.
call_itt_notify(releasing, &pending_operations);
// grab pending_operations
op_list = pending_operations.fetch_and_store(NULL);
// handle all the operations
handle_operations(op_list);
// release the handler
itt_store_word_with_release(handler_busy, uintptr_t(0));
}
};
template < typename handler_type, typename operation_type >
class aggregator : public aggregator_generic<operation_type> {
handler_type handle_operations;
public:
aggregator() {}
explicit aggregator(handler_type h) : handle_operations(h) {}
void initialize_handler(handler_type h) { handle_operations = h; }
void execute(operation_type *op) {
aggregator_generic<operation_type>::execute(op, handle_operations);
}
};
// the most-compatible friend declaration (vs, gcc, icc) is
// template<class U, class V> friend class aggregating_functor;
template<typename aggregating_class, typename operation_list>
class aggregating_functor {
aggregating_class *fi;
public:
aggregating_functor() : fi() {}
aggregating_functor(aggregating_class *fi_) : fi(fi_) {}
void operator()(operation_list* op_list) { fi->handle_operations(op_list); }
};
} // namespace internal
} // namespace interface6
namespace internal {
using interface6::internal::aggregated_operation;
using interface6::internal::aggregator_generic;
using interface6::internal::aggregator;
using interface6::internal::aggregating_functor;
} // namespace internal
} // namespace tbb
#endif // __TBB__aggregator_impl_H
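A hedged sketch of the handler protocol this internal class expects; sum_op and sum_handler are illustrative names, and including the internal header directly is shown only for exposition.
#include "tbb/internal/_aggregator_impl.h"
struct sum_op : tbb::internal::aggregated_operation<sum_op> {
    int value;
};
struct sum_handler {
    long total;
    sum_handler() : total(0) {}
    void operator()(sum_op* list) {
        while (list) {
            sum_op* next = list->next;                            // read next before publishing completion
            total += list->value;
            __TBB_store_with_release(list->status, uintptr_t(1)); // non-zero status wakes the waiting thread
            list = next;
        }
    }
};
// A caller fills an operation and submits it; execute() blocks until the
// handler publishes a non-zero status:
//     tbb::internal::aggregator<sum_handler, sum_op> agg;
//     sum_op op; op.value = 7;
//     agg.execute(&op);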

View File

@@ -0,0 +1,156 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_allocator_traits_H
#define __TBB_allocator_traits_H
#include "../tbb_stddef.h" // true/false_type
#if __TBB_ALLOCATOR_TRAITS_PRESENT
#include <memory> // for allocator_traits
#endif
#if __TBB_CPP11_RVALUE_REF_PRESENT
#include <utility> // for std::move
#endif
// For allocator_swap helper
#include __TBB_STD_SWAP_HEADER
namespace tbb {
namespace internal {
//! Internal implementation of allocator traits; the propagate_on_* traits are expressed with an internal boolean constant.
//! To avoid code duplication, select up front which boolean-constant implementation will be passed around.
#if __TBB_ALLOCATOR_TRAITS_PRESENT
typedef std::true_type traits_true_type;
typedef std::false_type traits_false_type;
#else
typedef tbb::internal::true_type traits_true_type;
typedef tbb::internal::false_type traits_false_type;
#endif
//! Copy assignment implementation for allocator if propagate_on_container_copy_assignment == true_type
//! Noop if pocca == false_type
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_copy_assignment(MyAlloc& my_allocator, OtherAlloc& other_allocator, traits_true_type) {
my_allocator = other_allocator;
}
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_copy_assignment(MyAlloc&, OtherAlloc&, traits_false_type) { /* NO COPY */}
#if __TBB_CPP11_RVALUE_REF_PRESENT
//! Move assignment implementation for allocator if propagate_on_container_move_assignment == true_type.
//! Noop if pocma == false_type.
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_move_assignment(MyAlloc& my_allocator, OtherAlloc& other_allocator, traits_true_type) {
my_allocator = std::move(other_allocator);
}
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_move_assignment(MyAlloc&, OtherAlloc&, traits_false_type) { /* NO MOVE */ }
#endif
//! Swap implementation for allocators if propagate_on_container_swap == true_type.
//! Noop if pocs == false_type.
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_swap(MyAlloc& my_allocator, OtherAlloc& other_allocator, traits_true_type) {
using std::swap;
swap(my_allocator, other_allocator);
}
template <typename MyAlloc, typename OtherAlloc>
inline void allocator_swap(MyAlloc&, OtherAlloc&, traits_false_type) { /* NO SWAP */ }
#if __TBB_ALLOCATOR_TRAITS_PRESENT
using std::allocator_traits;
#else
//! Internal allocator_traits implementation, which relies on C++03 standard
//! [20.1.5] allocator requirements
template<typename Alloc>
struct allocator_traits {
// A C++03 allocator is not required to be assignable or swappable, therefore
// these traits are defined as false_type so that no operations beyond the
// C++03 allocator requirements are needed.
typedef tbb::internal::false_type propagate_on_container_move_assignment;
typedef tbb::internal::false_type propagate_on_container_copy_assignment;
typedef tbb::internal::false_type propagate_on_container_swap;
typedef Alloc allocator_type;
typedef typename allocator_type::value_type value_type;
typedef typename allocator_type::pointer pointer;
typedef typename allocator_type::const_pointer const_pointer;
typedef typename allocator_type::difference_type difference_type;
typedef typename allocator_type::size_type size_type;
template <typename U> struct rebind_alloc {
typedef typename Alloc::template rebind<U>::other other;
};
static pointer allocate(Alloc& a, size_type n) {
return a.allocate(n);
}
static void deallocate(Alloc& a, pointer p, size_type n) {
a.deallocate(p, n);
}
template<typename PT>
static void construct(Alloc&, PT* p) {
::new (static_cast<void*>(p)) PT();
}
template<typename PT, typename T1>
static void construct(Alloc&, PT* p, __TBB_FORWARDING_REF(T1) t1) {
::new (static_cast<void*>(p)) PT(tbb::internal::forward<T1>(t1));
}
template<typename PT, typename T1, typename T2>
static void construct(Alloc&, PT* p, __TBB_FORWARDING_REF(T1) t1, __TBB_FORWARDING_REF(T2) t2) {
::new (static_cast<void*>(p)) PT(tbb::internal::forward<T1>(t1), tbb::internal::forward<T2>(t2));
}
template<typename PT, typename T1, typename T2, typename T3>
static void construct(Alloc&, PT* p, __TBB_FORWARDING_REF(T1) t1,
__TBB_FORWARDING_REF(T2) t2, __TBB_FORWARDING_REF(T3) t3) {
::new (static_cast<void*>(p)) PT(tbb::internal::forward<T1>(t1), tbb::internal::forward<T2>(t2),
tbb::internal::forward<T3>(t3));
}
template<typename T>
static void destroy(Alloc&, T* p) {
p->~T();
tbb::internal::suppress_unused_warning(p);
}
static Alloc select_on_container_copy_construction(const Alloc& a) { return a; }
};
#endif // __TBB_ALLOCATOR_TRAITS_PRESENT
//! Rebind helper that works in both C++03 and C++11, even when std::allocator_traits is
//! not available or rebind is not defined for the allocator type.
template<typename Alloc, typename T>
struct allocator_rebind {
#if __TBB_ALLOCATOR_TRAITS_PRESENT
typedef typename allocator_traits<Alloc>::template rebind_alloc<T> type;
#else
typedef typename allocator_traits<Alloc>::template rebind_alloc<T>::other type;
#endif
};
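// Illustrative sketch (not part of the original header): rebinding a container's element
// allocator to an internal node type. `my_list` and `my_node` are hypothetical.
//
//   template <typename T, typename Allocator>
//   class my_list {
//       struct my_node { T value; my_node* next; };
//       // Works with both C++03 allocators and C++11 std::allocator_traits-based allocators.
//       typedef typename tbb::internal::allocator_rebind<Allocator, my_node>::type node_allocator_type;
//       node_allocator_type my_node_allocator;   // allocate(1) yields storage for my_node, not T
//   };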
}} // namespace tbb::internal
#endif // __TBB_allocator_traits_H

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,69 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "tbb/tbb_config.h"
#if (!defined(TBB_SUPPRESS_DEPRECATED_MESSAGES) || (TBB_SUPPRESS_DEPRECATED_MESSAGES == 0)) && !defined(__TBB_INTERNAL_INCLUDES_DEPRECATION_MESSAGE) && \
!defined(__TBB_condition_variable_H_include_area) && \
!defined(__TBB_ppl_H_include_area) && \
!defined(__TBB_thread_H_include_area) && \
!defined(__TBB_tuple_H_include_area) && \
!defined(__TBB_aggregator_H_include_area) && \
!defined(__TBB_aligned_space_H_include_area) && \
!defined(__TBB_atomic_H_include_area) && \
!defined(__TBB_combinable_H_include_area) && \
!defined(__TBB_concurrent_hash_map_H_include_area) && \
!defined(__TBB_concurrent_lru_cache_H_include_area) && \
!defined(__TBB_concurrent_map_H_include_area) && \
!defined(__TBB_concurrent_priority_queue_H_include_area) && \
!defined(__TBB_concurrent_queue_H_include_area) && \
!defined(__TBB_concurrent_set_H_include_area) && \
!defined(__TBB_concurrent_unordered_map_H_include_area) && \
!defined(__TBB_concurrent_unordered_set_H_include_area) && \
!defined(__TBB_concurrent_vector_H_include_area) && \
!defined(__TBB_critical_section_H_include_area) && \
!defined(__TBB_enumerable_thread_specific_H_include_area) && \
!defined(__TBB_flow_graph_opencl_node_H_include_area) && \
!defined(__TBB_flow_graph_H_include_area) && \
!defined(__TBB_mutex_H_include_area) && \
!defined(__TBB_parallel_do_H_include_area) && \
!defined(__TBB_parallel_for_H_include_area) && \
!defined(__TBB_parallel_invoke_H_include_area) && \
!defined(__TBB_parallel_reduce_H_include_area) && \
!defined(__TBB_parallel_scan_H_include_area) && \
!defined(__TBB_parallel_sort_H_include_area) && \
!defined(__TBB_parallel_while_H_include_area) && \
!defined(__TBB_partitioner_H_include_area) && \
!defined(__TBB_pipeline_H_include_area) && \
!defined(__TBB_queuing_mutex_H_include_area) && \
!defined(__TBB_queuing_rw_mutex_H_include_area) && \
!defined(__TBB_reader_writer_lock_H_include_area) && \
!defined(__TBB_recursive_mutex_H_include_area) && \
!defined(__TBB_runtime_loader_H_include_area) && \
!defined(__TBB_task_scheduler_init_H_include_area) && \
!defined(__TBB_spin_mutex_H_include_area) && \
!defined(__TBB_task_arena_H_include_area) && \
!defined(__TBB_task_group_H_include_area) && \
!defined(__TBB_task_scheduler_observer_H_include_area) && \
!defined(__TBB_task_H_include_area) && \
!defined(__TBB_tbb_exception_H_include_area) && \
!defined(__TBB_tbb_profiling_H_include_area) && \
!defined(__TBB_tbb_thread_H_include_area) && \
!defined(__TBB_tbb_H_include_area)
#define __TBB_show_deprecated_header_message
#endif
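// Illustrative sketch (an assumption about intended usage, not copied from any TBB header):
// a public header defines its *_include_area macro before pulling in internal headers, so the
// guard above does not define __TBB_show_deprecated_header_message for that inclusion path,
// and removes the macro again at the end of the header. Users can also silence every such
// message by defining TBB_SUPPRESS_DEPRECATED_MESSAGES to a non-zero value.
//
//   // tbb/parallel_for.h (schematic)
//   #define __TBB_parallel_for_H_include_area
//   // ... public declarations and internal #includes ...
//   #undef __TBB_parallel_for_H_include_area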

View File

@@ -0,0 +1,153 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_async_msg_impl_H
#define __TBB__flow_graph_async_msg_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
namespace internal {
template <typename T>
class async_storage {
public:
typedef receiver<T> async_storage_client;
async_storage() : my_graph(nullptr) {
my_data_ready.store<tbb::relaxed>(false);
}
~async_storage() {
// Release the reference to the graph if async_storage
// is destroyed before set() is called
if (my_graph) {
my_graph->release_wait();
my_graph = nullptr;
}
}
template<typename C>
async_storage(C&& data) : my_graph(nullptr), my_data( std::forward<C>(data) ) {
using namespace tbb::internal;
__TBB_STATIC_ASSERT( (is_same_type<typename strip<C>::type, typename strip<T>::type>::value), "incoming type must be T" );
my_data_ready.store<tbb::relaxed>(true);
}
template<typename C>
bool set(C&& data) {
using namespace tbb::internal;
__TBB_STATIC_ASSERT( (is_same_type<typename strip<C>::type, typename strip<T>::type>::value), "incoming type must be T" );
{
tbb::spin_mutex::scoped_lock locker(my_mutex);
if (my_data_ready.load<tbb::relaxed>()) {
__TBB_ASSERT(false, "double set() call");
return false;
}
my_data = std::forward<C>(data);
my_data_ready.store<tbb::release>(true);
}
// Thread sync is on my_data_ready flag
for (typename subscriber_list_type::iterator it = my_clients.begin(); it != my_clients.end(); ++it) {
(*it)->try_put(my_data);
}
// Data was sent, release reference to the graph
if (my_graph) {
my_graph->release_wait();
my_graph = nullptr;
}
return true;
}
task* subscribe(async_storage_client& client, graph& g) {
if (! my_data_ready.load<tbb::acquire>())
{
tbb::spin_mutex::scoped_lock locker(my_mutex);
if (! my_data_ready.load<tbb::relaxed>()) {
#if TBB_USE_ASSERT
for (typename subscriber_list_type::iterator it = my_clients.begin(); it != my_clients.end(); ++it) {
__TBB_ASSERT(*it != &client, "unexpected double subscription");
}
#endif // TBB_USE_ASSERT
// Increase graph lifetime
my_graph = &g;
my_graph->reserve_wait();
// Subscribe
my_clients.push_back(&client);
return SUCCESSFULLY_ENQUEUED;
}
}
__TBB_ASSERT(my_data_ready.load<tbb::relaxed>(), "data is NOT ready");
return client.try_put_task(my_data);
}
private:
graph* my_graph;
tbb::spin_mutex my_mutex;
tbb::atomic<bool> my_data_ready;
T my_data;
typedef std::vector<async_storage_client*> subscriber_list_type;
subscriber_list_type my_clients;
};
} // namespace internal
template <typename T>
class __TBB_DEPRECATED async_msg {
template< typename > friend class receiver;
template< typename, typename > friend struct internal::async_helpers;
public:
typedef T async_msg_data_type;
async_msg() : my_storage(std::make_shared< internal::async_storage<T> >()) {}
async_msg(const T& t) : my_storage(std::make_shared< internal::async_storage<T> >(t)) {}
async_msg(T&& t) : my_storage(std::make_shared< internal::async_storage<T> >( std::move(t) )) {}
virtual ~async_msg() {}
void set(const T& t) {
my_storage->set(t);
}
void set(T&& t) {
my_storage->set( std::move(t) );
}
protected:
// Can be overridden in a derived class to signal that
// the asynchronous calculation chain is over
virtual void finalize() const {}
private:
typedef std::shared_ptr< internal::async_storage<T> > async_storage_ptr;
async_storage_ptr my_storage;
};
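// Illustrative sketch (not part of the original header, assumes the async_msg preview feature
// is enabled): a node returns an async_msg<int> immediately and an off-graph worker completes
// it later; the downstream node receives the plain int once set() is called. `my_async_engine`
// is a hypothetical user-managed worker that eventually calls msg.set(result).
//
//   tbb::flow::graph g;
//   tbb::flow::function_node< int, tbb::flow::async_msg<int> > submit( g, tbb::flow::unlimited,
//       [&]( int x ) {
//           tbb::flow::async_msg<int> msg;
//           my_async_engine.enqueue( x, msg );   // will call msg.set(result) on another thread
//           return msg;                          // returned before the result is ready
//       } );
//   tbb::flow::function_node< int > consume( g, tbb::flow::unlimited,
//       []( int result ) { /* use result */ } );
//   tbb::flow::make_edge( submit, consume );
//   submit.try_put( 42 );
//   g.wait_for_all();   // does not return until msg.set() has delivered the result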
#endif // __TBB__flow_graph_async_msg_impl_H

View File

@@ -0,0 +1,449 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_body_impl_H
#define __TBB__flow_graph_body_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interfaceX (in flow_graph.h)
namespace internal {
typedef tbb::internal::uint64_t tag_value;
using tbb::internal::strip;
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename ... Policies> struct Policy {};
template<typename ... Policies> struct has_policy;
template<typename ExpectedPolicy, typename FirstPolicy, typename ...Policies>
struct has_policy<ExpectedPolicy, FirstPolicy, Policies...> :
tbb::internal::bool_constant<has_policy<ExpectedPolicy, FirstPolicy>::value ||
has_policy<ExpectedPolicy, Policies...>::value> {};
template<typename ExpectedPolicy, typename SinglePolicy>
struct has_policy<ExpectedPolicy, SinglePolicy> :
tbb::internal::bool_constant<tbb::internal::is_same_type<ExpectedPolicy, SinglePolicy>::value> {};
template<typename ExpectedPolicy, typename ...Policies>
struct has_policy<ExpectedPolicy, Policy<Policies...> > : has_policy<ExpectedPolicy, Policies...> {};
#else
template<typename P1, typename P2 = void> struct Policy {};
template<typename ExpectedPolicy, typename SinglePolicy>
struct has_policy : tbb::internal::bool_constant<tbb::internal::is_same_type<ExpectedPolicy, SinglePolicy>::value> {};
template<typename ExpectedPolicy, typename P>
struct has_policy<ExpectedPolicy, Policy<P> > : has_policy<ExpectedPolicy, P> {};
template<typename ExpectedPolicy, typename P1, typename P2>
struct has_policy<ExpectedPolicy, Policy<P1, P2> > :
tbb::internal::bool_constant<has_policy<ExpectedPolicy, P1>::value || has_policy<ExpectedPolicy, P2>::value> {};
#endif
namespace graph_policy_namespace {
struct rejecting { };
struct reserving { };
struct queueing { };
struct lightweight { };
// K == type of the field used for key matching. Each tag-matching port will be provided
// a functor that, given an object accepted by the port, returns the
// field of type K used for matching.
template<typename K, typename KHash=tbb_hash_compare<typename strip<K>::type > >
struct key_matching {
typedef K key_type;
typedef typename strip<K>::type base_key_type;
typedef KHash hash_compare_type;
};
// the new specifier for the old tag_matching join
typedef key_matching<tag_value> tag_matching;
// Aliases for Policy combinations
typedef interface11::internal::Policy<queueing, lightweight> queueing_lightweight;
typedef interface11::internal::Policy<rejecting, lightweight> rejecting_lightweight;
} // namespace graph_policy_namespace
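// Illustrative sketch (not part of the original header): how an implementation can query the
// policy pack at compile time. queueing_lightweight is Policy<queueing, lightweight>, so
// has_policy unpacks it and matches each listed policy:
//
//   __TBB_STATIC_ASSERT( (has_policy<graph_policy_namespace::lightweight,
//                                    graph_policy_namespace::queueing_lightweight>::value),
//                        "lightweight is part of the pack" );
//   __TBB_STATIC_ASSERT( (!has_policy<graph_policy_namespace::rejecting,
//                                     graph_policy_namespace::queueing_lightweight>::value),
//                        "rejecting is not" );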
// -------------- function_body containers ----------------------
//! A functor that takes no input and generates a value of type Output
template< typename Output >
class source_body : tbb::internal::no_assign {
public:
virtual ~source_body() {}
virtual bool operator()(Output &output) = 0;
virtual source_body* clone() = 0;
};
//! The leaf for source_body
template< typename Output, typename Body>
class source_body_leaf : public source_body<Output> {
public:
source_body_leaf( const Body &_body ) : body(_body) { }
bool operator()(Output &output) __TBB_override { return body( output ); }
source_body_leaf* clone() __TBB_override {
return new source_body_leaf< Output, Body >(body);
}
Body get_body() { return body; }
private:
Body body;
};
//! A functor that takes an Input and generates an Output
template< typename Input, typename Output >
class function_body : tbb::internal::no_assign {
public:
virtual ~function_body() {}
virtual Output operator()(const Input &input) = 0;
virtual function_body* clone() = 0;
};
//! the leaf for function_body
template <typename Input, typename Output, typename B>
class function_body_leaf : public function_body< Input, Output > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const Input &i) __TBB_override { return body(i); }
B get_body() { return body; }
function_body_leaf* clone() __TBB_override {
return new function_body_leaf< Input, Output, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Input and output of continue_msg
template <typename B>
class function_body_leaf< continue_msg, continue_msg, B> : public function_body< continue_msg, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
continue_msg operator()( const continue_msg &i ) __TBB_override {
body(i);
return i;
}
B get_body() { return body; }
function_body_leaf* clone() __TBB_override {
return new function_body_leaf< continue_msg, continue_msg, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Output of continue_msg
template <typename Input, typename B>
class function_body_leaf< Input, continue_msg, B> : public function_body< Input, continue_msg > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
continue_msg operator()(const Input &i) __TBB_override {
body(i);
return continue_msg();
}
B get_body() { return body; }
function_body_leaf* clone() __TBB_override {
return new function_body_leaf< Input, continue_msg, B >(body);
}
private:
B body;
};
//! the leaf for function_body specialized for Input of continue_msg
template <typename Output, typename B>
class function_body_leaf< continue_msg, Output, B > : public function_body< continue_msg, Output > {
public:
function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const continue_msg &i) __TBB_override {
return body(i);
}
B get_body() { return body; }
function_body_leaf* clone() __TBB_override {
return new function_body_leaf< continue_msg, Output, B >(body);
}
private:
B body;
};
//! function_body that takes an Input and a set of output ports
template<typename Input, typename OutputSet>
class multifunction_body : tbb::internal::no_assign {
public:
virtual ~multifunction_body () {}
virtual void operator()(const Input &/* input*/, OutputSet &/*oset*/) = 0;
virtual multifunction_body* clone() = 0;
virtual void* get_body_ptr() = 0;
};
//! leaf for multifunction. OutputSet can be a std::tuple or a vector.
template<typename Input, typename OutputSet, typename B >
class multifunction_body_leaf : public multifunction_body<Input, OutputSet> {
public:
multifunction_body_leaf(const B &_body) : body(_body) { }
void operator()(const Input &input, OutputSet &oset) __TBB_override {
body(input, oset); // body may explicitly put() to one or more of oset.
}
void* get_body_ptr() __TBB_override { return &body; }
multifunction_body_leaf* clone() __TBB_override {
return new multifunction_body_leaf<Input, OutputSet,B>(body);
}
private:
B body;
};
// ------ function bodies for hash_buffers and key-matching joins.
template<typename Input, typename Output>
class type_to_key_function_body : tbb::internal::no_assign {
public:
virtual ~type_to_key_function_body() {}
virtual Output operator()(const Input &input) = 0; // returns an Output
virtual type_to_key_function_body* clone() = 0;
};
// specialization for ref output
template<typename Input, typename Output>
class type_to_key_function_body<Input,Output&> : tbb::internal::no_assign {
public:
virtual ~type_to_key_function_body() {}
virtual const Output & operator()(const Input &input) = 0; // returns a const Output&
virtual type_to_key_function_body* clone() = 0;
};
template <typename Input, typename Output, typename B>
class type_to_key_function_body_leaf : public type_to_key_function_body<Input, Output> {
public:
type_to_key_function_body_leaf( const B &_body ) : body(_body) { }
Output operator()(const Input &i) __TBB_override { return body(i); }
B get_body() { return body; }
type_to_key_function_body_leaf* clone() __TBB_override {
return new type_to_key_function_body_leaf< Input, Output, B>(body);
}
private:
B body;
};
template <typename Input, typename Output, typename B>
class type_to_key_function_body_leaf<Input,Output&,B> : public type_to_key_function_body< Input, Output&> {
public:
type_to_key_function_body_leaf( const B &_body ) : body(_body) { }
const Output& operator()(const Input &i) __TBB_override {
return body(i);
}
B get_body() { return body; }
type_to_key_function_body_leaf* clone() __TBB_override {
return new type_to_key_function_body_leaf< Input, Output&, B>(body);
}
private:
B body;
};
// --------------------------- end of function_body containers ------------------------
// --------------------------- node task bodies ---------------------------------------
//! A task that calls a node's forward_task function
template< typename NodeType >
class forward_task_bypass : public graph_task {
NodeType &my_node;
public:
forward_task_bypass( NodeType &n
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
, node_priority_t node_priority = no_priority
) : graph_task(node_priority),
#else
) :
#endif
my_node(n) {}
task *execute() __TBB_override {
task * new_task = my_node.forward_task();
if (new_task == SUCCESSFULLY_ENQUEUED) new_task = NULL;
return new_task;
}
};
//! A task that calls a node's apply_body_bypass function, passing in an input of type Input
// return the task* unless it is SUCCESSFULLY_ENQUEUED, in which case return NULL
template< typename NodeType, typename Input >
class apply_body_task_bypass : public graph_task {
NodeType &my_node;
Input my_input;
public:
apply_body_task_bypass( NodeType &n, const Input &i
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
, node_priority_t node_priority = no_priority
) : graph_task(node_priority),
#else
) :
#endif
my_node(n), my_input(i) {}
task *execute() __TBB_override {
task * next_task = my_node.apply_body_bypass( my_input );
if(next_task == SUCCESSFULLY_ENQUEUED) next_task = NULL;
return next_task;
}
};
//! A task that calls a node's apply_body_bypass function with no input
template< typename NodeType >
class source_task_bypass : public graph_task {
NodeType &my_node;
public:
source_task_bypass( NodeType &n ) : my_node(n) {}
task *execute() __TBB_override {
task *new_task = my_node.apply_body_bypass( );
if(new_task == SUCCESSFULLY_ENQUEUED) return NULL;
return new_task;
}
};
// ------------------------ end of node task bodies -----------------------------------
//! An empty functor that takes an Input and returns a default constructed Output
template< typename Input, typename Output >
struct empty_body {
Output operator()( const Input & ) const { return Output(); }
};
template<typename T, typename DecrementType, typename DummyType = void>
class decrementer;
template<typename T, typename DecrementType>
class decrementer<T, DecrementType,
typename tbb::internal::enable_if<
tbb::internal::is_integral<DecrementType>::value, void>::type
> : public receiver<DecrementType>, tbb::internal::no_copy {
T* my_node;
protected:
task* try_put_task( const DecrementType& value ) __TBB_override {
task* result = my_node->decrement_counter( value );
if( !result )
result = SUCCESSFULLY_ENQUEUED;
return result;
}
graph& graph_reference() const __TBB_override {
return my_node->my_graph;
}
template<typename U, typename V> friend class tbb::flow::interface11::limiter_node;
void reset_receiver( reset_flags f ) __TBB_override {
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
if (f & rf_clear_edges)
my_built_predecessors.clear();
#else
tbb::internal::suppress_unused_warning( f );
#endif
}
public:
// Since the decrementer does not make use of the possibly unconstructed owner inside its
// constructor, my_node can be directly initialized with the 'this' pointer passed from the
// owner, making a separate 'set_owner' method unnecessary.
decrementer() : my_node(NULL) {}
void set_owner( T *node ) { my_node = node; }
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
spin_mutex my_mutex;
//! The predecessor type for this node
typedef typename receiver<DecrementType>::predecessor_type predecessor_type;
typedef internal::edge_container<predecessor_type> built_predecessors_type;
typedef typename built_predecessors_type::edge_list_type predecessor_list_type;
built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; }
void internal_add_built_predecessor( predecessor_type &s) __TBB_override {
spin_mutex::scoped_lock l(my_mutex);
my_built_predecessors.add_edge( s );
}
void internal_delete_built_predecessor( predecessor_type &s) __TBB_override {
spin_mutex::scoped_lock l(my_mutex);
my_built_predecessors.delete_edge(s);
}
void copy_predecessors( predecessor_list_type &v) __TBB_override {
spin_mutex::scoped_lock l(my_mutex);
my_built_predecessors.copy_edges(v);
}
size_t predecessor_count() __TBB_override {
spin_mutex::scoped_lock l(my_mutex);
return my_built_predecessors.edge_count();
}
protected:
built_predecessors_type my_built_predecessors;
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
};
template<typename T>
class decrementer<T, continue_msg, void> : public continue_receiver, tbb::internal::no_copy {
T *my_node;
task *execute() __TBB_override {
return my_node->decrement_counter( 1 );
}
protected:
graph& graph_reference() const __TBB_override {
return my_node->my_graph;
}
public:
typedef continue_msg input_type;
typedef continue_msg output_type;
decrementer( int number_of_predecessors = 0 )
: continue_receiver(
__TBB_FLOW_GRAPH_PRIORITY_ARG1(number_of_predecessors, tbb::flow::internal::no_priority)
)
// Since the decrementer does not make use of the possibly unconstructed owner inside its
// constructor, my_node can be directly initialized with the 'this' pointer passed from the
// owner, making a separate 'set_owner' method unnecessary.
, my_node(NULL)
{}
void set_owner( T *node ) { my_node = node; }
};
} // namespace internal
#endif // __TBB__flow_graph_body_impl_H

View File

@@ -0,0 +1,592 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_cache_impl_H
#define __TBB__flow_graph_cache_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interfaceX (in flow_graph.h)
namespace internal {
//! A node_cache maintains a std::queue of elements of type T. Each operation is protected by a lock.
template< typename T, typename M=spin_mutex >
class node_cache {
public:
typedef size_t size_type;
bool empty() {
typename mutex_type::scoped_lock lock( my_mutex );
return internal_empty();
}
void add( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
internal_push(n);
}
void remove( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
for ( size_t i = internal_size(); i != 0; --i ) {
T &s = internal_pop();
if ( &s == &n ) return; // only remove one predecessor per request
internal_push(s);
}
}
void clear() {
while( !my_q.empty()) (void)my_q.pop();
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
my_built_predecessors.clear();
#endif
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef edge_container<T> built_predecessors_type;
built_predecessors_type &built_predecessors() { return my_built_predecessors; }
typedef typename edge_container<T>::edge_list_type predecessor_list_type;
void internal_add_built_predecessor( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.add_edge(n);
}
void internal_delete_built_predecessor( T &n ) {
typename mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.delete_edge(n);
}
void copy_predecessors( predecessor_list_type &v) {
typename mutex_type::scoped_lock lock( my_mutex );
my_built_predecessors.copy_edges(v);
}
size_t predecessor_count() {
typename mutex_type::scoped_lock lock(my_mutex);
return (size_t)(my_built_predecessors.edge_count());
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
protected:
typedef M mutex_type;
mutex_type my_mutex;
std::queue< T * > my_q;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
built_predecessors_type my_built_predecessors;
#endif
// Assumes lock is held
inline bool internal_empty( ) {
return my_q.empty();
}
// Assumes lock is held
inline size_type internal_size( ) {
return my_q.size();
}
// Assumes lock is held
inline void internal_push( T &n ) {
my_q.push(&n);
}
// Assumes lock is held
inline T &internal_pop() {
T *v = my_q.front();
my_q.pop();
return *v;
}
};
//! A cache of predecessors that only supports try_get
template< typename T, typename M=spin_mutex >
#if __TBB_PREVIEW_ASYNC_MSG
// TODO: make predecessor_cache type T-independent when async_msg becomes regular feature
class predecessor_cache : public node_cache< untyped_sender, M > {
#else
class predecessor_cache : public node_cache< sender<T>, M > {
#endif // __TBB_PREVIEW_ASYNC_MSG
public:
typedef M mutex_type;
typedef T output_type;
#if __TBB_PREVIEW_ASYNC_MSG
typedef untyped_sender predecessor_type;
typedef untyped_receiver successor_type;
#else
typedef sender<output_type> predecessor_type;
typedef receiver<output_type> successor_type;
#endif // __TBB_PREVIEW_ASYNC_MSG
predecessor_cache( ) : my_owner( NULL ) { }
void set_owner( successor_type *owner ) { my_owner = owner; }
bool get_item( output_type &v ) {
bool msg = false;
do {
predecessor_type *src;
{
typename mutex_type::scoped_lock lock(this->my_mutex);
if ( this->internal_empty() ) {
break;
}
src = &this->internal_pop();
}
// Try to get from this sender
msg = src->try_get( v );
if (msg == false) {
// Relinquish ownership of the edge
if (my_owner)
src->register_successor( *my_owner );
} else {
// Retain ownership of the edge
this->add(*src);
}
} while ( msg == false );
return msg;
}
// If we are removing arcs (rf_clear_edges), call clear() rather than reset().
void reset() {
if (my_owner) {
for(;;) {
predecessor_type *src;
{
if (this->internal_empty()) break;
src = &this->internal_pop();
}
src->register_successor( *my_owner );
}
}
}
protected:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
using node_cache< predecessor_type, M >::my_built_predecessors;
#endif
successor_type *my_owner;
};
//! A cache of predecessors that supports requests and reservations
// TODO: make reservable_predecessor_cache type T-independent when async_msg becomes regular feature
template< typename T, typename M=spin_mutex >
class reservable_predecessor_cache : public predecessor_cache< T, M > {
public:
typedef M mutex_type;
typedef T output_type;
#if __TBB_PREVIEW_ASYNC_MSG
typedef untyped_sender predecessor_type;
typedef untyped_receiver successor_type;
#else
typedef sender<T> predecessor_type;
typedef receiver<T> successor_type;
#endif // __TBB_PREVIEW_ASYNC_MSG
reservable_predecessor_cache( ) : reserved_src(NULL) { }
bool
try_reserve( output_type &v ) {
bool msg = false;
do {
{
typename mutex_type::scoped_lock lock(this->my_mutex);
if ( reserved_src || this->internal_empty() )
return false;
reserved_src = &this->internal_pop();
}
// Try to get from this sender
msg = reserved_src->try_reserve( v );
if (msg == false) {
typename mutex_type::scoped_lock lock(this->my_mutex);
// Relinquish ownership of the edge
reserved_src->register_successor( *this->my_owner );
reserved_src = NULL;
} else {
// Retain ownership of the edge
this->add( *reserved_src );
}
} while ( msg == false );
return msg;
}
bool
try_release( ) {
reserved_src->try_release( );
reserved_src = NULL;
return true;
}
bool
try_consume( ) {
reserved_src->try_consume( );
reserved_src = NULL;
return true;
}
void reset( ) {
reserved_src = NULL;
predecessor_cache<T,M>::reset( );
}
void clear() {
reserved_src = NULL;
predecessor_cache<T,M>::clear();
}
private:
predecessor_type *reserved_src;
};
//! An abstract cache of successors
// TODO: make successor_cache type T-independent when async_msg becomes regular feature
template<typename T, typename M=spin_rw_mutex >
class successor_cache : tbb::internal::no_copy {
protected:
typedef M mutex_type;
mutex_type my_mutex;
#if __TBB_PREVIEW_ASYNC_MSG
typedef untyped_receiver successor_type;
typedef untyped_receiver *pointer_type;
typedef untyped_sender owner_type;
#else
typedef receiver<T> successor_type;
typedef receiver<T> *pointer_type;
typedef sender<T> owner_type;
#endif // __TBB_PREVIEW_ASYNC_MSG
typedef std::list< pointer_type > successors_type;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
edge_container<successor_type> my_built_successors;
#endif
successors_type my_successors;
owner_type *my_owner;
public:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef typename edge_container<successor_type>::edge_list_type successor_list_type;
edge_container<successor_type> &built_successors() { return my_built_successors; }
void internal_add_built_successor( successor_type &r) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( successor_type &r) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_list_type &v) {
typename mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
typename mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
successor_cache( ) : my_owner(NULL) {}
void set_owner( owner_type *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( successor_type &r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
}
void remove_successor( successor_type &r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
for ( typename successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
my_built_successors.clear();
#endif
}
#if !__TBB_PREVIEW_ASYNC_MSG
virtual task * try_put_task( const T &t ) = 0;
#endif // __TBB_PREVIEW_ASYNC_MSG
}; // successor_cache<T>
//! An abstract cache of successors, specialized to continue_msg
template<typename M>
class successor_cache< continue_msg, M > : tbb::internal::no_copy {
protected:
typedef M mutex_type;
mutex_type my_mutex;
#if __TBB_PREVIEW_ASYNC_MSG
typedef untyped_receiver successor_type;
typedef untyped_receiver *pointer_type;
#else
typedef receiver<continue_msg> successor_type;
typedef receiver<continue_msg> *pointer_type;
#endif // __TBB_PREVIEW_ASYNC_MSG
typedef std::list< pointer_type > successors_type;
successors_type my_successors;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
edge_container<successor_type> my_built_successors;
typedef edge_container<successor_type>::edge_list_type successor_list_type;
#endif
sender<continue_msg> *my_owner;
public:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
edge_container<successor_type> &built_successors() { return my_built_successors; }
void internal_add_built_successor( successor_type &r) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.add_edge( r );
}
void internal_delete_built_successor( successor_type &r) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_built_successors.delete_edge(r);
}
void copy_successors( successor_list_type &v) {
typename mutex_type::scoped_lock l(my_mutex, false);
my_built_successors.copy_edges(v);
}
size_t successor_count() {
typename mutex_type::scoped_lock l(my_mutex,false);
return my_built_successors.edge_count();
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
successor_cache( ) : my_owner(NULL) {}
void set_owner( sender<continue_msg> *owner ) { my_owner = owner; }
virtual ~successor_cache() {}
void register_successor( successor_type &r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
my_successors.push_back( &r );
if ( my_owner && r.is_continue_receiver() ) {
r.register_predecessor( *my_owner );
}
}
void remove_successor( successor_type &r ) {
typename mutex_type::scoped_lock l(my_mutex, true);
for ( successors_type::iterator i = my_successors.begin();
i != my_successors.end(); ++i ) {
if ( *i == & r ) {
// TODO: Check if we need to test for continue_receiver before
// removing from r.
if ( my_owner )
r.remove_predecessor( *my_owner );
my_successors.erase(i);
break;
}
}
}
bool empty() {
typename mutex_type::scoped_lock l(my_mutex, false);
return my_successors.empty();
}
void clear() {
my_successors.clear();
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
my_built_successors.clear();
#endif
}
#if !__TBB_PREVIEW_ASYNC_MSG
virtual task * try_put_task( const continue_msg &t ) = 0;
#endif // __TBB_PREVIEW_ASYNC_MSG
}; // successor_cache< continue_msg >
//! A cache of successors that are broadcast to
// TODO: make broadcast_cache type T-independent when async_msg becomes regular feature
template<typename T, typename M=spin_rw_mutex>
class broadcast_cache : public successor_cache<T, M> {
typedef M mutex_type;
typedef typename successor_cache<T,M>::successors_type successors_type;
public:
broadcast_cache( ) {}
// as above, but call try_put_task instead, and return the last task we received (if any)
#if __TBB_PREVIEW_ASYNC_MSG
template<typename X>
task * try_put_task( const X &t ) {
#else
task * try_put_task( const T &t ) __TBB_override {
#endif // __TBB_PREVIEW_ASYNC_MSG
task * last_task = NULL;
bool upgraded = true;
typename mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
// workaround for icc bug
graph& graph_ref = (*i)->graph_reference();
last_task = combine_tasks(graph_ref, last_task, new_task); // enqueue if necessary
if(new_task) {
++i;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return last_task;
}
// call try_put_task and return list of received tasks
#if __TBB_PREVIEW_ASYNC_MSG
template<typename X>
bool gather_successful_try_puts( const X &t, task_list &tasks ) {
#else
bool gather_successful_try_puts( const T &t, task_list &tasks ) {
#endif // __TBB_PREVIEW_ASYNC_MSG
bool upgraded = true;
bool is_at_least_one_put_successful = false;
typename mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task * new_task = (*i)->try_put_task(t);
if(new_task) {
++i;
if(new_task != SUCCESSFULLY_ENQUEUED) {
tasks.push_back(*new_task);
}
is_at_least_one_put_successful = true;
}
else { // failed
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
} else {
++i;
}
}
}
return is_at_least_one_put_successful;
}
};
//! A cache of successors that are put in a round-robin fashion
// TODO: make round_robin_cache type T-independent when async_msg becomes regular feature
template<typename T, typename M=spin_rw_mutex >
class round_robin_cache : public successor_cache<T, M> {
typedef size_t size_type;
typedef M mutex_type;
typedef typename successor_cache<T,M>::successors_type successors_type;
public:
round_robin_cache( ) {}
size_type size() {
typename mutex_type::scoped_lock l(this->my_mutex, false);
return this->my_successors.size();
}
#if __TBB_PREVIEW_ASYNC_MSG
template<typename X>
task * try_put_task( const X &t ) {
#else
task *try_put_task( const T &t ) __TBB_override {
#endif // __TBB_PREVIEW_ASYNC_MSG
bool upgraded = true;
typename mutex_type::scoped_lock l(this->my_mutex, upgraded);
typename successors_type::iterator i = this->my_successors.begin();
while ( i != this->my_successors.end() ) {
task *new_task = (*i)->try_put_task(t);
if ( new_task ) {
return new_task;
} else {
if ( (*i)->register_predecessor(*this->my_owner) ) {
if (!upgraded) {
l.upgrade_to_writer();
upgraded = true;
}
i = this->my_successors.erase(i);
}
else {
++i;
}
}
}
return NULL;
}
};
} // namespace internal
#endif // __TBB__flow_graph_cache_impl_H

View File

@@ -0,0 +1,547 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_impl_H
#define __TBB_flow_graph_impl_H
#include "../tbb_stddef.h"
#include "../task.h"
#include "../task_arena.h"
#include "../flow_graph_abstractions.h"
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
#include "../concurrent_priority_queue.h"
#endif
#include <list>
#if TBB_DEPRECATED_FLOW_ENQUEUE
#define FLOW_SPAWN(a) tbb::task::enqueue((a))
#else
#define FLOW_SPAWN(a) tbb::task::spawn((a))
#endif
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
#define __TBB_FLOW_GRAPH_PRIORITY_EXPR( expr ) expr
#define __TBB_FLOW_GRAPH_PRIORITY_ARG0( priority ) , priority
#define __TBB_FLOW_GRAPH_PRIORITY_ARG1( arg1, priority ) arg1, priority
#else
#define __TBB_FLOW_GRAPH_PRIORITY_EXPR( expr )
#define __TBB_FLOW_GRAPH_PRIORITY_ARG0( priority )
#define __TBB_FLOW_GRAPH_PRIORITY_ARG1( arg1, priority ) arg1
#endif // __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
#if TBB_DEPRECATED_LIMITER_NODE_CONSTRUCTOR
#define __TBB_DEPRECATED_LIMITER_EXPR( expr ) expr
#define __TBB_DEPRECATED_LIMITER_ARG2( arg1, arg2 ) arg1, arg2
#define __TBB_DEPRECATED_LIMITER_ARG4( arg1, arg2, arg3, arg4 ) arg1, arg3, arg4
#else
#define __TBB_DEPRECATED_LIMITER_EXPR( expr )
#define __TBB_DEPRECATED_LIMITER_ARG2( arg1, arg2 ) arg1
#define __TBB_DEPRECATED_LIMITER_ARG4( arg1, arg2, arg3, arg4 ) arg1, arg2
#endif // TBB_DEPRECATED_LIMITER_NODE_CONSTRUCTOR
namespace tbb {
namespace flow {
namespace internal {
static tbb::task * const SUCCESSFULLY_ENQUEUED = (task *)-1;
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
typedef unsigned int node_priority_t;
static const node_priority_t no_priority = node_priority_t(0);
#endif
}
namespace interface10 {
class graph;
}
namespace interface11 {
using tbb::flow::internal::SUCCESSFULLY_ENQUEUED;
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
using tbb::flow::internal::node_priority_t;
using tbb::flow::internal::no_priority;
//! Base class for tasks generated by graph nodes.
struct graph_task : public task {
graph_task( node_priority_t node_priority = no_priority ) : priority( node_priority ) {}
node_priority_t priority;
};
#else
typedef task graph_task;
#endif /* __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES */
class graph_node;
template <typename GraphContainerType, typename GraphNodeType>
class graph_iterator {
friend class tbb::flow::interface10::graph;
friend class graph_node;
public:
typedef size_t size_type;
typedef GraphNodeType value_type;
typedef GraphNodeType* pointer;
typedef GraphNodeType& reference;
typedef const GraphNodeType& const_reference;
typedef std::forward_iterator_tag iterator_category;
//! Default constructor
graph_iterator() : my_graph(NULL), current_node(NULL) {}
//! Copy constructor
graph_iterator(const graph_iterator& other) :
my_graph(other.my_graph), current_node(other.current_node)
{}
//! Assignment
graph_iterator& operator=(const graph_iterator& other) {
if (this != &other) {
my_graph = other.my_graph;
current_node = other.current_node;
}
return *this;
}
//! Dereference
reference operator*() const;
//! Dereference
pointer operator->() const;
//! Equality
bool operator==(const graph_iterator& other) const {
return ((my_graph == other.my_graph) && (current_node == other.current_node));
}
//! Inequality
bool operator!=(const graph_iterator& other) const { return !(operator==(other)); }
//! Pre-increment
graph_iterator& operator++() {
internal_forward();
return *this;
}
//! Post-increment
graph_iterator operator++(int) {
graph_iterator result = *this;
operator++();
return result;
}
private:
// the graph over which we are iterating
GraphContainerType *my_graph;
// pointer into my_graph's my_nodes list
pointer current_node;
//! Private initializing constructor for begin() and end() iterators
graph_iterator(GraphContainerType *g, bool begin);
void internal_forward();
}; // class graph_iterator
// flags to modify the behavior of the graph reset(). Can be combined.
enum reset_flags {
rf_reset_protocol = 0,
rf_reset_bodies = 1 << 0, // delete the current node body, reset to a copy of the initial node body.
rf_clear_edges = 1 << 1 // delete edges
};
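// Illustrative sketch (not part of the original header, assuming the usual tbb::flow aliases for
// these names): the flags combine with bitwise OR, e.g. removing all edges and restoring the
// initial node bodies in one thread-unsafe reset. The cast is needed because OR-ing two
// enumerators yields an int.
//
//   g.reset( static_cast<tbb::flow::reset_flags>( tbb::flow::rf_clear_edges | tbb::flow::rf_reset_bodies ) );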
namespace internal {
void activate_graph(tbb::flow::interface10::graph& g);
void deactivate_graph(tbb::flow::interface10::graph& g);
bool is_graph_active(tbb::flow::interface10::graph& g);
tbb::task& prioritize_task(tbb::flow::interface10::graph& g, tbb::task& arena_task);
void spawn_in_graph_arena(tbb::flow::interface10::graph& g, tbb::task& arena_task);
void enqueue_in_graph_arena(tbb::flow::interface10::graph &g, tbb::task& arena_task);
void add_task_to_graph_reset_list(tbb::flow::interface10::graph& g, tbb::task *tp);
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
struct graph_task_comparator {
bool operator()(const graph_task* left, const graph_task* right) {
return left->priority < right->priority;
}
};
typedef tbb::concurrent_priority_queue<graph_task*, graph_task_comparator> graph_task_priority_queue_t;
class priority_task_selector : public task {
public:
priority_task_selector(graph_task_priority_queue_t& priority_queue)
: my_priority_queue(priority_queue) {}
task* execute() __TBB_override {
graph_task* t = NULL;
bool result = my_priority_queue.try_pop(t);
__TBB_ASSERT_EX( result, "Number of critical tasks for scheduler and tasks"
" in graph's priority queue mismatched" );
__TBB_ASSERT( t && t != SUCCESSFULLY_ENQUEUED,
"Incorrect task submitted to graph priority queue" );
__TBB_ASSERT( t->priority != tbb::flow::internal::no_priority,
"Tasks from graph's priority queue must have priority" );
task* t_next = t->execute();
task::destroy(*t);
return t_next;
}
private:
graph_task_priority_queue_t& my_priority_queue;
};
#endif /* __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES */
}
} // namespace interfaceX
namespace interface10 {
//! The graph class
/** This class serves as a handle to the graph */
class graph : tbb::internal::no_copy, public tbb::flow::graph_proxy {
friend class tbb::flow::interface11::graph_node;
template< typename Body >
class run_task : public tbb::flow::interface11::graph_task {
public:
run_task(Body& body
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
, tbb::flow::interface11::node_priority_t node_priority = tbb::flow::interface11::no_priority
) : tbb::flow::interface11::graph_task(node_priority),
#else
) :
#endif
my_body(body) { }
tbb::task *execute() __TBB_override {
my_body();
return NULL;
}
private:
Body my_body;
};
template< typename Receiver, typename Body >
class run_and_put_task : public tbb::flow::interface11::graph_task {
public:
run_and_put_task(Receiver &r, Body& body) : my_receiver(r), my_body(body) {}
tbb::task *execute() __TBB_override {
tbb::task *res = my_receiver.try_put_task(my_body());
if (res == tbb::flow::interface11::SUCCESSFULLY_ENQUEUED) res = NULL;
return res;
}
private:
Receiver &my_receiver;
Body my_body;
};
typedef std::list<tbb::task *> task_list_type;
class wait_functor {
tbb::task* graph_root_task;
public:
wait_functor(tbb::task* t) : graph_root_task(t) {}
void operator()() const { graph_root_task->wait_for_all(); }
};
//! A functor that spawns a task
class spawn_functor : tbb::internal::no_assign {
tbb::task& spawn_task;
public:
spawn_functor(tbb::task& t) : spawn_task(t) {}
void operator()() const {
FLOW_SPAWN(spawn_task);
}
};
void prepare_task_arena(bool reinit = false) {
if (reinit) {
__TBB_ASSERT(my_task_arena, "task arena is NULL");
my_task_arena->terminate();
my_task_arena->initialize(tbb::task_arena::attach());
}
else {
__TBB_ASSERT(my_task_arena == NULL, "task arena is not NULL");
my_task_arena = new tbb::task_arena(tbb::task_arena::attach());
}
if (!my_task_arena->is_active()) // failed to attach
my_task_arena->initialize(); // create a new, default-initialized arena
__TBB_ASSERT(my_task_arena->is_active(), "task arena is not active");
}
public:
//! Constructs a graph with isolated task_group_context
graph();
//! Constructs a graph with use_this_context as context
explicit graph(tbb::task_group_context& use_this_context);
//! Destroys the graph.
/** Calls wait_for_all, then destroys the root task and context. */
~graph();
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
void set_name(const char *name);
#endif
void increment_wait_count() {
reserve_wait();
}
void decrement_wait_count() {
release_wait();
}
//! Used to register that an external entity may still interact with the graph.
/** The graph will not return from wait_for_all until a matching number of decrement_wait_count calls
is made. */
void reserve_wait() __TBB_override;
//! Deregisters an external entity that may have interacted with the graph.
/** The graph will not return from wait_for_all until the number of decrement_wait_count calls
matches the number of increment_wait_count calls. */
void release_wait() __TBB_override;
//! Spawns a task that runs a body and puts its output to a specific receiver
/** The task is spawned as a child of the graph. This is useful for running tasks
that need to block a wait_for_all() on the graph. For example, a one-off source. */
template< typename Receiver, typename Body >
void run(Receiver &r, Body body) {
if (tbb::flow::interface11::internal::is_graph_active(*this)) {
task* rtask = new (task::allocate_additional_child_of(*root_task()))
run_and_put_task< Receiver, Body >(r, body);
my_task_arena->execute(spawn_functor(*rtask));
}
}
//! Spawns a task that runs a function object
/** The task is spawned as a child of the graph. This is useful for running tasks
that need to block a wait_for_all() on the graph. For example, a one-off source. */
template< typename Body >
void run(Body body) {
if (tbb::flow::interface11::internal::is_graph_active(*this)) {
task* rtask = new (task::allocate_additional_child_of(*root_task())) run_task< Body >(body);
my_task_arena->execute(spawn_functor(*rtask));
}
}
//! Wait until the graph is idle and the number of decrement_wait_count calls equals the number of increment_wait_count calls.
/** The waiting thread will go off and steal work while it is blocked in the wait_for_all. */
void wait_for_all() {
cancelled = false;
caught_exception = false;
if (my_root_task) {
#if TBB_USE_EXCEPTIONS
try {
#endif
my_task_arena->execute(wait_functor(my_root_task));
#if __TBB_TASK_GROUP_CONTEXT
cancelled = my_context->is_group_execution_cancelled();
#endif
#if TBB_USE_EXCEPTIONS
}
catch (...) {
my_root_task->set_ref_count(1);
my_context->reset();
caught_exception = true;
cancelled = true;
throw;
}
#endif
#if __TBB_TASK_GROUP_CONTEXT
// TODO: the "if" condition below is just a work-around to support the concurrent wait
// mode. The cancellation and exception mechanisms are still broken in this mode.
// Consider using task_group to avoid re-implementing the same functionality.
if (!(my_context->traits() & tbb::task_group_context::concurrent_wait)) {
my_context->reset(); // consistent with behavior in catch()
#endif
my_root_task->set_ref_count(1);
#if __TBB_TASK_GROUP_CONTEXT
}
#endif
}
}
//! Returns the root task of the graph
tbb::task * root_task() {
return my_root_task;
}
// ITERATORS
template<typename C, typename N>
friend class tbb::flow::interface11::graph_iterator;
// Graph iterator typedefs
typedef tbb::flow::interface11::graph_iterator<graph, tbb::flow::interface11::graph_node> iterator;
typedef tbb::flow::interface11::graph_iterator<const graph, const tbb::flow::interface11::graph_node> const_iterator;
// Graph iterator constructors
//! start iterator
iterator begin();
//! end iterator
iterator end();
//! start const iterator
const_iterator begin() const;
//! end const iterator
const_iterator end() const;
//! start const iterator
const_iterator cbegin() const;
//! end const iterator
const_iterator cend() const;
//! return status of graph execution
bool is_cancelled() { return cancelled; }
bool exception_thrown() { return caught_exception; }
// thread-unsafe state reset.
void reset(tbb::flow::interface11::reset_flags f = tbb::flow::interface11::rf_reset_protocol);
private:
tbb::task *my_root_task;
#if __TBB_TASK_GROUP_CONTEXT
tbb::task_group_context *my_context;
#endif
bool own_context;
bool cancelled;
bool caught_exception;
bool my_is_active;
task_list_type my_reset_task_list;
tbb::flow::interface11::graph_node *my_nodes, *my_nodes_last;
tbb::spin_mutex nodelist_mutex;
void register_node(tbb::flow::interface11::graph_node *n);
void remove_node(tbb::flow::interface11::graph_node *n);
tbb::task_arena* my_task_arena;
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
tbb::flow::interface11::internal::graph_task_priority_queue_t my_priority_queue;
#endif
friend void tbb::flow::interface11::internal::activate_graph(graph& g);
friend void tbb::flow::interface11::internal::deactivate_graph(graph& g);
friend bool tbb::flow::interface11::internal::is_graph_active(graph& g);
friend tbb::task& tbb::flow::interface11::internal::prioritize_task(graph& g, tbb::task& arena_task);
friend void tbb::flow::interface11::internal::spawn_in_graph_arena(graph& g, tbb::task& arena_task);
friend void tbb::flow::interface11::internal::enqueue_in_graph_arena(graph &g, tbb::task& arena_task);
friend void tbb::flow::interface11::internal::add_task_to_graph_reset_list(graph& g, tbb::task *tp);
friend class tbb::interface7::internal::task_arena_base;
}; // class graph
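// Illustrative sketch (not part of the original header, assumes <thread> for the feeder thread):
// the reserve_wait/release_wait protocol described above lets an external thread keep
// wait_for_all() from returning until it has finished interacting with the graph.
//
//   tbb::flow::graph g;
//   g.reserve_wait();                      // an external thread will still feed the graph
//   std::thread feeder( [&g] {
//       g.run( []{ /* work spawned as a child of the graph */ } );
//       g.release_wait();                  // matching call; wait_for_all may now return
//   } );
//   g.wait_for_all();                      // steals work while blocked, returns once the graph is idle
//   feeder.join();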
} // namespace interface10
namespace interface11 {
using tbb::flow::interface10::graph;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
namespace internal{
class get_graph_helper;
}
#endif
//! The base of all graph nodes.
class graph_node : tbb::internal::no_copy {
friend class graph;
template<typename C, typename N>
friend class graph_iterator;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
friend class internal::get_graph_helper;
#endif
protected:
graph& my_graph;
graph_node *next, *prev;
public:
explicit graph_node(graph& g);
virtual ~graph_node();
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
virtual void set_name(const char *name) = 0;
#endif
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
virtual void extract() = 0;
#endif
protected:
// performs the reset on an individual node.
virtual void reset_node(reset_flags f = rf_reset_protocol) = 0;
}; // class graph_node
namespace internal {
inline void activate_graph(graph& g) {
g.my_is_active = true;
}
inline void deactivate_graph(graph& g) {
g.my_is_active = false;
}
inline bool is_graph_active(graph& g) {
return g.my_is_active;
}
#if __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES
inline tbb::task& prioritize_task(graph& g, tbb::task& t) {
task* critical_task = &t;
// TODO: change flow graph's interfaces to work with graph_task type instead of tbb::task.
graph_task* gt = static_cast<graph_task*>(&t);
if( gt->priority != no_priority ) {
//! Non-preemptive priority pattern. The original task is submitted as a work item to the
//! priority queue, and a new critical task is created to take and execute a work item with
//! the highest known priority. The reference counting responsibility is transferred (via
//! allocate_continuation) to the new task.
critical_task = new( gt->allocate_continuation() ) priority_task_selector(g.my_priority_queue);
tbb::internal::make_critical( *critical_task );
g.my_priority_queue.push(gt);
}
return *critical_task;
}
#else
inline tbb::task& prioritize_task(graph&, tbb::task& t) {
return t;
}
#endif /* __TBB_PREVIEW_FLOW_GRAPH_PRIORITIES */
//! Spawns a task inside graph arena
inline void spawn_in_graph_arena(graph& g, tbb::task& arena_task) {
if (is_graph_active(g)) {
graph::spawn_functor s_fn(prioritize_task(g, arena_task));
__TBB_ASSERT(g.my_task_arena && g.my_task_arena->is_active(), NULL);
g.my_task_arena->execute(s_fn);
}
}
//! Enqueues a task inside graph arena
inline void enqueue_in_graph_arena(graph &g, tbb::task& arena_task) {
if (is_graph_active(g)) {
__TBB_ASSERT( g.my_task_arena && g.my_task_arena->is_active(), "Is graph's arena initialized and active?" );
task::enqueue(prioritize_task(g, arena_task), *g.my_task_arena);
}
}
inline void add_task_to_graph_reset_list(graph& g, tbb::task *tp) {
g.my_reset_task_list.push_back(tp);
}
} // namespace internal
} // namespace interfaceX
} // namespace flow
} // namespace tbb
#endif // __TBB_flow_graph_impl_H

View File

@@ -0,0 +1,480 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_indexer_impl_H
#define __TBB__flow_graph_indexer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "_flow_graph_types_impl.h"
namespace internal {
// The output of the indexer_node is a tbb::flow::tagged_msg of the form
// tagged_msg<tag, result>, where the value of tag indicates which input
// port the result that is put to the successor arrived on.
template<typename IndexerNodeBaseType, typename T, size_t K>
task* do_try_put(const T &v, void *p) {
typename IndexerNodeBaseType::output_type o(K, v);
return reinterpret_cast<IndexerNodeBaseType *>(p)->try_put_task(&o);
}
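// Illustrative sketch (not part of the original header): the public-facing behavior this file
// implements. Each input port wraps its value in a tagged_msg whose tag records the port index,
// so a single successor can distinguish the sources.
//
//   tbb::flow::graph g;
//   tbb::flow::indexer_node<int, float> merge( g );
//   typedef tbb::flow::indexer_node<int, float>::output_type tagged_t;
//   tbb::flow::function_node<tagged_t> sink( g, tbb::flow::unlimited, []( const tagged_t& msg ) {
//       if ( msg.is_a<int>() ) { int   v = tbb::flow::cast_to<int>( msg );   /* arrived on port 0 */ }
//       else                   { float f = tbb::flow::cast_to<float>( msg ); /* arrived on port 1 */ }
//   } );
//   tbb::flow::make_edge( merge, sink );
//   tbb::flow::input_port<0>( merge ).try_put( 3 );
//   tbb::flow::input_port<1>( merge ).try_put( 1.5f );
//   g.wait_for_all();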
template<typename TupleTypes,int N>
struct indexer_helper {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) {
typedef typename tuple_element<N-1, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, N-1>;
tbb::flow::get<N-1>(my_input).set_up(p, indexer_node_put_task, g);
indexer_helper<TupleTypes,N-1>::template set_indexer_node_pointer<IndexerNodeBaseType,PortTuple>(my_input, p, g);
}
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
indexer_helper<TupleTypes,N-1>::reset_inputs(my_input, f);
tbb::flow::get<N-1>(my_input).reset_receiver(f);
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
template<typename InputTuple>
static inline void extract(InputTuple &my_input) {
indexer_helper<TupleTypes,N-1>::extract(my_input);
tbb::flow::get<N-1>(my_input).extract_receiver();
}
#endif
};
template<typename TupleTypes>
struct indexer_helper<TupleTypes,1> {
template<typename IndexerNodeBaseType, typename PortTuple>
static inline void set_indexer_node_pointer(PortTuple &my_input, IndexerNodeBaseType *p, graph& g) {
typedef typename tuple_element<0, TupleTypes>::type T;
task *(*indexer_node_put_task)(const T&, void *) = do_try_put<IndexerNodeBaseType, T, 0>;
tbb::flow::get<0>(my_input).set_up(p, indexer_node_put_task, g);
}
template<typename InputTuple>
static inline void reset_inputs(InputTuple &my_input, reset_flags f) {
tbb::flow::get<0>(my_input).reset_receiver(f);
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
template<typename InputTuple>
static inline void extract(InputTuple &my_input) {
tbb::flow::get<0>(my_input).extract_receiver();
}
#endif
};
template<typename T>
class indexer_input_port : public receiver<T> {
private:
void* my_indexer_ptr;
typedef task* (* forward_function_ptr)(T const &, void* );
forward_function_ptr my_try_put_task;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
spin_mutex my_pred_mutex;
typedef typename receiver<T>::built_predecessors_type built_predecessors_type;
built_predecessors_type my_built_predecessors;
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
graph* my_graph;
public:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
indexer_input_port() : my_pred_mutex(), my_graph(NULL) {}
indexer_input_port( const indexer_input_port & other) : receiver<T>(), my_pred_mutex(), my_graph(other.my_graph) {
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
void set_up(void* p, forward_function_ptr f, graph& g) {
my_indexer_ptr = p;
my_try_put_task = f;
my_graph = &g;
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef typename receiver<T>::predecessor_list_type predecessor_list_type;
typedef typename receiver<T>::predecessor_type predecessor_type;
built_predecessors_type &built_predecessors() __TBB_override { return my_built_predecessors; }
size_t predecessor_count() __TBB_override {
spin_mutex::scoped_lock l(my_pred_mutex);
return my_built_predecessors.edge_count();
}
void internal_add_built_predecessor(predecessor_type &p) __TBB_override {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.add_edge(p);
}
void internal_delete_built_predecessor(predecessor_type &p) __TBB_override {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.delete_edge(p);
}
void copy_predecessors( predecessor_list_type &v) __TBB_override {
spin_mutex::scoped_lock l(my_pred_mutex);
my_built_predecessors.copy_edges(v);
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
protected:
template< typename R, typename B > friend class run_and_put_task;
template<typename X, typename Y> friend class internal::broadcast_cache;
template<typename X, typename Y> friend class internal::round_robin_cache;
task *try_put_task(const T &v) __TBB_override {
return my_try_put_task(v, my_indexer_ptr);
}
graph& graph_reference() const __TBB_override {
return *my_graph;
}
public:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
void reset_receiver(reset_flags f) __TBB_override { if(f&rf_clear_edges) my_built_predecessors.clear(); }
#else
void reset_receiver(reset_flags /*f*/) __TBB_override { }
#endif
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
void extract_receiver() { my_built_predecessors.receiver_extract(*this); }
#endif
};
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_FE {
public:
static const int N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef InputTuple input_type;
// Some versions of Intel(R) C++ Compiler fail to generate an implicit constructor for classes that have a std::tuple member.
indexer_node_FE() : my_inputs() {}
input_type &input_ports() { return my_inputs; }
protected:
input_type my_inputs;
};
//! indexer_node_base
template<typename InputTuple, typename OutputType, typename StructTypes>
class indexer_node_base : public graph_node, public indexer_node_FE<InputTuple, OutputType,StructTypes>,
public sender<OutputType> {
protected:
using graph_node::my_graph;
public:
static const size_t N = tbb::flow::tuple_size<InputTuple>::value;
typedef OutputType output_type;
typedef StructTypes tuple_types;
typedef typename sender<output_type>::successor_type successor_type;
typedef indexer_node_FE<InputTuple, output_type,StructTypes> input_ports_type;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef typename sender<output_type>::built_successors_type built_successors_type;
typedef typename sender<output_type>::successor_list_type successor_list_type;
#endif
private:
// ----------- Aggregator ------------
enum op_type { reg_succ, rem_succ, try__put_task
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
, add_blt_succ, del_blt_succ,
blt_succ_cnt, blt_succ_cpy
#endif
};
typedef indexer_node_base<InputTuple,output_type,StructTypes> class_type;
class indexer_node_base_operation : public aggregated_operation<indexer_node_base_operation> {
public:
char type;
union {
output_type const *my_arg;
successor_type *my_succ;
task *bypass_t;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
size_t cnt_val;
successor_list_type *succv;
#endif
};
indexer_node_base_operation(const output_type* e, op_type t) :
type(char(t)), my_arg(e) {}
indexer_node_base_operation(const successor_type &s, op_type t) : type(char(t)),
my_succ(const_cast<successor_type *>(&s)) {}
indexer_node_base_operation(op_type t) : type(char(t)) {}
};
typedef internal::aggregating_functor<class_type, indexer_node_base_operation> handler_type;
friend class internal::aggregating_functor<class_type, indexer_node_base_operation>;
aggregator<handler_type, indexer_node_base_operation> my_aggregator;
void handle_operations(indexer_node_base_operation* op_list) {
indexer_node_base_operation *current;
while(op_list) {
current = op_list;
op_list = op_list->next;
switch(current->type) {
case reg_succ:
my_successors.register_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case rem_succ:
my_successors.remove_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case try__put_task: {
current->bypass_t = my_successors.try_put_task(*(current->my_arg));
__TBB_store_with_release(current->status, SUCCEEDED); // the task returned by try_put_task is passed back through bypass_t
}
break;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
case add_blt_succ:
my_successors.internal_add_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case del_blt_succ:
my_successors.internal_delete_built_successor(*(current->my_succ));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cnt:
current->cnt_val = my_successors.successor_count();
__TBB_store_with_release(current->status, SUCCEEDED);
break;
case blt_succ_cpy:
my_successors.copy_successors(*(current->succv));
__TBB_store_with_release(current->status, SUCCEEDED);
break;
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
}
}
}
// ---------- end aggregator -----------
public:
indexer_node_base(graph& g) : graph_node(g), input_ports_type() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, g);
my_successors.set_owner(this);
my_aggregator.initialize_handler(handler_type(this));
}
indexer_node_base(const indexer_node_base& other) : graph_node(other.my_graph), input_ports_type(), sender<output_type>() {
indexer_helper<StructTypes,N>::set_indexer_node_pointer(this->my_inputs, this, other.my_graph);
my_successors.set_owner(this);
my_aggregator.initialize_handler(handler_type(this));
}
bool register_successor(successor_type &r) __TBB_override {
indexer_node_base_operation op_data(r, reg_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
bool remove_successor( successor_type &r) __TBB_override {
indexer_node_base_operation op_data(r, rem_succ);
my_aggregator.execute(&op_data);
return op_data.status == SUCCEEDED;
}
task * try_put_task(output_type const *v) { // not a virtual method in this class
indexer_node_base_operation op_data(v, try__put_task);
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
built_successors_type &built_successors() __TBB_override { return my_successors.built_successors(); }
void internal_add_built_successor( successor_type &r) __TBB_override {
indexer_node_base_operation op_data(r, add_blt_succ);
my_aggregator.execute(&op_data);
}
void internal_delete_built_successor( successor_type &r) __TBB_override {
indexer_node_base_operation op_data(r, del_blt_succ);
my_aggregator.execute(&op_data);
}
size_t successor_count() __TBB_override {
indexer_node_base_operation op_data(blt_succ_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
void copy_successors( successor_list_type &v) __TBB_override {
indexer_node_base_operation op_data(blt_succ_cpy);
op_data.succv = &v;
my_aggregator.execute(&op_data);
}
void extract() __TBB_override {
my_successors.built_successors().sender_extract(*this);
indexer_helper<StructTypes,N>::extract(this->my_inputs);
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
protected:
void reset_node(reset_flags f) __TBB_override {
if(f & rf_clear_edges) {
my_successors.clear();
indexer_helper<StructTypes,N>::reset_inputs(this->my_inputs,f);
}
}
private:
broadcast_cache<output_type, null_rw_mutex> my_successors;
}; //indexer_node_base
template<int N, typename InputTuple> struct input_types;
template<typename InputTuple>
struct input_types<1, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename internal::tagged_msg<size_t, first_type > type;
};
template<typename InputTuple>
struct input_types<2, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type> type;
};
template<typename InputTuple>
struct input_types<3, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type> type;
};
template<typename InputTuple>
struct input_types<4, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type> type;
};
template<typename InputTuple>
struct input_types<5, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type> type;
};
template<typename InputTuple>
struct input_types<6, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type> type;
};
template<typename InputTuple>
struct input_types<7, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type> type;
};
template<typename InputTuple>
struct input_types<8, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type> type;
};
template<typename InputTuple>
struct input_types<9, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type> type;
};
template<typename InputTuple>
struct input_types<10, InputTuple> {
typedef typename tuple_element<0, InputTuple>::type first_type;
typedef typename tuple_element<1, InputTuple>::type second_type;
typedef typename tuple_element<2, InputTuple>::type third_type;
typedef typename tuple_element<3, InputTuple>::type fourth_type;
typedef typename tuple_element<4, InputTuple>::type fifth_type;
typedef typename tuple_element<5, InputTuple>::type sixth_type;
typedef typename tuple_element<6, InputTuple>::type seventh_type;
typedef typename tuple_element<7, InputTuple>::type eighth_type;
typedef typename tuple_element<8, InputTuple>::type nineth_type;
typedef typename tuple_element<9, InputTuple>::type tenth_type;
typedef typename internal::tagged_msg<size_t, first_type, second_type, third_type,
fourth_type, fifth_type, sixth_type,
seventh_type, eighth_type, nineth_type,
tenth_type> type;
};
// type generators
template<typename OutputTuple>
struct indexer_types : public input_types<tuple_size<OutputTuple>::value, OutputTuple> {
static const int N = tbb::flow::tuple_size<OutputTuple>::value;
typedef typename input_types<N, OutputTuple>::type output_type;
typedef typename wrap_tuple_elements<N,indexer_input_port,OutputTuple>::type input_ports_type;
typedef internal::indexer_node_FE<input_ports_type,output_type,OutputTuple> indexer_FE_type;
typedef internal::indexer_node_base<input_ports_type, output_type, OutputTuple> indexer_base_type;
};
template<class OutputTuple>
class unfolded_indexer_node : public indexer_types<OutputTuple>::indexer_base_type {
public:
typedef typename indexer_types<OutputTuple>::input_ports_type input_ports_type;
typedef OutputTuple tuple_types;
typedef typename indexer_types<OutputTuple>::output_type output_type;
private:
typedef typename indexer_types<OutputTuple>::indexer_base_type base_type;
public:
unfolded_indexer_node(graph& g) : base_type(g) {}
unfolded_indexer_node(const unfolded_indexer_node &other) : base_type(other) {}
};
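// A minimal usage sketch of the public indexer_node, which flow_graph.h builds
// on top of unfolded_indexer_node above. The graph and node names below are
// illustrative; only the documented tbb::flow API (tagged_msg, cast_to,
// input_port) is assumed. Disabled here because it is application code, not
// part of this header.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>

int main() {
    tbb::flow::graph g;
    tbb::flow::indexer_node<int, float> indexer(g);
    using msg_t = tbb::flow::indexer_node<int, float>::output_type;  // a tagged_msg

    // The sink inspects the tag to learn which input port produced the value.
    tbb::flow::function_node<msg_t> sink(g, tbb::flow::serial, [](const msg_t& msg) {
        if (msg.is_a<int>())
            std::cout << "port 0 (int):   " << tbb::flow::cast_to<int>(msg) << "\n";
        else
            std::cout << "port 1 (float): " << tbb::flow::cast_to<float>(msg) << "\n";
        return tbb::flow::continue_msg();
    });

    tbb::flow::make_edge(indexer, sink);
    tbb::flow::input_port<0>(indexer).try_put(42);     // forwarded as a tagged_msg with tag 0
    tbb::flow::input_port<1>(indexer).try_put(3.14f);  // forwarded as a tagged_msg with tag 1
    g.wait_for_all();
    return 0;
}
#endif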
} /* namespace internal */
#endif /* __TBB__flow_graph_indexer_impl_H */


@@ -0,0 +1,283 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_item_buffer_impl_H
#define __TBB__flow_graph_item_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "tbb/internal/_flow_graph_types_impl.h" // for aligned_pair
// in namespace tbb::flow::interfaceX (included in _flow_graph_node_impl.h)
//! Expandable buffer of items. The possible operations are push, pop,
//  tests for emptiness, and so forth. No mutual exclusion is built in.
//  Objects are explicitly constructed into slots and explicitly destroyed.
//  get_my_item gives a read-only reference to the item in the buffer;
//  set_my_item may be called on either an empty or an occupied slot.
using internal::aligned_pair;
using internal::alignment_of;
namespace internal {
template <typename T, typename A=cache_aligned_allocator<T> >
class item_buffer {
public:
typedef T item_type;
enum buffer_item_state { no_item=0, has_item=1, reserved_item=2 };
protected:
typedef size_t size_type;
typedef typename aligned_pair<item_type, buffer_item_state>::type buffer_item_type;
typedef typename tbb::internal::allocator_rebind<A, buffer_item_type>::type allocator_type;
buffer_item_type *my_array;
size_type my_array_size;
static const size_type initial_buffer_size = 4;
size_type my_head;
size_type my_tail;
bool buffer_empty() const { return my_head == my_tail; }
buffer_item_type &item(size_type i) {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of<buffer_item_state>::value),NULL);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of<item_type>::value), NULL);
return my_array[i & (my_array_size - 1) ];
}
const buffer_item_type &item(size_type i) const {
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].second))%alignment_of<buffer_item_state>::value), NULL);
__TBB_ASSERT(!(size_type(&(my_array[i&(my_array_size-1)].first))%alignment_of<item_type>::value), NULL);
return my_array[i & (my_array_size-1)];
}
bool my_item_valid(size_type i) const { return (i < my_tail) && (i >= my_head) && (item(i).second != no_item); }
bool my_item_reserved(size_type i) const { return item(i).second == reserved_item; }
// object management in buffer
const item_type &get_my_item(size_t i) const {
__TBB_ASSERT(my_item_valid(i),"attempt to get invalid item");
item_type *itm = (tbb::internal::punned_cast<item_type *>(&(item(i).first)));
return *(const item_type *)itm;
}
// may be called with an empty slot or a slot that has already been constructed into.
void set_my_item(size_t i, const item_type &o) {
if(item(i).second != no_item) {
destroy_item(i);
}
new(&(item(i).first)) item_type(o);
item(i).second = has_item;
}
// destructively-fetch an object from the buffer
void fetch_item(size_t i, item_type &o) {
__TBB_ASSERT(my_item_valid(i), "Trying to fetch an empty slot");
o = get_my_item(i); // could have std::move assign semantics
destroy_item(i);
}
// move an existing item from one slot to another. The moved-to slot must be unoccupied,
// the moved-from slot must exist and must not be reserved. Afterwards, from will be empty
// and to will be occupied but not reserved.
void move_item(size_t to, size_t from) {
__TBB_ASSERT(!my_item_valid(to), "Trying to move to a non-empty slot");
__TBB_ASSERT(my_item_valid(from), "Trying to move from an empty slot");
set_my_item(to, get_my_item(from)); // could have std::move semantics
destroy_item(from);
}
// put an item in an empty slot. Return true if successful, else false
bool place_item(size_t here, const item_type &me) {
#if !TBB_DEPRECATED_SEQUENCER_DUPLICATES
if(my_item_valid(here)) return false;
#endif
set_my_item(here, me);
return true;
}
// could be implemented with std::move semantics
void swap_items(size_t i, size_t j) {
__TBB_ASSERT(my_item_valid(i) && my_item_valid(j), "attempt to swap invalid item(s)");
item_type temp = get_my_item(i);
set_my_item(i, get_my_item(j));
set_my_item(j, temp);
}
void destroy_item(size_type i) {
__TBB_ASSERT(my_item_valid(i), "destruction of invalid item");
(tbb::internal::punned_cast<item_type *>(&(item(i).first)))->~item_type();
item(i).second = no_item;
}
// returns the front element
const item_type& front() const
{
__TBB_ASSERT(my_item_valid(my_head), "attempt to fetch head non-item");
return get_my_item(my_head);
}
// returns the back element
const item_type& back() const
{
__TBB_ASSERT(my_item_valid(my_tail - 1), "attempt to fetch tail non-item");
return get_my_item(my_tail - 1);
}
// following methods are for reservation of the front of a buffer.
void reserve_item(size_type i) { __TBB_ASSERT(my_item_valid(i) && !my_item_reserved(i), "item cannot be reserved"); item(i).second = reserved_item; }
void release_item(size_type i) { __TBB_ASSERT(my_item_reserved(i), "item is not reserved"); item(i).second = has_item; }
void destroy_front() { destroy_item(my_head); ++my_head; }
void destroy_back() { destroy_item(my_tail-1); --my_tail; }
// we have to be able to test against a new tail value without changing my_tail
// grow_array doesn't work if we change my_tail when the old array is too small
size_type size(size_t new_tail = 0) { return (new_tail ? new_tail : my_tail) - my_head; }
size_type capacity() { return my_array_size; }
// sequencer_node does not use this method, so we don't
// need a version that passes in the new_tail value.
bool buffer_full() { return size() >= capacity(); }
//! Grows the internal array.
void grow_my_array( size_t minimum_size ) {
// test that we haven't made the structure inconsistent.
__TBB_ASSERT(capacity() >= my_tail - my_head, "total items exceed capacity");
size_type new_size = my_array_size ? 2*my_array_size : initial_buffer_size;
while( new_size<minimum_size )
new_size*=2;
buffer_item_type* new_array = allocator_type().allocate(new_size);
// initialize validity to "no"
for( size_type i=0; i<new_size; ++i ) { new_array[i].second = no_item; }
for( size_type i=my_head; i<my_tail; ++i) {
if(my_item_valid(i)) { // sequencer_node may have empty slots
// placement-new copy-construct; could be std::move
char *new_space = (char *)&(new_array[i&(new_size-1)].first);
(void)new(new_space) item_type(get_my_item(i));
new_array[i&(new_size-1)].second = item(i).second;
}
}
clean_up_buffer(/*reset_pointers*/false);
my_array = new_array;
my_array_size = new_size;
}
bool push_back(item_type &v) {
if(buffer_full()) {
grow_my_array(size() + 1);
}
set_my_item(my_tail, v);
++my_tail;
return true;
}
bool pop_back(item_type &v) {
if (!my_item_valid(my_tail-1)) {
return false;
}
v = this->back();
destroy_back();
return true;
}
bool pop_front(item_type &v) {
if(!my_item_valid(my_head)) {
return false;
}
v = this->front();
destroy_front();
return true;
}
// This is used both for reset and for grow_my_array. In the case of grow_my_array
// we want to retain the values of the head and tail.
void clean_up_buffer(bool reset_pointers) {
if (my_array) {
for( size_type i=my_head; i<my_tail; ++i ) {
if(my_item_valid(i))
destroy_item(i);
}
allocator_type().deallocate(my_array,my_array_size);
}
my_array = NULL;
if(reset_pointers) {
my_head = my_tail = my_array_size = 0;
}
}
public:
//! Constructor
item_buffer( ) : my_array(NULL), my_array_size(0),
my_head(0), my_tail(0) {
grow_my_array(initial_buffer_size);
}
~item_buffer() {
clean_up_buffer(/*reset_pointers*/true);
}
void reset() { clean_up_buffer(/*reset_pointers*/true); grow_my_array(initial_buffer_size); }
};
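// A minimal standalone sketch (not TBB code) of the indexing scheme item_buffer
// relies on: my_head and my_tail grow monotonically and a slot is located with
// index & (capacity - 1), which is valid only while the capacity is a power of
// two, exactly the invariant grow_my_array() maintains. All names below are
// illustrative. Disabled here because it is example code, not part of this header.
#if 0
#include <cassert>
#include <cstddef>
#include <vector>

struct toy_ring {
    std::vector<int> slots;          // capacity is always a power of two
    std::size_t head = 0, tail = 0;  // monotonically increasing indices

    explicit toy_ring(std::size_t cap = 4) : slots(cap) {}

    int& at(std::size_t i) { return slots[i & (slots.size() - 1)]; }  // mask instead of modulo

    void push_back(int v) {
        if (tail - head == slots.size()) grow();
        at(tail++) = v;
    }
    int pop_front() { assert(head < tail); return at(head++); }

    void grow() {
        std::vector<int> bigger(slots.size() * 2);
        for (std::size_t i = head; i < tail; ++i)      // re-home live items under the
            bigger[i & (bigger.size() - 1)] = at(i);   // new mask, as grow_my_array does
        slots.swap(bigger);
    }
};

int main() {
    toy_ring r;
    for (int i = 0; i < 10; ++i) r.push_back(i);       // forces one grow()
    for (int i = 0; i < 10; ++i) assert(r.pop_front() == i);
    return 0;
}
#endif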
//! item_buffer with a reservable front end. NOTE: after reserving, do not
//  complete the operation with pop_front(); use consume_front() instead.
//  No synchronization is built in.
template<typename T, typename A=cache_aligned_allocator<T> >
class reservable_item_buffer : public item_buffer<T, A> {
protected:
using item_buffer<T, A>::my_item_valid;
using item_buffer<T, A>::my_head;
public:
reservable_item_buffer() : item_buffer<T, A>(), my_reserved(false) {}
void reset() {my_reserved = false; item_buffer<T,A>::reset(); }
protected:
bool reserve_front(T &v) {
if(my_reserved || !my_item_valid(this->my_head)) return false;
my_reserved = true;
// reserving the head
v = this->front();
this->reserve_item(this->my_head);
return true;
}
void consume_front() {
__TBB_ASSERT(my_reserved, "Attempt to consume a non-reserved item");
this->destroy_front();
my_reserved = false;
}
void release_front() {
__TBB_ASSERT(my_reserved, "Attempt to release a non-reserved item");
this->release_item(this->my_head);
my_reserved = false;
}
bool my_reserved;
};
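// The reserve_front/consume_front/release_front protocol above is what lets a
// reserving consumer peek at the front item and later either commit to it or
// hand it back. A hedged sketch of where this surfaces in the public API: a
// join_node with the reserving policy pulls from buffering predecessors such
// as buffer_node, consuming its reservations only when every input can supply
// an item. Names are illustrative; disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>
#include <tuple>

int main() {
    tbb::flow::graph g;
    tbb::flow::buffer_node<int>   ints(g);     // reservable predecessors
    tbb::flow::buffer_node<float> floats(g);
    tbb::flow::join_node<std::tuple<int, float>, tbb::flow::reserving> join(g);

    tbb::flow::function_node<std::tuple<int, float> > sink(
        g, tbb::flow::serial, [](const std::tuple<int, float>& t) {
            std::cout << std::get<0>(t) << ", " << std::get<1>(t) << "\n";
            return tbb::flow::continue_msg();
        });

    tbb::flow::make_edge(ints,   tbb::flow::input_port<0>(join));
    tbb::flow::make_edge(floats, tbb::flow::input_port<1>(join));
    tbb::flow::make_edge(join, sink);

    ints.try_put(1);       // reserved but released again: the float side is still empty
    floats.try_put(2.0f);  // now both reservations can be consumed and the tuple is emitted
    g.wait_for_all();
    return 0;
}
#endif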
} // namespace internal
#endif // __TBB__flow_graph_item_buffer_impl_H

File diff suppressed because it is too large


@@ -0,0 +1,971 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_node_impl_H
#define __TBB__flow_graph_node_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include "_flow_graph_item_buffer_impl.h"
//! @cond INTERNAL
namespace internal {
using tbb::internal::aggregated_operation;
using tbb::internal::aggregating_functor;
using tbb::internal::aggregator;
template< typename T, typename A >
class function_input_queue : public item_buffer<T,A> {
public:
bool empty() const {
return this->buffer_empty();
}
const T& front() const {
return this->item_buffer<T, A>::front();
}
bool pop( T& t ) {
return this->pop_front( t );
}
void pop() {
this->destroy_front();
}
bool push( T& t ) {
return this->push_back( t );
}
};
//! Input and scheduling for a function node that takes a type Input as input
// The only up-ref is apply_body_impl, which should implement the function
// call and any handling of the result.
template< typename Input, typename Policy, typename A, typename ImplType >
class function_input_base : public receiver<Input>, tbb::internal::no_assign {
enum op_type {reg_pred, rem_pred, try_fwd, tryput_bypass, app_body_bypass, occupy_concurrency
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
, add_blt_pred, del_blt_pred,
blt_pred_cnt, blt_pred_cpy // create vector copies of preds and succs
#endif
};
typedef function_input_base<Input, Policy, A, ImplType> class_type;
public:
//! The input type of this receiver
typedef Input input_type;
typedef typename receiver<input_type>::predecessor_type predecessor_type;
typedef predecessor_cache<input_type, null_mutex > predecessor_cache_type;
typedef function_input_queue<input_type, A> input_queue_type;
typedef typename tbb::internal::allocator_rebind<A, input_queue_type>::type queue_allocator_type;
__TBB_STATIC_ASSERT(!((internal::has_policy<queueing, Policy>::value) && (internal::has_policy<rejecting, Policy>::value)),
"queueing and rejecting policies can't be specified simultaneously");
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef typename predecessor_cache_type::built_predecessors_type built_predecessors_type;
typedef typename receiver<input_type>::predecessor_list_type predecessor_list_type;
#endif
//! Constructor for function_input_base
function_input_base(
graph &g, __TBB_FLOW_GRAPH_PRIORITY_ARG1(size_t max_concurrency, node_priority_t priority)
) : my_graph_ref(g), my_max_concurrency(max_concurrency)
, __TBB_FLOW_GRAPH_PRIORITY_ARG1(my_concurrency(0), my_priority(priority))
, my_queue(!internal::has_policy<rejecting, Policy>::value ? new input_queue_type() : NULL)
, forwarder_busy(false)
{
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(handler_type(this));
}
//! Copy constructor
function_input_base( const function_input_base& src)
: receiver<Input>(), tbb::internal::no_assign()
, my_graph_ref(src.my_graph_ref), my_max_concurrency(src.my_max_concurrency)
, __TBB_FLOW_GRAPH_PRIORITY_ARG1(my_concurrency(0), my_priority(src.my_priority))
, my_queue(src.my_queue ? new input_queue_type() : NULL), forwarder_busy(false)
{
my_predecessors.set_owner(this);
my_aggregator.initialize_handler(handler_type(this));
}
//! Destructor
// The queue is allocated by the constructor for {multi}function_node.
// TODO: pass the graph_buffer_policy to the base so it can allocate the queue instead.
// This would be an interface-breaking change.
virtual ~function_input_base() {
if ( my_queue ) delete my_queue;
}
task* try_put_task( const input_type& t) __TBB_override {
return try_put_task_impl(t, internal::has_policy<lightweight, Policy>());
}
//! Adds src to the list of cached predecessors.
bool register_predecessor( predecessor_type &src ) __TBB_override {
operation_type op_data(reg_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
//! Removes src from the list of cached predecessors.
bool remove_predecessor( predecessor_type &src ) __TBB_override {
operation_type op_data(rem_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
return true;
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
//! Adds to list of predecessors added by make_edge
void internal_add_built_predecessor( predecessor_type &src) __TBB_override {
operation_type op_data(add_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
//! Removes from the list of predecessors added by make_edge (used by remove_edge)
void internal_delete_built_predecessor( predecessor_type &src) __TBB_override {
operation_type op_data(del_blt_pred);
op_data.r = &src;
my_aggregator.execute(&op_data);
}
size_t predecessor_count() __TBB_override {
operation_type op_data(blt_pred_cnt);
my_aggregator.execute(&op_data);
return op_data.cnt_val;
}
void copy_predecessors(predecessor_list_type &v) __TBB_override {
operation_type op_data(blt_pred_cpy);
op_data.predv = &v;
my_aggregator.execute(&op_data);
}
built_predecessors_type &built_predecessors() __TBB_override {
return my_predecessors.built_predecessors();
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
protected:
void reset_function_input_base( reset_flags f) {
my_concurrency = 0;
if(my_queue) {
my_queue->reset();
}
reset_receiver(f);
forwarder_busy = false;
}
graph& my_graph_ref;
const size_t my_max_concurrency;
size_t my_concurrency;
__TBB_FLOW_GRAPH_PRIORITY_EXPR( node_priority_t my_priority; )
input_queue_type *my_queue;
predecessor_cache<input_type, null_mutex > my_predecessors;
void reset_receiver( reset_flags f) __TBB_override {
if( f & rf_clear_edges) my_predecessors.clear();
else
my_predecessors.reset();
__TBB_ASSERT(!(f & rf_clear_edges) || my_predecessors.empty(), "function_input_base reset failed");
}
graph& graph_reference() const __TBB_override {
return my_graph_ref;
}
task* try_get_postponed_task(const input_type& i) {
operation_type op_data(i, app_body_bypass); // tries to pop an item or get_item
my_aggregator.execute(&op_data);
return op_data.bypass_t;
}
private:
friend class apply_body_task_bypass< class_type, input_type >;
friend class forward_task_bypass< class_type >;
class operation_type : public aggregated_operation< operation_type > {
public:
char type;
union {
input_type *elem;
predecessor_type *r;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
size_t cnt_val;
predecessor_list_type *predv;
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
};
tbb::task *bypass_t;
operation_type(const input_type& e, op_type t) :
type(char(t)), elem(const_cast<input_type*>(&e)) {}
operation_type(op_type t) : type(char(t)), r(NULL) {}
};
bool forwarder_busy;
typedef internal::aggregating_functor<class_type, operation_type> handler_type;
friend class internal::aggregating_functor<class_type, operation_type>;
aggregator< handler_type, operation_type > my_aggregator;
task* perform_queued_requests() {
task* new_task = NULL;
if(my_queue) {
if(!my_queue->empty()) {
++my_concurrency;
new_task = create_body_task(my_queue->front());
my_queue->pop();
}
}
else {
input_type i;
if(my_predecessors.get_item(i)) {
++my_concurrency;
new_task = create_body_task(i);
}
}
return new_task;
}
void handle_operations(operation_type *op_list) {
operation_type *tmp;
while (op_list) {
tmp = op_list;
op_list = op_list->next;
switch (tmp->type) {
case reg_pred:
my_predecessors.add(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
if (!forwarder_busy) {
forwarder_busy = true;
spawn_forward_task();
}
break;
case rem_pred:
my_predecessors.remove(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case app_body_bypass: {
tmp->bypass_t = NULL;
__TBB_ASSERT(my_max_concurrency != 0, NULL);
--my_concurrency;
if(my_concurrency<my_max_concurrency)
tmp->bypass_t = perform_queued_requests();
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case tryput_bypass: internal_try_put_task(tmp); break;
case try_fwd: internal_forward(tmp); break;
case occupy_concurrency:
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
__TBB_store_with_release(tmp->status, SUCCEEDED);
} else {
__TBB_store_with_release(tmp->status, FAILED);
}
break;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
case add_blt_pred: {
my_predecessors.internal_add_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
}
break;
case del_blt_pred:
my_predecessors.internal_delete_built_predecessor(*(tmp->r));
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cnt:
tmp->cnt_val = my_predecessors.predecessor_count();
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
case blt_pred_cpy:
my_predecessors.copy_predecessors( *(tmp->predv) );
__TBB_store_with_release(tmp->status, SUCCEEDED);
break;
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
}
}
}
//! Put to the node, but return the task instead of enqueueing it
void internal_try_put_task(operation_type *op) {
__TBB_ASSERT(my_max_concurrency != 0, NULL);
if (my_concurrency < my_max_concurrency) {
++my_concurrency;
task * new_task = create_body_task(*(op->elem));
op->bypass_t = new_task;
__TBB_store_with_release(op->status, SUCCEEDED);
} else if ( my_queue && my_queue->push(*(op->elem)) ) {
op->bypass_t = SUCCESSFULLY_ENQUEUED;
__TBB_store_with_release(op->status, SUCCEEDED);
} else {
op->bypass_t = NULL;
__TBB_store_with_release(op->status, FAILED);
}
}
//! Creates tasks for postponed messages if available and if concurrency allows
void internal_forward(operation_type *op) {
op->bypass_t = NULL;
if (my_concurrency < my_max_concurrency || !my_max_concurrency)
op->bypass_t = perform_queued_requests();
if(op->bypass_t)
__TBB_store_with_release(op->status, SUCCEEDED);
else {
forwarder_busy = false;
__TBB_store_with_release(op->status, FAILED);
}
}
task* internal_try_put_bypass( const input_type& t ) {
operation_type op_data(t, tryput_bypass);
my_aggregator.execute(&op_data);
if( op_data.status == internal::SUCCEEDED ) {
return op_data.bypass_t;
}
return NULL;
}
task* try_put_task_impl( const input_type& t, /*lightweight=*/tbb::internal::true_type ) {
if( my_max_concurrency == 0 ) {
return apply_body_bypass(t);
} else {
operation_type check_op(t, occupy_concurrency);
my_aggregator.execute(&check_op);
if( check_op.status == internal::SUCCEEDED ) {
return apply_body_bypass(t);
}
return internal_try_put_bypass(t);
}
}
task* try_put_task_impl( const input_type& t, /*lightweight=*/tbb::internal::false_type ) {
if( my_max_concurrency == 0 ) {
return create_body_task(t);
} else {
return internal_try_put_bypass(t);
}
}
//! Applies the body to the provided input
// then decides if more work is available
task * apply_body_bypass( const input_type &i ) {
return static_cast<ImplType *>(this)->apply_body_impl_bypass(i);
}
//! allocates a task to apply a body
inline task * create_body_task( const input_type &input ) {
return (internal::is_graph_active(my_graph_ref)) ?
new( task::allocate_additional_child_of(*(my_graph_ref.root_task())) )
apply_body_task_bypass < class_type, input_type >(
*this, __TBB_FLOW_GRAPH_PRIORITY_ARG1(input, my_priority))
: NULL;
}
//! This is executed by an enqueued task, the "forwarder"
task* forward_task() {
operation_type op_data(try_fwd);
task* rval = NULL;
do {
op_data.status = WAIT;
my_aggregator.execute(&op_data);
if(op_data.status == SUCCEEDED) {
task* ttask = op_data.bypass_t;
__TBB_ASSERT( ttask && ttask != SUCCESSFULLY_ENQUEUED, NULL );
rval = combine_tasks(my_graph_ref, rval, ttask);
}
} while (op_data.status == SUCCEEDED);
return rval;
}
inline task *create_forward_task() {
return (internal::is_graph_active(my_graph_ref)) ?
new( task::allocate_additional_child_of(*(my_graph_ref.root_task())) )
forward_task_bypass< class_type >( __TBB_FLOW_GRAPH_PRIORITY_ARG1(*this, my_priority) )
: NULL;
}
//! Spawns a task that calls forward()
inline void spawn_forward_task() {
task* tp = create_forward_task();
if(tp) {
internal::spawn_in_graph_arena(graph_reference(), *tp);
}
}
}; // function_input_base
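// A hedged sketch of how the machinery above behaves through the public
// function_node: with a finite concurrency limit and the default queueing
// policy, inputs that arrive while the node is busy are parked in the input
// queue (my_queue) and drained as concurrency becomes available. Names are
// illustrative; disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>

int main() {
    tbb::flow::graph g;

    // At most one body runs at a time; additional inputs wait in the node's queue.
    tbb::flow::function_node<int, int> square(g, /*concurrency=*/1, [](int x) {
        return x * x;
    });
    tbb::flow::function_node<int> print(g, tbb::flow::serial, [](int v) {
        std::cout << v << "\n";
        return tbb::flow::continue_msg();
    });

    tbb::flow::make_edge(square, print);
    for (int i = 1; i <= 4; ++i)
        square.try_put(i);   // with queueing, try_put never rejects; items are buffered
    g.wait_for_all();        // prints the squares, typically in arrival order for a serial node
    return 0;
}
#endif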
//! Implements methods for a function node that takes a type Input as input and sends
// a type Output to its successors.
template< typename Input, typename Output, typename Policy, typename A>
class function_input : public function_input_base<Input, Policy, A, function_input<Input,Output,Policy,A> > {
public:
typedef Input input_type;
typedef Output output_type;
typedef function_body<input_type, output_type> function_body_type;
typedef function_input<Input, Output, Policy,A> my_class;
typedef function_input_base<Input, Policy, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
function_input(
graph &g, size_t max_concurrency,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body& body, node_priority_t priority)
) : base_type(g, __TBB_FLOW_GRAPH_PRIORITY_ARG1(max_concurrency, priority))
, my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) ) {
}
//! Copy constructor
function_input( const function_input& src ) :
base_type(src),
my_body( src.my_init_body->clone() ),
my_init_body(src.my_init_body->clone() ) {
}
~function_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
function_body_type &body_ref = *this->my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
output_type apply_body_impl( const input_type& i) {
// An extra copy is needed here to capture the
// body execution separately from the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)(i);
tbb::internal::fgt_end_body( my_body );
return v;
}
//TODO: consider moving into the base class
task * apply_body_impl_bypass( const input_type &i) {
output_type v = apply_body_impl(i);
#if TBB_DEPRECATED_MESSAGE_FLOW_ORDER
task* successor_task = successors().try_put_task(v);
#endif
task* postponed_task = NULL;
if( base_type::my_max_concurrency != 0 ) {
postponed_task = base_type::try_get_postponed_task(i);
__TBB_ASSERT( !postponed_task || postponed_task != SUCCESSFULLY_ENQUEUED, NULL );
}
#if TBB_DEPRECATED_MESSAGE_FLOW_ORDER
graph& g = base_type::my_graph_ref;
return combine_tasks(g, successor_task, postponed_task);
#else
if( postponed_task ) {
// make the task available for other workers since we do not know successors'
// execution policy
internal::spawn_in_graph_arena(base_type::graph_reference(), *postponed_task);
}
task* successor_task = successors().try_put_task(v);
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127) /* suppress conditional expression is constant */
#endif
if(internal::has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
if(!successor_task) {
// Return confirmative status since current
// node's body has been executed anyway
successor_task = SUCCESSFULLY_ENQUEUED;
}
}
return successor_task;
#endif /* TBB_DEPRECATED_MESSAGE_FLOW_ORDER */
}
protected:
void reset_function_input(reset_flags f) {
base_type::reset_function_input_base(f);
if(f & rf_reset_bodies) {
function_body_type *tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
}
function_body_type *my_body;
function_body_type *my_init_body;
virtual broadcast_cache<output_type > &successors() = 0;
}; // function_input
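// A hedged sketch of the lightweight policy handled by
// try_put_task_impl(..., true_type) above: for very small bodies the
// lightweight variants let the node execute the body immediately on the thread
// that calls try_put when the concurrency limit allows, instead of spawning a
// task. Names are illustrative; disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <atomic>
#include <iostream>

int main() {
    tbb::flow::graph g;
    std::atomic<int> sum(0);

    // Tiny bodies: good candidates for the lightweight execution policy.
    tbb::flow::function_node<int, int, tbb::flow::lightweight> add_one(
        g, tbb::flow::unlimited, [](int x) { return x + 1; });
    tbb::flow::function_node<int, int, tbb::flow::lightweight> accumulate(
        g, tbb::flow::serial, [&sum](int x) { sum += x; return x; });

    tbb::flow::make_edge(add_one, accumulate);
    for (int i = 0; i < 100; ++i)
        add_one.try_put(i);
    g.wait_for_all();
    std::cout << "sum = " << sum.load() << "\n";   // 1 + 2 + ... + 100 = 5050
    return 0;
}
#endif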
// helper templates to clear the successor edges of the output ports of a multifunction_node
template<int N> struct clear_element {
template<typename P> static void clear_this(P &p) {
(void)tbb::flow::get<N-1>(p).successors().clear();
clear_element<N-1>::clear_this(p);
}
template<typename P> static bool this_empty(P &p) {
if(tbb::flow::get<N-1>(p).successors().empty())
return clear_element<N-1>::this_empty(p);
return false;
}
};
template<> struct clear_element<1> {
template<typename P> static void clear_this(P &p) {
(void)tbb::flow::get<0>(p).successors().clear();
}
template<typename P> static bool this_empty(P &p) {
return tbb::flow::get<0>(p).successors().empty();
}
};
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
// helper templates to extract the output ports of a multifunction_node from the graph
template<int N> struct extract_element {
template<typename P> static void extract_this(P &p) {
(void)tbb::flow::get<N-1>(p).successors().built_successors().sender_extract(tbb::flow::get<N-1>(p));
extract_element<N-1>::extract_this(p);
}
};
template<> struct extract_element<1> {
template<typename P> static void extract_this(P &p) {
(void)tbb::flow::get<0>(p).successors().built_successors().sender_extract(tbb::flow::get<0>(p));
}
};
#endif
template <typename OutputTuple>
struct init_output_ports {
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template <typename... Args>
static OutputTuple call(graph& g, const tbb::flow::tuple<Args...>&) {
return OutputTuple(Args(g)...);
}
#else // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template <typename T1>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1>&) {
return OutputTuple(T1(g));
}
template <typename T1, typename T2>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1, T2>&) {
return OutputTuple(T1(g), T2(g));
}
template <typename T1, typename T2, typename T3>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1, T2, T3>&) {
return OutputTuple(T1(g), T2(g), T3(g));
}
template <typename T1, typename T2, typename T3, typename T4>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1, T2, T3, T4>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g));
}
template <typename T1, typename T2, typename T3, typename T4, typename T5>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1, T2, T3, T4, T5>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g));
}
#if __TBB_VARIADIC_MAX >= 6
template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
static OutputTuple call(graph& g, const tbb::flow::tuple<T1, T2, T3, T4, T5, T6>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g), T6(g));
}
#endif
#if __TBB_VARIADIC_MAX >= 7
template <typename T1, typename T2, typename T3, typename T4,
typename T5, typename T6, typename T7>
static OutputTuple call(graph& g,
const tbb::flow::tuple<T1, T2, T3, T4, T5, T6, T7>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g), T6(g), T7(g));
}
#endif
#if __TBB_VARIADIC_MAX >= 8
template <typename T1, typename T2, typename T3, typename T4,
typename T5, typename T6, typename T7, typename T8>
static OutputTuple call(graph& g,
const tbb::flow::tuple<T1, T2, T3, T4, T5, T6, T7, T8>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g), T6(g), T7(g), T8(g));
}
#endif
#if __TBB_VARIADIC_MAX >= 9
template <typename T1, typename T2, typename T3, typename T4,
typename T5, typename T6, typename T7, typename T8, typename T9>
static OutputTuple call(graph& g,
const tbb::flow::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g), T6(g), T7(g), T8(g), T9(g));
}
#endif
#if __TBB_VARIADIC_MAX >= 10
template <typename T1, typename T2, typename T3, typename T4, typename T5,
typename T6, typename T7, typename T8, typename T9, typename T10>
static OutputTuple call(graph& g,
const tbb::flow::tuple<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10>&) {
return OutputTuple(T1(g), T2(g), T3(g), T4(g), T5(g), T6(g), T7(g), T8(g), T9(g), T10(g));
}
#endif
#endif // __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
}; // struct init_output_ports
//! Implements methods for a function node that takes a type Input as input
// and has a tuple of output ports specified.
template< typename Input, typename OutputPortSet, typename Policy, typename A>
class multifunction_input : public function_input_base<Input, Policy, A, multifunction_input<Input,OutputPortSet,Policy,A> > {
public:
static const int N = tbb::flow::tuple_size<OutputPortSet>::value;
typedef Input input_type;
typedef OutputPortSet output_ports_type;
typedef multifunction_body<input_type, output_ports_type> multifunction_body_type;
typedef multifunction_input<Input, OutputPortSet, Policy, A> my_class;
typedef function_input_base<Input, Policy, A, my_class> base_type;
typedef function_input_queue<input_type, A> input_queue_type;
// constructor
template<typename Body>
multifunction_input(graph &g, size_t max_concurrency,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body& body, node_priority_t priority)
) : base_type(g, __TBB_FLOW_GRAPH_PRIORITY_ARG1(max_concurrency, priority))
, my_body( new internal::multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
, my_init_body( new internal::multifunction_body_leaf<input_type, output_ports_type, Body>(body) )
, my_output_ports(init_output_ports<output_ports_type>::call(g, my_output_ports)){
}
//! Copy constructor
multifunction_input( const multifunction_input& src ) :
base_type(src),
my_body( src.my_init_body->clone() ),
my_init_body(src.my_init_body->clone() ),
my_output_ports( init_output_ports<output_ports_type>::call(src.my_graph_ref, my_output_ports) ) {
}
~multifunction_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
multifunction_body_type &body_ref = *this->my_body;
return *static_cast<Body*>(dynamic_cast< internal::multifunction_body_leaf<input_type, output_ports_type, Body> & >(body_ref).get_body_ptr());
}
// for multifunction nodes we do not have a single successor as such. So we just tell
// the task we were successful.
//TODO: consider moving common parts with implementation in function_input into separate function
task * apply_body_impl_bypass( const input_type &i) {
tbb::internal::fgt_begin_body( my_body );
(*my_body)(i, my_output_ports);
tbb::internal::fgt_end_body( my_body );
task* ttask = NULL;
if(base_type::my_max_concurrency != 0) {
ttask = base_type::try_get_postponed_task(i);
}
return ttask ? ttask : SUCCESSFULLY_ENQUEUED;
}
output_ports_type &output_ports(){ return my_output_ports; }
protected:
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
void extract() {
extract_element<N>::extract_this(my_output_ports);
}
#endif
void reset(reset_flags f) {
base_type::reset_function_input_base(f);
if(f & rf_clear_edges)clear_element<N>::clear_this(my_output_ports);
if(f & rf_reset_bodies) {
multifunction_body_type *tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
__TBB_ASSERT(!(f & rf_clear_edges) || clear_element<N>::this_empty(my_output_ports), "multifunction_node reset failed");
}
multifunction_body_type *my_body;
multifunction_body_type *my_init_body;
output_ports_type my_output_ports;
}; // multifunction_input
// template to refer to an output port of a multifunction_node
template<size_t N, typename MOP>
typename tbb::flow::tuple_element<N, typename MOP::output_ports_type>::type &output_port(MOP &op) {
return tbb::flow::get<N>(op.output_ports());
}
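// A hedged sketch of the public multifunction_node built on multifunction_input
// above: the body receives the tuple of output ports and routes each input by
// calling try_put on the port it chooses, while output_port<N>() (just above)
// is how edges are attached to individual ports. Names are illustrative;
// disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>
#include <tuple>

int main() {
    tbb::flow::graph g;

    typedef tbb::flow::multifunction_node<int, std::tuple<int, int> > splitter_t;
    // Route even numbers to port 0 and odd numbers to port 1.
    splitter_t splitter(g, tbb::flow::unlimited,
        [](const int& v, splitter_t::output_ports_type& ports) {
            if (v % 2 == 0)
                std::get<0>(ports).try_put(v);
            else
                std::get<1>(ports).try_put(v);
        });

    tbb::flow::function_node<int> evens(g, tbb::flow::serial, [](int v) {
        std::cout << "even: " << v << "\n"; return tbb::flow::continue_msg();
    });
    tbb::flow::function_node<int> odds(g, tbb::flow::serial, [](int v) {
        std::cout << "odd:  " << v << "\n"; return tbb::flow::continue_msg();
    });

    tbb::flow::make_edge(tbb::flow::output_port<0>(splitter), evens);
    tbb::flow::make_edge(tbb::flow::output_port<1>(splitter), odds);
    for (int i = 0; i < 6; ++i)
        splitter.try_put(i);
    g.wait_for_all();
    return 0;
}
#endif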
inline void check_task_and_spawn(graph& g, task* t) {
if (t && t != SUCCESSFULLY_ENQUEUED) {
internal::spawn_in_graph_arena(g, *t);
}
}
// helper structs for split_node
template<int N>
struct emit_element {
template<typename T, typename P>
static task* emit_this(graph& g, const T &t, P &p) {
// TODO: consider to collect all the tasks in task_list and spawn them all at once
task* last_task = tbb::flow::get<N-1>(p).try_put_task(tbb::flow::get<N-1>(t));
check_task_and_spawn(g, last_task);
return emit_element<N-1>::emit_this(g,t,p);
}
};
template<>
struct emit_element<1> {
template<typename T, typename P>
static task* emit_this(graph& g, const T &t, P &p) {
task* last_task = tbb::flow::get<0>(p).try_put_task(tbb::flow::get<0>(t));
check_task_and_spawn(g, last_task);
return SUCCESSFULLY_ENQUEUED;
}
};
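// A hedged sketch of the public split_node, which uses the emit_element helpers
// above to forward each field of an incoming tuple to its own output port.
// Names are illustrative; disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>
#include <string>
#include <tuple>

int main() {
    tbb::flow::graph g;
    tbb::flow::split_node<std::tuple<int, std::string> > split(g);

    tbb::flow::function_node<int> numbers(g, tbb::flow::serial, [](int v) {
        std::cout << "number: " << v << "\n"; return tbb::flow::continue_msg();
    });
    tbb::flow::function_node<std::string> names(g, tbb::flow::serial, [](const std::string& s) {
        std::cout << "name:   " << s << "\n"; return tbb::flow::continue_msg();
    });

    // Each element of the incoming tuple goes out through the matching port.
    tbb::flow::make_edge(tbb::flow::output_port<0>(split), numbers);
    tbb::flow::make_edge(tbb::flow::output_port<1>(split), names);

    split.try_put(std::make_tuple(7, std::string("seven")));
    g.wait_for_all();
    return 0;
}
#endif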
//! Implements methods for an executable node that takes continue_msg as input
template< typename Output, typename Policy>
class continue_input : public continue_receiver {
public:
//! The input type of this receiver
typedef continue_msg input_type;
//! The output type of this receiver
typedef Output output_type;
typedef function_body<input_type, output_type> function_body_type;
typedef continue_input<output_type, Policy> class_type;
template< typename Body >
continue_input( graph &g, __TBB_FLOW_GRAPH_PRIORITY_ARG1(Body& body, node_priority_t priority) )
: continue_receiver(__TBB_FLOW_GRAPH_PRIORITY_ARG1(/*number_of_predecessors=*/0, priority))
, my_graph_ref(g)
, my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) )
{ }
template< typename Body >
continue_input( graph &g, int number_of_predecessors,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body& body, node_priority_t priority)
) : continue_receiver( __TBB_FLOW_GRAPH_PRIORITY_ARG1(number_of_predecessors, priority) )
, my_graph_ref(g)
, my_body( new internal::function_body_leaf< input_type, output_type, Body>(body) )
, my_init_body( new internal::function_body_leaf< input_type, output_type, Body>(body) )
{ }
continue_input( const continue_input& src ) : continue_receiver(src),
my_graph_ref(src.my_graph_ref),
my_body( src.my_init_body->clone() ),
my_init_body( src.my_init_body->clone() ) {}
~continue_input() {
delete my_body;
delete my_init_body;
}
template< typename Body >
Body copy_function_object() {
function_body_type &body_ref = *my_body;
return dynamic_cast< internal::function_body_leaf<input_type, output_type, Body> & >(body_ref).get_body();
}
void reset_receiver( reset_flags f) __TBB_override {
continue_receiver::reset_receiver(f);
if(f & rf_reset_bodies) {
function_body_type *tmp = my_init_body->clone();
delete my_body;
my_body = tmp;
}
}
protected:
graph& my_graph_ref;
function_body_type *my_body;
function_body_type *my_init_body;
virtual broadcast_cache<output_type > &successors() = 0;
friend class apply_body_task_bypass< class_type, continue_msg >;
//! Applies the body to the provided input
task *apply_body_bypass( input_type ) {
// An extra copy is needed here to capture the
// body execution separately from the try_put
tbb::internal::fgt_begin_body( my_body );
output_type v = (*my_body)( continue_msg() );
tbb::internal::fgt_end_body( my_body );
return successors().try_put_task( v );
}
task* execute() __TBB_override {
if(!internal::is_graph_active(my_graph_ref)) {
return NULL;
}
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (push)
#pragma warning (disable: 4127) /* suppress conditional expression is constant */
#endif
if(internal::has_policy<lightweight, Policy>::value) {
#if _MSC_VER && !__INTEL_COMPILER
#pragma warning (pop)
#endif
return apply_body_bypass( continue_msg() );
}
else {
return new ( task::allocate_additional_child_of( *(my_graph_ref.root_task()) ) )
apply_body_task_bypass< class_type, continue_msg >(
*this, __TBB_FLOW_GRAPH_PRIORITY_ARG1(continue_msg(), my_priority) );
}
}
graph& graph_reference() const __TBB_override {
return my_graph_ref;
}
}; // continue_input
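// A hedged sketch of the public continue_node built on continue_input above:
// the node counts incoming continue_msg signals and runs its body once it has
// heard from every predecessor, which is how dependency graphs are expressed.
// Names are illustrative; disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>

int main() {
    tbb::flow::graph g;
    typedef tbb::flow::continue_node<tbb::flow::continue_msg> step_t;

    step_t make_a(g, [](const tbb::flow::continue_msg&) {
        std::cout << "built A\n"; return tbb::flow::continue_msg();
    });
    step_t make_b(g, [](const tbb::flow::continue_msg&) {
        std::cout << "built B\n"; return tbb::flow::continue_msg();
    });
    step_t link(g, [](const tbb::flow::continue_msg&) {
        std::cout << "linked A and B\n"; return tbb::flow::continue_msg();
    });

    // link waits for a continue_msg from both make_a and make_b.
    tbb::flow::make_edge(make_a, link);
    tbb::flow::make_edge(make_b, link);

    make_a.try_put(tbb::flow::continue_msg());
    make_b.try_put(tbb::flow::continue_msg());
    g.wait_for_all();
    return 0;
}
#endif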
//! Implements methods for both executable and function nodes that puts Output to its successors
template< typename Output >
class function_output : public sender<Output> {
public:
template<int N> friend struct clear_element;
typedef Output output_type;
typedef typename sender<output_type>::successor_type successor_type;
typedef broadcast_cache<output_type> broadcast_cache_type;
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
typedef typename sender<output_type>::built_successors_type built_successors_type;
typedef typename sender<output_type>::successor_list_type successor_list_type;
#endif
function_output( graph& g) : my_graph_ref(g) { my_successors.set_owner(this); }
function_output(const function_output & other) : sender<output_type>(), my_graph_ref(other.my_graph_ref) {
my_successors.set_owner(this);
}
//! Adds a new successor to this node
bool register_successor( successor_type &r ) __TBB_override {
successors().register_successor( r );
return true;
}
//! Removes a successor from this node
bool remove_successor( successor_type &r ) __TBB_override {
successors().remove_successor( r );
return true;
}
#if TBB_DEPRECATED_FLOW_NODE_EXTRACTION
built_successors_type &built_successors() __TBB_override { return successors().built_successors(); }
void internal_add_built_successor( successor_type &r) __TBB_override {
successors().internal_add_built_successor( r );
}
void internal_delete_built_successor( successor_type &r) __TBB_override {
successors().internal_delete_built_successor( r );
}
size_t successor_count() __TBB_override {
return successors().successor_count();
}
void copy_successors( successor_list_type &v) __TBB_override {
successors().copy_successors(v);
}
#endif /* TBB_DEPRECATED_FLOW_NODE_EXTRACTION */
// for multifunction_node. The function_body that implements
// the node will have an input and an output tuple of ports. To put
// an item to a successor, the body should call
//
//     get<I>(output_ports).try_put(output_value);
//
// If a task pointer is returned, try_put will always spawn it and return true;
// otherwise the return value is the bool returned from successors().try_put.
task *try_put_task(const output_type &i) { // not a virtual method in this class
return my_successors.try_put_task(i);
}
broadcast_cache_type &successors() { return my_successors; }
graph& graph_reference() const { return my_graph_ref; }
protected:
broadcast_cache_type my_successors;
graph& my_graph_ref;
}; // function_output
template< typename Output >
class multifunction_output : public function_output<Output> {
public:
typedef Output output_type;
typedef function_output<output_type> base_type;
using base_type::my_successors;
multifunction_output(graph& g) : base_type(g) {my_successors.set_owner(this);}
multifunction_output( const multifunction_output& other) : base_type(other.my_graph_ref) { my_successors.set_owner(this); }
bool try_put(const output_type &i) {
task *res = try_put_task(i);
if(!res) return false;
if(res != SUCCESSFULLY_ENQUEUED) {
FLOW_SPAWN(*res); // TODO: Spawn task inside arena
}
return true;
}
using base_type::graph_reference;
protected:
task* try_put_task(const output_type &i) {
return my_successors.try_put_task(i);
}
template <int N> friend struct emit_element;
}; // multifunction_output
//composite_node
#if __TBB_FLOW_GRAPH_CPP11_FEATURES
template<typename CompositeType>
void add_nodes_impl(CompositeType*, bool) {}
template< typename CompositeType, typename NodeType1, typename... NodeTypes >
void add_nodes_impl(CompositeType *c_node, bool visible, const NodeType1& n1, const NodeTypes&... n) {
void *addr = const_cast<NodeType1 *>(&n1);
fgt_alias_port(c_node, addr, visible);
add_nodes_impl(c_node, visible, n...);
}
#endif
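// A hedged sketch of the public composite_node that the add_nodes_impl helpers
// above support (they register the inner nodes for tracing): a user type
// derives from composite_node, wires up its inner nodes, and exposes selected
// ports through set_external_ports. The class and node names are illustrative;
// disabled here because it is application code.
#if 0
#include "tbb/flow_graph.h"
#include <iostream>
#include <tuple>

// Exposes two int inputs and one int output; internally a join feeds an adder.
class adder : public tbb::flow::composite_node<std::tuple<int, int>, std::tuple<int> > {
    typedef tbb::flow::composite_node<std::tuple<int, int>, std::tuple<int> > base_type;
    tbb::flow::join_node<std::tuple<int, int>, tbb::flow::queueing> join;
    tbb::flow::function_node<std::tuple<int, int>, int> add;
public:
    adder(tbb::flow::graph& g)
        : base_type(g), join(g),
          add(g, tbb::flow::unlimited, [](const std::tuple<int, int>& v) {
              return std::get<0>(v) + std::get<1>(v);
          })
    {
        tbb::flow::make_edge(join, add);
        base_type::set_external_ports(
            base_type::input_ports_type(tbb::flow::input_port<0>(join),
                                        tbb::flow::input_port<1>(join)),
            base_type::output_ports_type(add));
    }
};

int main() {
    tbb::flow::graph g;
    adder a(g);
    tbb::flow::function_node<int> print(g, tbb::flow::serial, [](int v) {
        std::cout << "sum = " << v << "\n"; return tbb::flow::continue_msg();
    });
    tbb::flow::make_edge(tbb::flow::output_port<0>(a), print);
    tbb::flow::input_port<0>(a).try_put(3);
    tbb::flow::input_port<1>(a).try_put(4);
    g.wait_for_all();   // prints "sum = 7"
    return 0;
}
#endif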
} // internal
#endif // __TBB__flow_graph_node_impl_H


@@ -0,0 +1,269 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_node_set_impl_H
#define __TBB_flow_graph_node_set_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// Included in namespace tbb::flow::interfaceX (in flow_graph.h)
namespace internal {
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
// Visual Studio 2019 reports an error while calling predecessor_selector::get and successor_selector::get
// Seems like the well-formed expression in trailing decltype is treated as ill-formed
// TODO: investigate problems with decltype in trailing return types or find the cross-platform solution
#define __TBB_MSVC_DISABLE_TRAILING_DECLTYPE (_MSC_VER >= 1900)
namespace order {
struct undefined {};
struct following {};
struct preceding {};
}
class get_graph_helper {
public:
// TODO: consider making graph_reference() public and consistent interface to get a reference to the graph
// and remove get_graph_helper
template <typename T>
static graph& get(const T& object) {
return get_impl(object, std::is_base_of<graph_node, T>());
}
private:
// Get graph from the object of type derived from graph_node
template <typename T>
static graph& get_impl(const T& object, std::true_type) {
return static_cast<const graph_node*>(&object)->my_graph;
}
template <typename T>
static graph& get_impl(const T& object, std::false_type) {
return object.graph_reference();
}
};
template<typename Order, typename... Nodes>
struct node_set {
typedef Order order_type;
tbb::flow::tuple<Nodes&...> nodes;
node_set(Nodes&... ns) : nodes(ns...) {}
template <typename... Nodes2>
node_set(const node_set<order::undefined, Nodes2...>& set) : nodes(set.nodes) {}
graph& graph_reference() const {
return get_graph_helper::get(std::get<0>(nodes));
}
};
namespace alias_helpers {
template <typename T> using output_type = typename T::output_type;
template <typename T> using output_ports_type = typename T::output_ports_type;
template <typename T> using input_type = typename T::input_type;
template <typename T> using input_ports_type = typename T::input_ports_type;
} // namespace alias_helpers
template <typename T>
using has_output_type = tbb::internal::supports<T, alias_helpers::output_type>;
template <typename T>
using has_input_type = tbb::internal::supports<T, alias_helpers::input_type>;
template <typename T>
using has_input_ports_type = tbb::internal::supports<T, alias_helpers::input_ports_type>;
template <typename T>
using has_output_ports_type = tbb::internal::supports<T, alias_helpers::output_ports_type>;
template<typename T>
struct is_sender : std::is_base_of<sender<typename T::output_type>, T> {};
template<typename T>
struct is_receiver : std::is_base_of<receiver<typename T::input_type>, T> {};
template <typename Node>
struct is_async_node : std::false_type {};
template <typename... Args>
struct is_async_node<tbb::flow::interface11::async_node<Args...>> : std::true_type {};
template<typename FirstPredecessor, typename... Predecessors>
node_set<order::following, FirstPredecessor, Predecessors...>
follows(FirstPredecessor& first_predecessor, Predecessors&... predecessors) {
__TBB_STATIC_ASSERT((tbb::internal::conjunction<has_output_type<FirstPredecessor>,
has_output_type<Predecessors>...>::value),
"Not all node's predecessors has output_type typedef");
__TBB_STATIC_ASSERT((tbb::internal::conjunction<is_sender<FirstPredecessor>, is_sender<Predecessors>...>::value),
"Not all node's predecessors are senders");
return node_set<order::following, FirstPredecessor, Predecessors...>(first_predecessor, predecessors...);
}
template<typename... Predecessors>
node_set<order::following, Predecessors...>
follows(node_set<order::undefined, Predecessors...>& predecessors_set) {
__TBB_STATIC_ASSERT((tbb::internal::conjunction<has_output_type<Predecessors>...>::value),
"Not all nodes in the set has output_type typedef");
__TBB_STATIC_ASSERT((tbb::internal::conjunction<is_sender<Predecessors>...>::value),
"Not all nodes in the set are senders");
return node_set<order::following, Predecessors...>(predecessors_set);
}
template<typename FirstSuccessor, typename... Successors>
node_set<order::preceding, FirstSuccessor, Successors...>
precedes(FirstSuccessor& first_successor, Successors&... successors) {
__TBB_STATIC_ASSERT((tbb::internal::conjunction<has_input_type<FirstSuccessor>,
has_input_type<Successors>...>::value),
"Not all node's successors has input_type typedef");
__TBB_STATIC_ASSERT((tbb::internal::conjunction<is_receiver<FirstSuccessor>, is_receiver<Successors>...>::value),
"Not all node's successors are receivers");
return node_set<order::preceding, FirstSuccessor, Successors...>(first_successor, successors...);
}
template<typename... Successors>
node_set<order::preceding, Successors...>
precedes(node_set<order::undefined, Successors...>& successors_set) {
__TBB_STATIC_ASSERT((tbb::internal::conjunction<has_input_type<Successors>...>::value),
"Not all nodes in the set has input_type typedef");
__TBB_STATIC_ASSERT((tbb::internal::conjunction<is_receiver<Successors>...>::value),
"Not all nodes in the set are receivers");
return node_set<order::preceding, Successors...>(successors_set);
}
template <typename Node, typename... Nodes>
node_set<order::undefined, Node, Nodes...>
make_node_set(Node& first_node, Nodes&... nodes) {
return node_set<order::undefined, Node, Nodes...>(first_node, nodes...);
}
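// Illustrative sketch only (assumes the __TBB_PREVIEW_FLOW_GRAPH_NODE_SET feature and C++17
// deduction guides are enabled; `g`, `f1`, `f2` and `j` are hypothetical):
//
//     tbb::flow::graph g;
//     tbb::flow::function_node f1(g, tbb::flow::unlimited, [](const int& v) { return v + 1; });
//     tbb::flow::function_node f2(g, tbb::flow::unlimited, [](const int& v) { return v * 2; });
//     // Constructing a node from follows(f1, f2) both deduces its template arguments and
//     // connects f1 and f2 as predecessors (see make_edges_in_order below).
//     tbb::flow::join_node j(tbb::flow::follows(f1, f2));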
template<size_t I>
class successor_selector {
template <typename NodeType>
static auto get_impl(NodeType& node, std::true_type) -> decltype(input_port<I>(node)) {
return input_port<I>(node);
}
template <typename NodeType>
static NodeType& get_impl(NodeType& node, std::false_type) { return node; }
public:
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get(NodeType& node)
#else
static auto get(NodeType& node) -> decltype(get_impl(node, has_input_ports_type<NodeType>()))
#endif
{
return get_impl(node, has_input_ports_type<NodeType>());
}
};
template<size_t I>
class predecessor_selector {
template <typename NodeType>
static auto internal_get(NodeType& node, std::true_type) -> decltype(output_port<I>(node)) {
return output_port<I>(node);
}
template <typename NodeType>
static NodeType& internal_get(NodeType& node, std::false_type) { return node;}
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get_impl(NodeType& node, std::false_type)
#else
static auto get_impl(NodeType& node, std::false_type) -> decltype(internal_get(node, has_output_ports_type<NodeType>()))
#endif
{
return internal_get(node, has_output_ports_type<NodeType>());
}
template <typename AsyncNode>
static AsyncNode& get_impl(AsyncNode& node, std::true_type) { return node; }
public:
template <typename NodeType>
#if __TBB_MSVC_DISABLE_TRAILING_DECLTYPE
static auto& get(NodeType& node)
#else
static auto get(NodeType& node) -> decltype(get_impl(node, is_async_node<NodeType>()))
#endif
{
return get_impl(node, is_async_node<NodeType>());
}
};
template<size_t I>
class make_edges_helper {
public:
template<typename PredecessorsTuple, typename NodeType>
static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) {
make_edge(std::get<I>(predecessors), successor_selector<I>::get(node));
make_edges_helper<I - 1>::connect_predecessors(predecessors, node);
}
template<typename SuccessorsTuple, typename NodeType>
static void connect_successors(NodeType& node, SuccessorsTuple& successors) {
make_edge(predecessor_selector<I>::get(node), std::get<I>(successors));
make_edges_helper<I - 1>::connect_successors(node, successors);
}
};
template<>
struct make_edges_helper<0> {
template<typename PredecessorsTuple, typename NodeType>
static void connect_predecessors(PredecessorsTuple& predecessors, NodeType& node) {
make_edge(std::get<0>(predecessors), successor_selector<0>::get(node));
}
template<typename SuccessorsTuple, typename NodeType>
static void connect_successors(NodeType& node, SuccessorsTuple& successors) {
make_edge(predecessor_selector<0>::get(node), std::get<0>(successors));
}
};
// TODO: consider adding an overload for making edges between node sets
template<typename NodeType, typename OrderFlagType, typename... Args>
void make_edges(const node_set<OrderFlagType, Args...>& s, NodeType& node) {
const std::size_t SetSize = tbb::flow::tuple_size<decltype(s.nodes)>::value;
make_edges_helper<SetSize - 1>::connect_predecessors(s.nodes, node);
}
template <typename NodeType, typename OrderFlagType, typename... Args>
void make_edges(NodeType& node, const node_set<OrderFlagType, Args...>& s) {
const std::size_t SetSize = tbb::flow::tuple_size<decltype(s.nodes)>::value;
make_edges_helper<SetSize - 1>::connect_successors(node, s.nodes);
}
template <typename NodeType, typename... Nodes>
void make_edges_in_order(const node_set<order::following, Nodes...>& ns, NodeType& node) {
make_edges(ns, node);
}
template <typename NodeType, typename... Nodes>
void make_edges_in_order(const node_set<order::preceding, Nodes...>& ns, NodeType& node) {
make_edges(node, ns);
}
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
} // namespace internal
#endif // __TBB_flow_graph_node_set_impl_H

View File

@@ -0,0 +1,270 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_nodes_deduction_H
#define __TBB_flow_graph_nodes_deduction_H
#if __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
namespace tbb {
namespace flow {
namespace interface11 {
template <typename Input, typename Output>
struct declare_body_types {
using input_type = Input;
using output_type = Output;
};
template <typename T> struct body_types;
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(const Input&) const> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(const Input&)> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(Input&) const> : declare_body_types<Input, Output> {};
template <typename T, typename Input, typename Output>
struct body_types<Output (T::*)(Input&)> : declare_body_types<Input, Output> {};
template <typename Input, typename Output>
struct body_types<Output (*)(Input&)> : declare_body_types<Input, Output> {};
template <typename Input, typename Output>
struct body_types<Output (*)(const Input&)> : declare_body_types<Input, Output> {};
template <typename Body>
using input_t = typename body_types<Body>::input_type;
template <typename Body>
using output_t = typename body_types<Body>::output_type;
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(const Input&) const)->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(const Input&))->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(Input&) const)->decltype(name);
template <typename T, typename Input, typename Output>
auto decide_on_operator_overload(Output (T::*name)(Input&))->decltype(name);
template <typename Input, typename Output>
auto decide_on_operator_overload(Output (*name)(const Input&))->decltype(name);
template <typename Input, typename Output>
auto decide_on_operator_overload(Output (*name)(Input&))->decltype(name);
template <typename Body>
decltype(decide_on_operator_overload(&Body::operator())) decide_on_callable_type(int);
template <typename Body>
decltype(decide_on_operator_overload(std::declval<Body>())) decide_on_callable_type(...);
// Deduction guides for Flow Graph nodes
#if TBB_USE_SOURCE_NODE_AS_ALIAS
template <typename GraphOrSet, typename Body>
source_node(GraphOrSet&&, Body)
->source_node<input_t<decltype(decide_on_callable_type<Body>(0))>>;
#else
template <typename GraphOrSet, typename Body>
source_node(GraphOrSet&&, Body, bool = true)
->source_node<input_t<decltype(decide_on_callable_type<Body>(0))>>;
#endif
template <typename GraphOrSet, typename Body>
input_node(GraphOrSet&&, Body, bool = true)
->input_node<input_t<decltype(decide_on_callable_type<Body>(0))>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
struct decide_on_set;
template <typename Node, typename... Nodes>
struct decide_on_set<node_set<internal::order::following, Node, Nodes...>> {
using type = typename Node::output_type;
};
template <typename Node, typename... Nodes>
struct decide_on_set<node_set<internal::order::preceding, Node, Nodes...>> {
using type = typename Node::input_type;
};
template <typename NodeSet>
using decide_on_set_t = typename decide_on_set<std::decay_t<NodeSet>>::type;
template <typename NodeSet>
broadcast_node(const NodeSet&)
->broadcast_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
buffer_node(const NodeSet&)
->buffer_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
queue_node(const NodeSet&)
->queue_node<decide_on_set_t<NodeSet>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename GraphOrProxy, typename Sequencer>
sequencer_node(GraphOrProxy&&, Sequencer)
->sequencer_node<input_t<decltype(decide_on_callable_type<Sequencer>(0))>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet, typename Compare>
priority_queue_node(const NodeSet&, const Compare&)
->priority_queue_node<decide_on_set_t<NodeSet>, Compare>;
template <typename NodeSet>
priority_queue_node(const NodeSet&)
->priority_queue_node<decide_on_set_t<NodeSet>, std::less<decide_on_set_t<NodeSet>>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename Key>
struct join_key {
using type = Key;
};
template <typename T>
struct join_key<const T&> {
using type = T&;
};
template <typename Key>
using join_key_t = typename join_key<Key>::type;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename Policy, typename... Predecessors>
join_node(const node_set<internal::order::following, Predecessors...>&, Policy)
->join_node<std::tuple<typename Predecessors::output_type...>,
Policy>;
template <typename Policy, typename Successor, typename... Successors>
join_node(const node_set<internal::order::preceding, Successor, Successors...>&, Policy)
->join_node<typename Successor::input_type, Policy>;
template <typename... Predecessors>
join_node(const node_set<internal::order::following, Predecessors...>)
->join_node<std::tuple<typename Predecessors::output_type...>,
queueing>;
template <typename Successor, typename... Successors>
join_node(const node_set<internal::order::preceding, Successor, Successors...>)
->join_node<typename Successor::input_type, queueing>;
#endif
template <typename GraphOrProxy, typename Body, typename... Bodies>
join_node(GraphOrProxy&&, Body, Bodies...)
->join_node<std::tuple<input_t<decltype(decide_on_callable_type<Body>(0))>,
input_t<decltype(decide_on_callable_type<Bodies>(0))>...>,
key_matching<join_key_t<output_t<decltype(decide_on_callable_type<Body>(0))>>>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename... Predecessors>
indexer_node(const node_set<internal::order::following, Predecessors...>&)
->indexer_node<typename Predecessors::output_type...>;
#endif
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
limiter_node(const NodeSet&, size_t)
->limiter_node<decide_on_set_t<NodeSet>>;
template <typename Predecessor, typename... Predecessors>
split_node(const node_set<internal::order::following, Predecessor, Predecessors...>&)
->split_node<typename Predecessor::output_type>;
template <typename... Successors>
split_node(const node_set<internal::order::preceding, Successors...>&)
->split_node<std::tuple<typename Successors::input_type...>>;
#endif
template <typename GraphOrSet, typename Body, typename Policy>
function_node(GraphOrSet&&,
size_t, Body,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Policy, node_priority_t = tbb::flow::internal::no_priority))
->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>,
output_t<decltype(decide_on_callable_type<Body>(0))>,
Policy>;
template <typename GraphOrSet, typename Body>
function_node(GraphOrSet&&, size_t,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body, node_priority_t = tbb::flow::internal::no_priority))
->function_node<input_t<decltype(decide_on_callable_type<Body>(0))>,
output_t<decltype(decide_on_callable_type<Body>(0))>,
queueing>;
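// Illustrative sketch only (hypothetical graph `g`): with the guides above, the node's template
// arguments are deduced from the body's call operator, so
//
//     tbb::flow::function_node f(g, tbb::flow::unlimited,
//                                [](const int& v) { return v * 0.5; });
//
// is deduced as function_node<int, double, queueing>.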
template <typename Output>
struct continue_output {
using type = Output;
};
template <>
struct continue_output<void> {
using type = continue_msg;
};
template <typename T>
using continue_output_t = typename continue_output<T>::type;
template <typename GraphOrSet, typename Body, typename Policy>
continue_node(GraphOrSet&&, Body,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Policy, node_priority_t = tbb::flow::internal::no_priority))
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
Policy>;
template <typename GraphOrSet, typename Body, typename Policy>
continue_node(GraphOrSet&&,
int, Body,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Policy, node_priority_t = tbb::flow::internal::no_priority))
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
Policy>;
template <typename GraphOrSet, typename Body>
continue_node(GraphOrSet&&,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body, node_priority_t = tbb::flow::internal::no_priority))
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
internal::Policy<void>>;
template <typename GraphOrSet, typename Body>
continue_node(GraphOrSet&&, int,
__TBB_FLOW_GRAPH_PRIORITY_ARG1(Body, node_priority_t = tbb::flow::internal::no_priority))
->continue_node<continue_output_t<std::invoke_result_t<Body, continue_msg>>,
internal::Policy<void>>;
#if __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
template <typename NodeSet>
overwrite_node(const NodeSet&)
->overwrite_node<decide_on_set_t<NodeSet>>;
template <typename NodeSet>
write_once_node(const NodeSet&)
->write_once_node<decide_on_set_t<NodeSet>>;
#endif // __TBB_PREVIEW_FLOW_GRAPH_NODE_SET
} // namespace interface11
} // namespace flow
} // namespace tbb
#endif // __TBB_CPP17_DEDUCTION_GUIDES_PRESENT
#endif // __TBB_flow_graph_nodes_deduction_H

View File

@@ -0,0 +1,743 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_flow_graph_streaming_H
#define __TBB_flow_graph_streaming_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if __TBB_PREVIEW_STREAMING_NODE
// Included in namespace tbb::flow::interfaceX (in flow_graph.h)
namespace internal {
template <int N1, int N2>
struct port_ref_impl {
// "+1" since the port_ref range is a closed interval (includes its endpoints).
static const int size = N2 - N1 + 1;
};
} // internal
// port_ref_impl exists for syntactic convenience: the compile-time constants are deduced from the helper's return type,
// so the helper can be used without parentheses, e.g. "port_ref<0>".
template <int N1, int N2 = N1>
__TBB_DEPRECATED internal::port_ref_impl<N1,N2> port_ref() {
return internal::port_ref_impl<N1,N2>();
}
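// Illustrative sketch only (hypothetical streaming_node `n` with three input ports):
// num_arguments below accepts both spellings, so either form can be passed to set_args():
//
//     n.set_args( port_ref<0, 2> );      // function-pointer form, no parentheses
//     n.set_args( port_ref<0>(), 42 );   // call form, mixing a port reference with a constant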
namespace internal {
template <typename T>
struct num_arguments {
static const int value = 1;
};
template <int N1, int N2>
struct num_arguments<port_ref_impl<N1,N2>(*)()> {
static const int value = port_ref_impl<N1,N2>::size;
};
template <int N1, int N2>
struct num_arguments<port_ref_impl<N1,N2>> {
static const int value = port_ref_impl<N1,N2>::size;
};
template <typename... Args>
void ignore_return_values( Args&&... ) {}
template <typename T>
T or_return_values( T&& t ) { return t; }
template <typename T, typename... Rest>
T or_return_values( T&& t, Rest&&... rest ) {
return t | or_return_values( std::forward<Rest>(rest)... );
}
template<typename JP>
struct key_from_policy {
typedef size_t type;
typedef std::false_type is_key_matching;
};
template<typename Key>
struct key_from_policy< key_matching<Key> > {
typedef Key type;
typedef std::true_type is_key_matching;
};
template<typename Key>
struct key_from_policy< key_matching<Key&> > {
typedef const Key &type;
typedef std::true_type is_key_matching;
};
template<typename Device, typename Key>
class streaming_device_with_key {
Device my_device;
typename std::decay<Key>::type my_key;
public:
// TODO: investigate why default constructor is required
streaming_device_with_key() {}
streaming_device_with_key( const Device& d, Key k ) : my_device( d ), my_key( k ) {}
Key key() const { return my_key; }
const Device& device() const { return my_device; }
};
// --------- Kernel argument helpers --------- //
template <typename T>
struct is_port_ref_impl {
typedef std::false_type type;
};
template <int N1, int N2>
struct is_port_ref_impl< port_ref_impl<N1, N2> > {
typedef std::true_type type;
};
template <int N1, int N2>
struct is_port_ref_impl< port_ref_impl<N1, N2>( * )() > {
typedef std::true_type type;
};
template <typename T>
struct is_port_ref {
typedef typename is_port_ref_impl< typename tbb::internal::strip<T>::type >::type type;
};
template <typename ...Args1>
struct convert_and_call_impl;
template <typename A1, typename ...Args1>
struct convert_and_call_impl<A1, Args1...> {
static const size_t my_delta = 1; // Index 0 contains device
template <typename F, typename Tuple, typename ...Args2>
static void doit(F& f, Tuple& t, A1& a1, Args1&... args1, Args2&... args2) {
convert_and_call_impl<A1, Args1...>::doit_impl(typename is_port_ref<A1>::type(), f, t, a1, args1..., args2...);
}
template <typename F, typename Tuple, typename ...Args2>
static void doit_impl(std::false_type, F& f, Tuple& t, A1& a1, Args1&... args1, Args2&... args2) {
convert_and_call_impl<Args1...>::doit(f, t, args1..., args2..., a1);
}
template <typename F, typename Tuple, int N1, int N2, typename ...Args2>
static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl<N1, N2>, Args1&... args1, Args2&... args2) {
convert_and_call_impl<port_ref_impl<N1 + 1,N2>, Args1...>::doit_impl(x, f, t, port_ref<N1 + 1, N2>(), args1...,
args2..., std::get<N1 + my_delta>(t));
}
template <typename F, typename Tuple, int N, typename ...Args2>
static void doit_impl(std::true_type, F& f, Tuple& t, port_ref_impl<N, N>, Args1&... args1, Args2&... args2) {
convert_and_call_impl<Args1...>::doit(f, t, args1..., args2..., std::get<N + my_delta>(t));
}
template <typename F, typename Tuple, int N1, int N2, typename ...Args2>
static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl<N1, N2>(* fn)(), Args1&... args1, Args2&... args2) {
doit_impl(x, f, t, fn(), args1..., args2...);
}
template <typename F, typename Tuple, int N, typename ...Args2>
static void doit_impl(std::true_type x, F& f, Tuple& t, port_ref_impl<N, N>(* fn)(), Args1&... args1, Args2&... args2) {
doit_impl(x, f, t, fn(), args1..., args2...);
}
};
template <>
struct convert_and_call_impl<> {
template <typename F, typename Tuple, typename ...Args2>
static void doit(F& f, Tuple&, Args2&... args2) {
f(args2...);
}
};
// ------------------------------------------- //
template<typename JP, typename StreamFactory, typename... Ports>
struct streaming_node_traits {
    // Do not use 'using' instead of 'struct' here because Microsoft Visual C++ 12.0 fails to compile it.
template <typename T>
struct async_msg_type {
typedef typename StreamFactory::template async_msg_type<T> type;
};
typedef tuple< typename async_msg_type<Ports>::type... > input_tuple;
typedef input_tuple output_tuple;
typedef tuple< streaming_device_with_key< typename StreamFactory::device_type, typename key_from_policy<JP>::type >,
typename async_msg_type<Ports>::type... > kernel_input_tuple;
    // Workaround for VS2013: indexer_node parameter pack expansion for streaming_node
typedef indexer_node< typename async_msg_type<Ports>::type... > indexer_node_type;
};
// Default empty implementation
template<typename StreamFactory, typename KernelInputTuple, typename = void>
class kernel_executor_helper {
typedef typename StreamFactory::device_type device_type;
typedef typename StreamFactory::kernel_type kernel_type;
typedef KernelInputTuple kernel_input_tuple;
protected:
template <typename ...Args>
void enqueue_kernel_impl( kernel_input_tuple&, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const {
factory.send_kernel( device, kernel, args... );
}
};
// Implementation for StreamFactory supporting range
template<typename StreamFactory, typename KernelInputTuple>
class kernel_executor_helper<StreamFactory, KernelInputTuple, typename tbb::internal::void_t< typename StreamFactory::range_type >::type > {
typedef typename StreamFactory::device_type device_type;
typedef typename StreamFactory::kernel_type kernel_type;
typedef KernelInputTuple kernel_input_tuple;
typedef typename StreamFactory::range_type range_type;
    // Container for the range. It can hold either a port reference or a real range.
struct range_wrapper {
virtual range_type get_range( const kernel_input_tuple &ip ) const = 0;
virtual range_wrapper *clone() const = 0;
virtual ~range_wrapper() {}
};
struct range_value : public range_wrapper {
range_value( const range_type& value ) : my_value(value) {}
range_value( range_type&& value ) : my_value(std::move(value)) {}
range_type get_range( const kernel_input_tuple & ) const __TBB_override {
return my_value;
}
range_wrapper *clone() const __TBB_override {
return new range_value(my_value);
}
private:
range_type my_value;
};
template <int N>
struct range_mapper : public range_wrapper {
range_mapper() {}
range_type get_range( const kernel_input_tuple &ip ) const __TBB_override {
// "+1" since get<0>(ip) is StreamFactory::device.
return get<N + 1>(ip).data(false);
}
range_wrapper *clone() const __TBB_override {
return new range_mapper<N>;
}
};
protected:
template <typename ...Args>
void enqueue_kernel_impl( kernel_input_tuple& ip, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const {
__TBB_ASSERT(my_range_wrapper, "Range is not set. Call set_range() before running streaming_node.");
factory.send_kernel( device, kernel, my_range_wrapper->get_range(ip), args... );
}
public:
kernel_executor_helper() : my_range_wrapper(NULL) {}
kernel_executor_helper(const kernel_executor_helper& executor) : my_range_wrapper(executor.my_range_wrapper ? executor.my_range_wrapper->clone() : NULL) {}
kernel_executor_helper(kernel_executor_helper&& executor) : my_range_wrapper(executor.my_range_wrapper) {
        // Set the moved-from holder's wrapper to NULL to prevent double deallocation
executor.my_range_wrapper = NULL;
}
~kernel_executor_helper() {
if (my_range_wrapper) delete my_range_wrapper;
}
void set_range(const range_type& work_size) {
my_range_wrapper = new range_value(work_size);
}
void set_range(range_type&& work_size) {
my_range_wrapper = new range_value(std::move(work_size));
}
template <int N>
void set_range(port_ref_impl<N, N>) {
my_range_wrapper = new range_mapper<N>;
}
template <int N>
void set_range(port_ref_impl<N, N>(*)()) {
my_range_wrapper = new range_mapper<N>;
}
private:
range_wrapper* my_range_wrapper;
};
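// Illustrative sketch only (hypothetical node `n` whose StreamFactory defines range_type):
//
//     n.set_range( my_range );           // use an explicit range for every kernel launch
//     n.set_range( port_ref<1> );        // take the range from input port 1 at run time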
} // internal
/*
/---------------------------------------- streaming_node ------------------------------------\
| |
| /--------------\ /----------------------\ /-----------\ /----------------------\ |
| | | | (device_with_key) O---O | | | |
| | | | | | | | | |
O---O indexer_node O---O device_selector_node O---O join_node O---O kernel_node O---O
| | | | (multifunction_node) | | | | (multifunction_node) | |
O---O | | O---O | | O---O
| \--------------/ \----------------------/ \-----------/ \----------------------/ |
| |
\--------------------------------------------------------------------------------------------/
*/
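// Illustrative construction sketch only (every name below is a hypothetical placeholder; a
// concrete StreamFactory, such as an OpenCL factory, supplies the real device and kernel types):
//
//     streaming_node< tuple<T1, T2>, queueing, my_stream_factory >
//         n( g, my_kernel, my_device_selector, my_factory );
//     n.set_range( my_range );           // see kernel_executor_helper above
//     n.set_args( port_ref<0, 1> );      // map both input ports to kernel arguments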
template<typename... Args>
class __TBB_DEPRECATED streaming_node;
template<typename... Ports, typename JP, typename StreamFactory>
class __TBB_DEPRECATED
streaming_node< tuple<Ports...>, JP, StreamFactory >
: public composite_node < typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::input_tuple,
typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::output_tuple >
, public internal::kernel_executor_helper< StreamFactory, typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::kernel_input_tuple >
{
typedef typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::input_tuple input_tuple;
typedef typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::output_tuple output_tuple;
typedef typename internal::key_from_policy<JP>::type key_type;
protected:
typedef typename StreamFactory::device_type device_type;
typedef typename StreamFactory::kernel_type kernel_type;
private:
typedef internal::streaming_device_with_key<device_type, key_type> device_with_key_type;
typedef composite_node<input_tuple, output_tuple> base_type;
static const size_t NUM_INPUTS = tuple_size<input_tuple>::value;
static const size_t NUM_OUTPUTS = tuple_size<output_tuple>::value;
typedef typename internal::make_sequence<NUM_INPUTS>::type input_sequence;
typedef typename internal::make_sequence<NUM_OUTPUTS>::type output_sequence;
typedef typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::indexer_node_type indexer_node_type;
typedef typename indexer_node_type::output_type indexer_node_output_type;
typedef typename internal::streaming_node_traits<JP, StreamFactory, Ports...>::kernel_input_tuple kernel_input_tuple;
typedef multifunction_node<indexer_node_output_type, kernel_input_tuple> device_selector_node;
typedef multifunction_node<kernel_input_tuple, output_tuple> kernel_multifunction_node;
template <int... S>
typename base_type::input_ports_type get_input_ports( internal::sequence<S...> ) {
return std::tie( internal::input_port<S>( my_indexer_node )... );
}
template <int... S>
typename base_type::output_ports_type get_output_ports( internal::sequence<S...> ) {
return std::tie( internal::output_port<S>( my_kernel_node )... );
}
typename base_type::input_ports_type get_input_ports() {
return get_input_ports( input_sequence() );
}
typename base_type::output_ports_type get_output_ports() {
return get_output_ports( output_sequence() );
}
template <int N>
int make_Nth_edge() {
make_edge( internal::output_port<N>( my_device_selector_node ), internal::input_port<N>( my_join_node ) );
return 0;
}
template <int... S>
void make_edges( internal::sequence<S...> ) {
make_edge( my_indexer_node, my_device_selector_node );
make_edge( my_device_selector_node, my_join_node );
internal::ignore_return_values( make_Nth_edge<S + 1>()... );
make_edge( my_join_node, my_kernel_node );
}
void make_edges() {
make_edges( input_sequence() );
}
class device_selector_base {
public:
virtual void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) = 0;
virtual device_selector_base *clone( streaming_node &n ) const = 0;
virtual ~device_selector_base() {}
};
template <typename UserFunctor>
class device_selector : public device_selector_base, tbb::internal::no_assign {
public:
device_selector( UserFunctor uf, streaming_node &n, StreamFactory &f )
: my_dispatch_funcs( create_dispatch_funcs( input_sequence() ) )
, my_user_functor( uf ), my_node(n), my_factory( f )
{
my_port_epoches.fill( 0 );
}
void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) __TBB_override {
(this->*my_dispatch_funcs[ v.tag() ])( my_port_epoches[ v.tag() ], v, op );
__TBB_ASSERT( (tbb::internal::is_same_type<typename internal::key_from_policy<JP>::is_key_matching, std::false_type>::value)
|| my_port_epoches[v.tag()] == 0, "Epoch is changed when key matching is requested" );
}
device_selector_base *clone( streaming_node &n ) const __TBB_override {
return new device_selector( my_user_functor, n, my_factory );
}
private:
typedef void(device_selector<UserFunctor>::*send_and_put_fn_type)(size_t &, const indexer_node_output_type &, typename device_selector_node::output_ports_type &);
typedef std::array < send_and_put_fn_type, NUM_INPUTS > dispatch_funcs_type;
template <int... S>
static dispatch_funcs_type create_dispatch_funcs( internal::sequence<S...> ) {
dispatch_funcs_type dispatch = { { &device_selector<UserFunctor>::send_and_put_impl<S>... } };
return dispatch;
}
template <typename T>
key_type get_key( std::false_type, const T &, size_t &epoch ) {
__TBB_STATIC_ASSERT( (tbb::internal::is_same_type<key_type, size_t>::value), "" );
return epoch++;
}
template <typename T>
key_type get_key( std::true_type, const T &t, size_t &/*epoch*/ ) {
using tbb::flow::key_from_message;
return key_from_message<key_type>( t );
}
template <int N>
void send_and_put_impl( size_t &epoch, const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) {
typedef typename tuple_element<N + 1, typename device_selector_node::output_ports_type>::type::output_type elem_type;
elem_type e = internal::cast_to<elem_type>( v );
device_type device = get_device( get_key( typename internal::key_from_policy<JP>::is_key_matching(), e, epoch ), get<0>( op ) );
my_factory.send_data( device, e );
get<N + 1>( op ).try_put( e );
}
template< typename DevicePort >
device_type get_device( key_type key, DevicePort& dp ) {
typename std::unordered_map<typename std::decay<key_type>::type, epoch_desc>::iterator it = my_devices.find( key );
if ( it == my_devices.end() ) {
device_type d = my_user_functor( my_factory );
std::tie( it, std::ignore ) = my_devices.insert( std::make_pair( key, d ) );
bool res = dp.try_put( device_with_key_type( d, key ) );
__TBB_ASSERT_EX( res, NULL );
my_node.notify_new_device( d );
}
epoch_desc &e = it->second;
device_type d = e.my_device;
if ( ++e.my_request_number == NUM_INPUTS ) my_devices.erase( it );
return d;
}
struct epoch_desc {
epoch_desc(device_type d ) : my_device( d ), my_request_number( 0 ) {}
device_type my_device;
size_t my_request_number;
};
std::unordered_map<typename std::decay<key_type>::type, epoch_desc> my_devices;
std::array<size_t, NUM_INPUTS> my_port_epoches;
dispatch_funcs_type my_dispatch_funcs;
UserFunctor my_user_functor;
streaming_node &my_node;
StreamFactory &my_factory;
};
class device_selector_body {
public:
device_selector_body( device_selector_base *d ) : my_device_selector( d ) {}
void operator()( const indexer_node_output_type &v, typename device_selector_node::output_ports_type &op ) {
(*my_device_selector)(v, op);
}
private:
device_selector_base *my_device_selector;
};
// TODO: investigate why copy-construction is disallowed
class args_storage_base : tbb::internal::no_copy {
public:
typedef typename kernel_multifunction_node::output_ports_type output_ports_type;
virtual void enqueue( kernel_input_tuple &ip, output_ports_type &op, const streaming_node &n ) = 0;
virtual void send( device_type d ) = 0;
virtual args_storage_base *clone() const = 0;
virtual ~args_storage_base () {}
protected:
args_storage_base( const kernel_type& kernel, StreamFactory &f )
: my_kernel( kernel ), my_factory( f )
{}
args_storage_base( const args_storage_base &k )
: tbb::internal::no_copy(), my_kernel( k.my_kernel ), my_factory( k.my_factory )
{}
const kernel_type my_kernel;
StreamFactory &my_factory;
};
template <typename... Args>
class args_storage : public args_storage_base {
typedef typename args_storage_base::output_ports_type output_ports_type;
// ---------- Update events helpers ---------- //
template <int N>
bool do_try_put( const kernel_input_tuple& ip, output_ports_type &op ) const {
const auto& t = get<N + 1>( ip );
auto &port = get<N>( op );
return port.try_put( t );
}
template <int... S>
bool do_try_put( const kernel_input_tuple& ip, output_ports_type &op, internal::sequence<S...> ) const {
return internal::or_return_values( do_try_put<S>( ip, op )... );
}
// ------------------------------------------- //
class run_kernel_func : tbb::internal::no_assign {
public:
run_kernel_func( kernel_input_tuple &ip, const streaming_node &node, const args_storage& storage )
: my_kernel_func( ip, node, storage, get<0>(ip).device() ) {}
        // It is impossible to use Args... here because a function pointer cannot be implicitly converted to a function reference.
// Allow the compiler to deduce types for function pointers automatically.
template <typename... FnArgs>
void operator()( FnArgs&... args ) {
internal::convert_and_call_impl<FnArgs...>::doit( my_kernel_func, my_kernel_func.my_ip, args... );
}
private:
struct kernel_func : tbb::internal::no_copy {
kernel_input_tuple &my_ip;
const streaming_node &my_node;
const args_storage& my_storage;
device_type my_device;
kernel_func( kernel_input_tuple &ip, const streaming_node &node, const args_storage& storage, device_type device )
: my_ip( ip ), my_node( node ), my_storage( storage ), my_device( device )
{}
template <typename... FnArgs>
void operator()( FnArgs&... args ) {
my_node.enqueue_kernel( my_ip, my_storage.my_factory, my_device, my_storage.my_kernel, args... );
}
} my_kernel_func;
};
template<typename FinalizeFn>
class run_finalize_func : tbb::internal::no_assign {
public:
run_finalize_func( kernel_input_tuple &ip, StreamFactory &factory, FinalizeFn fn )
: my_ip( ip ), my_finalize_func( factory, get<0>(ip).device(), fn ) {}
        // It is impossible to use Args... here because a function pointer cannot be implicitly converted to a function reference.
// Allow the compiler to deduce types for function pointers automatically.
template <typename... FnArgs>
void operator()( FnArgs&... args ) {
internal::convert_and_call_impl<FnArgs...>::doit( my_finalize_func, my_ip, args... );
}
private:
kernel_input_tuple &my_ip;
struct finalize_func : tbb::internal::no_assign {
StreamFactory &my_factory;
device_type my_device;
FinalizeFn my_fn;
finalize_func( StreamFactory &factory, device_type device, FinalizeFn fn )
: my_factory(factory), my_device(device), my_fn(fn) {}
template <typename... FnArgs>
void operator()( FnArgs&... args ) {
my_factory.finalize( my_device, my_fn, args... );
}
} my_finalize_func;
};
template<typename FinalizeFn>
static run_finalize_func<FinalizeFn> make_run_finalize_func( kernel_input_tuple &ip, StreamFactory &factory, FinalizeFn fn ) {
return run_finalize_func<FinalizeFn>( ip, factory, fn );
}
class send_func : tbb::internal::no_assign {
public:
send_func( StreamFactory &factory, device_type d )
: my_factory(factory), my_device( d ) {}
template <typename... FnArgs>
void operator()( FnArgs&... args ) {
my_factory.send_data( my_device, args... );
}
private:
StreamFactory &my_factory;
device_type my_device;
};
public:
args_storage( const kernel_type& kernel, StreamFactory &f, Args&&... args )
: args_storage_base( kernel, f )
, my_args_pack( std::forward<Args>(args)... )
{}
args_storage( const args_storage &k ) : args_storage_base( k ), my_args_pack( k.my_args_pack ) {}
args_storage( const args_storage_base &k, Args&&... args ) : args_storage_base( k ), my_args_pack( std::forward<Args>(args)... ) {}
void enqueue( kernel_input_tuple &ip, output_ports_type &op, const streaming_node &n ) __TBB_override {
// Make const qualified args_pack (from non-const)
const args_pack_type& const_args_pack = my_args_pack;
            // factory.enqueue_kernel() gets
// - 'ip' tuple elements by reference and updates it (and 'ip') with dependencies
// - arguments (from my_args_pack) by const-reference via const_args_pack
tbb::internal::call( run_kernel_func( ip, n, *this ), const_args_pack );
if (! do_try_put( ip, op, input_sequence() ) ) {
graph& g = n.my_graph;
                // No message was passed to the successors, so set a callback to extend the graph lifetime until the kernel completes.
g.increment_wait_count();
// factory.finalize() gets
// - 'ip' tuple elements by reference, so 'ip' might be changed
// - arguments (from my_args_pack) by const-reference via const_args_pack
tbb::internal::call( make_run_finalize_func(ip, this->my_factory, [&g] {
g.decrement_wait_count();
}), const_args_pack );
}
}
void send( device_type d ) __TBB_override {
// factory.send() gets arguments by reference and updates these arguments with dependencies
// (it gets but usually ignores port_ref-s)
tbb::internal::call( send_func( this->my_factory, d ), my_args_pack );
}
args_storage_base *clone() const __TBB_override {
// Create new args_storage with copying constructor.
return new args_storage<Args...>( *this );
}
private:
typedef tbb::internal::stored_pack<Args...> args_pack_type;
args_pack_type my_args_pack;
};
// Body for kernel_multifunction_node.
class kernel_body : tbb::internal::no_assign {
public:
kernel_body( const streaming_node &node ) : my_node( node ) {}
void operator()( kernel_input_tuple ip, typename args_storage_base::output_ports_type &op ) {
__TBB_ASSERT( (my_node.my_args_storage != NULL), "No arguments storage" );
// 'ip' is passed by value to create local copy for updating inside enqueue_kernel()
my_node.my_args_storage->enqueue( ip, op, my_node );
}
private:
const streaming_node &my_node;
};
template <typename T, typename U = typename internal::is_port_ref<T>::type >
struct wrap_to_async {
typedef T type; // Keep port_ref as it is
};
template <typename T>
struct wrap_to_async<T, std::false_type> {
typedef typename StreamFactory::template async_msg_type< typename tbb::internal::strip<T>::type > type;
};
template <typename... Args>
args_storage_base *make_args_storage(const args_storage_base& storage, Args&&... args) const {
// In this variadic template convert all simple types 'T' into 'async_msg_type<T>'
return new args_storage<Args...>(storage, std::forward<Args>(args)...);
}
void notify_new_device( device_type d ) {
my_args_storage->send( d );
}
template <typename ...Args>
void enqueue_kernel( kernel_input_tuple& ip, StreamFactory& factory, device_type device, const kernel_type& kernel, Args&... args ) const {
this->enqueue_kernel_impl( ip, factory, device, kernel, args... );
}
public:
template <typename DeviceSelector>
streaming_node( graph &g, const kernel_type& kernel, DeviceSelector d, StreamFactory &f )
: base_type( g )
, my_indexer_node( g )
, my_device_selector( new device_selector<DeviceSelector>( d, *this, f ) )
, my_device_selector_node( g, serial, device_selector_body( my_device_selector ) )
, my_join_node( g )
, my_kernel_node( g, serial, kernel_body( *this ) )
// By default, streaming_node maps all its ports to the kernel arguments on a one-to-one basis.
, my_args_storage( make_args_storage( args_storage<>(kernel, f), port_ref<0, NUM_INPUTS - 1>() ) )
{
base_type::set_external_ports( get_input_ports(), get_output_ports() );
make_edges();
}
streaming_node( const streaming_node &node )
: base_type( node.my_graph )
, my_indexer_node( node.my_indexer_node )
, my_device_selector( node.my_device_selector->clone( *this ) )
, my_device_selector_node( node.my_graph, serial, device_selector_body( my_device_selector ) )
, my_join_node( node.my_join_node )
, my_kernel_node( node.my_graph, serial, kernel_body( *this ) )
, my_args_storage( node.my_args_storage->clone() )
{
base_type::set_external_ports( get_input_ports(), get_output_ports() );
make_edges();
}
streaming_node( streaming_node &&node )
: base_type( node.my_graph )
, my_indexer_node( std::move( node.my_indexer_node ) )
, my_device_selector( node.my_device_selector->clone(*this) )
, my_device_selector_node( node.my_graph, serial, device_selector_body( my_device_selector ) )
, my_join_node( std::move( node.my_join_node ) )
, my_kernel_node( node.my_graph, serial, kernel_body( *this ) )
, my_args_storage( node.my_args_storage )
{
base_type::set_external_ports( get_input_ports(), get_output_ports() );
make_edges();
        // Set the moved-from node's args storage to NULL to prevent double deallocation.
node.my_args_storage = NULL;
}
~streaming_node() {
if ( my_args_storage ) delete my_args_storage;
if ( my_device_selector ) delete my_device_selector;
}
template <typename... Args>
void set_args( Args&&... args ) {
// Copy the base class of args_storage and create new storage for "Args...".
args_storage_base * const new_args_storage = make_args_storage( *my_args_storage, typename wrap_to_async<Args>::type(std::forward<Args>(args))...);
delete my_args_storage;
my_args_storage = new_args_storage;
}
protected:
void reset_node( reset_flags = rf_reset_protocol ) __TBB_override { __TBB_ASSERT( false, "Not implemented yet" ); }
private:
indexer_node_type my_indexer_node;
device_selector_base *my_device_selector;
device_selector_node my_device_selector_node;
join_node<kernel_input_tuple, JP> my_join_node;
kernel_multifunction_node my_kernel_node;
args_storage_base *my_args_storage;
};
#endif // __TBB_PREVIEW_STREAMING_NODE
#endif // __TBB_flow_graph_streaming_H

View File

@@ -0,0 +1,249 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// A hash table buffer that can expand and supports as many deletions as additions.
// It is list-based, with the list elements held in an array (for destruction management),
// and uses multiplicative hashing (like ETS). No synchronization is built in.
//
#ifndef __TBB__flow_graph_hash_buffer_impl_H
#define __TBB__flow_graph_hash_buffer_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interfaceX::internal
// elements in the table are a simple list; we need pointer to next element to
// traverse the chain
template<typename ValueType>
struct buffer_element_type {
// the second parameter below is void * because we can't forward-declare the type
// itself, so we just reinterpret_cast below.
typedef typename aligned_pair<ValueType, void *>::type type;
};
template
<
typename Key, // type of key within ValueType
typename ValueType,
typename ValueToKey, // abstract method that returns "const Key" or "const Key&" given ValueType
typename HashCompare, // has hash and equal
typename Allocator=tbb::cache_aligned_allocator< typename aligned_pair<ValueType, void *>::type >
>
class hash_buffer : public HashCompare {
public:
static const size_t INITIAL_SIZE = 8; // initial size of the hash pointer table
typedef ValueType value_type;
typedef typename buffer_element_type< value_type >::type element_type;
typedef value_type *pointer_type;
typedef element_type *list_array_type; // array we manage manually
typedef list_array_type *pointer_array_type;
typedef typename Allocator::template rebind<list_array_type>::other pointer_array_allocator_type;
typedef typename Allocator::template rebind<element_type>::other elements_array_allocator;
typedef typename tbb::internal::strip<Key>::type Knoref;
private:
ValueToKey *my_key;
size_t my_size;
size_t nelements;
pointer_array_type pointer_array; // pointer_array[my_size]
list_array_type elements_array; // elements_array[my_size / 2]
element_type* free_list;
size_t mask() { return my_size - 1; }
void set_up_free_list( element_type **p_free_list, list_array_type la, size_t sz) {
for(size_t i=0; i < sz - 1; ++i ) { // construct free list
la[i].second = &(la[i+1]);
}
la[sz-1].second = NULL;
*p_free_list = (element_type *)&(la[0]);
}
// cleanup for exceptions
struct DoCleanup {
pointer_array_type *my_pa;
list_array_type *my_elements;
size_t my_size;
DoCleanup(pointer_array_type &pa, list_array_type &my_els, size_t sz) :
my_pa(&pa), my_elements(&my_els), my_size(sz) { }
~DoCleanup() {
if(my_pa) {
size_t dont_care = 0;
internal_free_buffer(*my_pa, *my_elements, my_size, dont_care);
}
}
};
// exception-safety requires we do all the potentially-throwing operations first
void grow_array() {
size_t new_size = my_size*2;
size_t new_nelements = nelements; // internal_free_buffer zeroes this
list_array_type new_elements_array = NULL;
pointer_array_type new_pointer_array = NULL;
list_array_type new_free_list = NULL;
{
DoCleanup my_cleanup(new_pointer_array, new_elements_array, new_size);
new_elements_array = elements_array_allocator().allocate(my_size);
new_pointer_array = pointer_array_allocator_type().allocate(new_size);
for(size_t i=0; i < new_size; ++i) new_pointer_array[i] = NULL;
set_up_free_list(&new_free_list, new_elements_array, my_size );
for(size_t i=0; i < my_size; ++i) {
for( element_type* op = pointer_array[i]; op; op = (element_type *)(op->second)) {
value_type *ov = reinterpret_cast<value_type *>(&(op->first));
// could have std::move semantics
internal_insert_with_key(new_pointer_array, new_size, new_free_list, *ov);
}
}
my_cleanup.my_pa = NULL;
my_cleanup.my_elements = NULL;
}
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
free_list = new_free_list;
pointer_array = new_pointer_array;
elements_array = new_elements_array;
my_size = new_size;
nelements = new_nelements;
}
    // v should be perfectly forwarded once std::move semantics are implemented.
    // We use this method to move elements in grow_array, so we cannot rely on the class fields.
void internal_insert_with_key( element_type **p_pointer_array, size_t p_sz, list_array_type &p_free_list,
const value_type &v) {
size_t l_mask = p_sz-1;
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
size_t h = this->hash((*my_key)(v)) & l_mask;
__TBB_ASSERT(p_free_list, "Error: free list not set up.");
element_type* my_elem = p_free_list; p_free_list = (element_type *)(p_free_list->second);
(void) new(&(my_elem->first)) value_type(v);
my_elem->second = p_pointer_array[h];
p_pointer_array[h] = my_elem;
}
void internal_initialize_buffer() {
pointer_array = pointer_array_allocator_type().allocate(my_size);
for(size_t i = 0; i < my_size; ++i) pointer_array[i] = NULL;
elements_array = elements_array_allocator().allocate(my_size / 2);
set_up_free_list(&free_list, elements_array, my_size / 2);
}
    // made static so an enclosed class can use it to properly dispose of the internals
static void internal_free_buffer( pointer_array_type &pa, list_array_type &el, size_t &sz, size_t &ne ) {
if(pa) {
for(size_t i = 0; i < sz; ++i ) {
element_type *p_next;
for( element_type *p = pa[i]; p; p = p_next) {
p_next = (element_type *)p->second;
internal::punned_cast<value_type *>(&(p->first))->~value_type();
}
}
pointer_array_allocator_type().deallocate(pa, sz);
pa = NULL;
}
        // Tested separately: if the allocation of pa throws, el may already be allocated,
        // but no elements will have been constructed.
if(el) {
elements_array_allocator().deallocate(el, sz / 2);
el = NULL;
}
sz = INITIAL_SIZE;
ne = 0;
}
public:
hash_buffer() : my_key(NULL), my_size(INITIAL_SIZE), nelements(0) {
internal_initialize_buffer();
}
~hash_buffer() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
if(my_key) delete my_key;
}
void reset() {
internal_free_buffer(pointer_array, elements_array, my_size, nelements);
internal_initialize_buffer();
}
// Take ownership of func object allocated with new.
    // This method is only used internally, so it cannot be misused by the user.
void set_key_func(ValueToKey *vtk) { my_key = vtk; }
// pointer is used to clone()
ValueToKey* get_key_func() { return my_key; }
bool insert_with_key(const value_type &v) {
pointer_type p = NULL;
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(find_ref_with_key((*my_key)(v), p)) {
p->~value_type();
(void) new(p) value_type(v); // copy-construct into the space
return false;
}
++nelements;
if(nelements*2 > my_size) grow_array();
internal_insert_with_key(pointer_array, my_size, free_list, v);
return true;
}
// returns true and sets v to array element if found, else returns false.
bool find_ref_with_key(const Knoref& k, pointer_type &v) {
size_t i = this->hash(k) & mask();
for(element_type* p = pointer_array[i]; p; p = (element_type *)(p->second)) {
pointer_type pv = reinterpret_cast<pointer_type>(&(p->first));
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(this->equal((*my_key)(*pv), k)) {
v = pv;
return true;
}
}
return false;
}
bool find_with_key( const Knoref& k, value_type &v) {
value_type *p;
if(find_ref_with_key(k, p)) {
v = *p;
return true;
}
else
return false;
}
void delete_with_key(const Knoref& k) {
size_t h = this->hash(k) & mask();
element_type* prev = NULL;
for(element_type* p = pointer_array[h]; p; prev = p, p = (element_type *)(p->second)) {
value_type *vp = reinterpret_cast<value_type *>(&(p->first));
__TBB_ASSERT(my_key, "Error: value-to-key functor not provided");
if(this->equal((*my_key)(*vp), k)) {
vp->~value_type();
if(prev) prev->second = p->second;
else pointer_array[h] = (element_type *)(p->second);
p->second = free_list;
free_list = p;
--nelements;
return;
}
}
__TBB_ASSERT(false, "key not found for delete");
}
};
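// Illustrative usage sketch only (internal helper; `item_t`, `key_of` and `my_hash_compare`
// are hypothetical stand-ins for the types a key-matching join_node would supply):
//
//     hash_buffer<int, item_t, key_of, my_hash_compare> buf;
//     buf.set_key_func(new key_of());          // the buffer takes ownership of the functor
//     buf.insert_with_key(item);               // returns false (and replaces) if the key already exists
//     item_t found;
//     if (buf.find_with_key(7, found)) { /* use the copy in `found` */ }
//     buf.delete_with_key(7);                  // asserts if the key is absent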
#endif // __TBB__flow_graph_hash_buffer_impl_H

View File

@@ -0,0 +1,364 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _FGT_GRAPH_TRACE_IMPL_H
#define _FGT_GRAPH_TRACE_IMPL_H
#include "../tbb_profiling.h"
#if (_MSC_VER >= 1900)
#include <intrin.h>
#endif
namespace tbb {
namespace internal {
#if TBB_USE_THREADING_TOOLS
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
#if (_MSC_VER >= 1900)
#define CODEPTR() (_ReturnAddress())
#elif __TBB_GCC_VERSION >= 40800
#define CODEPTR() ( __builtin_return_address(0))
#else
#define CODEPTR() NULL
#endif
#else
#define CODEPTR() NULL
#endif /* TBB_PREVIEW_FLOW_GRAPH_TRACE */
static inline void fgt_alias_port(void *node, void *p, bool visible) {
if(visible)
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_NODE );
else
itt_relation_add( ITT_DOMAIN_FLOW, p, FLOW_NODE, __itt_relation_is_child_of, node, FLOW_NODE );
}
static inline void fgt_composite ( void* codeptr, void *node, void *graph ) {
itt_make_task_group( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_COMPOSITE_NODE );
suppress_unused_warning( codeptr );
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
if (codeptr != NULL) {
register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
static inline void fgt_internal_alias_input_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_INPUT_PORT );
}
static inline void fgt_internal_alias_output_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index );
itt_relation_add( ITT_DOMAIN_FLOW, node, FLOW_NODE, __itt_relation_is_parent_of, p, FLOW_OUTPUT_PORT );
}
template<typename InputType>
void alias_input_port(void *node, tbb::flow::receiver<InputType>* port, string_index name_index) {
// TODO: Make fgt_internal_alias_input_port a function template?
fgt_internal_alias_input_port( node, port, name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_input_alias_helper {
static void alias_port( void *node, PortsTuple &ports ) {
alias_input_port( node, &(tbb::flow::get<N-1>(ports)), static_cast<tbb::internal::string_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_alias_helper<PortsTuple, N-1>::alias_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_input_alias_helper<PortsTuple, 0> {
static void alias_port( void * /* node */, PortsTuple & /* ports */ ) { }
};
template<typename OutputType>
void alias_output_port(void *node, tbb::flow::sender<OutputType>* port, string_index name_index) {
// TODO: Make fgt_internal_alias_output_port a function template?
fgt_internal_alias_output_port( node, static_cast<void *>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_output_alias_helper {
static void alias_port( void *node, PortsTuple &ports ) {
alias_output_port( node, &(tbb::flow::get<N-1>(ports)), static_cast<tbb::internal::string_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_alias_helper<PortsTuple, N-1>::alias_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_output_alias_helper<PortsTuple, 0> {
static void alias_port( void * /*node*/, PortsTuple &/*ports*/ ) {
}
};
static inline void fgt_internal_create_input_port( void *node, void *p, string_index name_index ) {
itt_make_task_group( ITT_DOMAIN_FLOW, p, FLOW_INPUT_PORT, node, FLOW_NODE, name_index );
}
static inline void fgt_internal_create_output_port( void* codeptr, void *node, void *p, string_index name_index ) {
itt_make_task_group(ITT_DOMAIN_FLOW, p, FLOW_OUTPUT_PORT, node, FLOW_NODE, name_index);
suppress_unused_warning( codeptr );
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
if (codeptr != NULL) {
register_node_addr(ITT_DOMAIN_FLOW, node, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
template<typename InputType>
void register_input_port(void *node, tbb::flow::receiver<InputType>* port, string_index name_index) {
// TODO: Make fgt_internal_create_input_port a function template?
// In C++03 dependent name lookup from the template definition context
// works only for function declarations with external linkage:
// http://www.open-std.org/JTC1/SC22/WG21/docs/cwg_defects.html#561
fgt_internal_create_input_port(node, static_cast<void*>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_input_helper {
static void register_port( void *node, PortsTuple &ports ) {
register_input_port( node, &(tbb::flow::get<N-1>(ports)), static_cast<tbb::internal::string_index>(FLOW_INPUT_PORT_0 + N - 1) );
fgt_internal_input_helper<PortsTuple, N-1>::register_port( node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_input_helper<PortsTuple, 1> {
static void register_port( void *node, PortsTuple &ports ) {
register_input_port( node, &(tbb::flow::get<0>(ports)), FLOW_INPUT_PORT_0 );
}
};
template<typename OutputType>
void register_output_port(void* codeptr, void *node, tbb::flow::sender<OutputType>* port, string_index name_index) {
// TODO: Make fgt_internal_create_output_port a function template?
fgt_internal_create_output_port( codeptr, node, static_cast<void *>(port), name_index);
}
template < typename PortsTuple, int N >
struct fgt_internal_output_helper {
static void register_port( void* codeptr, void *node, PortsTuple &ports ) {
register_output_port( codeptr, node, &(tbb::flow::get<N-1>(ports)), static_cast<tbb::internal::string_index>(FLOW_OUTPUT_PORT_0 + N - 1) );
fgt_internal_output_helper<PortsTuple, N-1>::register_port( codeptr, node, ports );
}
};
template < typename PortsTuple >
struct fgt_internal_output_helper<PortsTuple,1> {
static void register_port( void* codeptr, void *node, PortsTuple &ports ) {
register_output_port( codeptr, node, &(tbb::flow::get<0>(ports)), FLOW_OUTPUT_PORT_0 );
}
};
template< typename NodeType >
void fgt_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::receiver< typename NodeType::input_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
void fgt_multiinput_multioutput_node_desc( const NodeType *node, const char *desc ) {
void *addr = const_cast<NodeType *>(node);
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
template< typename NodeType >
static inline void fgt_node_desc( const NodeType *node, const char *desc ) {
void *addr = (void *)( static_cast< tbb::flow::sender< typename NodeType::output_type > * >(const_cast< NodeType *>(node)) );
itt_metadata_str_add( ITT_DOMAIN_FLOW, addr, FLOW_NODE, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_graph_desc( void *g, const char *desc ) {
itt_metadata_str_add( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, FLOW_OBJECT_NAME, desc );
}
static inline void fgt_body( void *node, void *body ) {
itt_relation_add( ITT_DOMAIN_FLOW, body, FLOW_BODY, __itt_relation_is_child_of, node, FLOW_NODE );
}
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node(void* codeptr, string_index t, void *g, void *input_port, PortsTuple &ports ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<PortsTuple, N>::register_port(codeptr, input_port, ports );
}
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( void* codeptr, string_index t, void *g, void *input_port, PortsTuple &ports, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, input_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_input_port( input_port, input_port, FLOW_INPUT_PORT_0 );
fgt_internal_output_helper<PortsTuple, N>::register_port( codeptr, input_port, ports );
fgt_body( input_port, body );
}
template< int N, typename PortsTuple >
static inline void fgt_multiinput_node( void* codeptr, string_index t, void *g, PortsTuple &ports, void *output_port) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_internal_input_helper<PortsTuple, N>::register_port( output_port, ports );
}
static inline void fgt_multiinput_multioutput_node( void* codeptr, string_index t, void *n, void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, n, FLOW_NODE, g, FLOW_GRAPH, t );
suppress_unused_warning( codeptr );
#if TBB_PREVIEW_FLOW_GRAPH_TRACE
if (codeptr != NULL) {
register_node_addr(ITT_DOMAIN_FLOW, n, FLOW_NODE, CODE_ADDRESS, &codeptr);
}
#endif
}
static inline void fgt_node( void* codeptr, string_index t, void *g, void *output_port ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port( codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
}
static inline void fgt_node_with_body( void* codeptr, string_index t, void *g, void *output_port, void *body ) {
itt_make_task_group( ITT_DOMAIN_FLOW, output_port, FLOW_NODE, g, FLOW_GRAPH, t );
fgt_internal_create_output_port(codeptr, output_port, output_port, FLOW_OUTPUT_PORT_0 );
fgt_body( output_port, body );
}
static inline void fgt_node( void* codeptr, string_index t, void *g, void *input_port, void *output_port ) {
fgt_node( codeptr, t, g, output_port );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node_with_body( void* codeptr, string_index t, void *g, void *input_port, void *output_port, void *body ) {
fgt_node_with_body( codeptr, t, g, output_port, body );
fgt_internal_create_input_port( output_port, input_port, FLOW_INPUT_PORT_0 );
}
static inline void fgt_node( void* codeptr, string_index t, void *g, void *input_port, void *decrement_port, void *output_port ) {
fgt_node( codeptr, t, g, input_port, output_port );
fgt_internal_create_input_port( output_port, decrement_port, FLOW_INPUT_PORT_1 );
}
static inline void fgt_make_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_predecessor_to, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_remove_edge( void *output_port, void *input_port ) {
itt_relation_add( ITT_DOMAIN_FLOW, output_port, FLOW_OUTPUT_PORT, __itt_relation_is_sibling_of, input_port, FLOW_INPUT_PORT);
}
static inline void fgt_graph( void *g ) {
itt_make_task_group( ITT_DOMAIN_FLOW, g, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_GRAPH );
}
static inline void fgt_begin_body( void *body ) {
itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, NULL, FLOW_NULL, FLOW_BODY );
}
static inline void fgt_end_body( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
static inline void fgt_async_try_put_begin( void *node, void *port ) {
itt_task_begin( ITT_DOMAIN_FLOW, port, FLOW_OUTPUT_PORT, node, FLOW_NODE, FLOW_OUTPUT_PORT );
}
static inline void fgt_async_try_put_end( void *, void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
static inline void fgt_async_reserve( void *node, void *graph ) {
itt_region_begin( ITT_DOMAIN_FLOW, node, FLOW_NODE, graph, FLOW_GRAPH, FLOW_NULL );
}
static inline void fgt_async_commit( void *node, void * /*graph*/) {
itt_region_end( ITT_DOMAIN_FLOW, node, FLOW_NODE );
}
static inline void fgt_reserve_wait( void *graph ) {
itt_region_begin( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH, NULL, FLOW_NULL, FLOW_NULL );
}
static inline void fgt_release_wait( void *graph ) {
itt_region_end( ITT_DOMAIN_FLOW, graph, FLOW_GRAPH );
}
#else // TBB_USE_THREADING_TOOLS
#define CODEPTR() NULL
static inline void fgt_alias_port(void * /*node*/, void * /*p*/, bool /*visible*/ ) { }
static inline void fgt_composite ( void* /*codeptr*/, void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_graph( void * /*g*/ ) { }
template< typename NodeType >
static inline void fgt_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template< typename NodeType >
static inline void fgt_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
static inline void fgt_graph_desc( void * /*g*/, const char * /*desc*/ ) { }
static inline void fgt_body( void * /*node*/, void * /*body*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multioutput_node_with_body( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*input_port*/, PortsTuple & /*ports*/, void * /*body*/ ) { }
template< int N, typename PortsTuple >
static inline void fgt_multiinput_node( void* /*codeptr*/, string_index /*t*/, void * /*g*/, PortsTuple & /*ports*/, void * /*output_port*/ ) { }
static inline void fgt_multiinput_multioutput_node( void* /*codeptr*/, string_index /*t*/, void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_node( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*output_port*/ ) { }
static inline void fgt_node( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/ ) { }
static inline void fgt_node( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*decrement_port*/, void * /*output_port*/ ) { }
static inline void fgt_node_with_body( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_node_with_body( void* /*codeptr*/, string_index /*t*/, void * /*g*/, void * /*input_port*/, void * /*output_port*/, void * /*body*/ ) { }
static inline void fgt_make_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_remove_edge( void * /*output_port*/, void * /*input_port*/ ) { }
static inline void fgt_begin_body( void * /*body*/ ) { }
static inline void fgt_end_body( void * /*body*/) { }
static inline void fgt_async_try_put_begin( void * /*node*/, void * /*port*/ ) { }
static inline void fgt_async_try_put_end( void * /*node*/ , void * /*port*/ ) { }
static inline void fgt_async_reserve( void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_async_commit( void * /*node*/, void * /*graph*/ ) { }
static inline void fgt_reserve_wait( void * /*graph*/ ) { }
static inline void fgt_release_wait( void * /*graph*/ ) { }
template< typename NodeType >
void fgt_multiinput_multioutput_node_desc( const NodeType * /*node*/, const char * /*desc*/ ) { }
template < typename PortsTuple, int N >
struct fgt_internal_input_alias_helper {
static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { }
};
template < typename PortsTuple, int N >
struct fgt_internal_output_alias_helper {
static void alias_port( void * /*node*/, PortsTuple & /*ports*/ ) { }
};
#endif // TBB_USE_THREADING_TOOLS
} // namespace internal
} // namespace tbb
#endif

View File

@@ -0,0 +1,723 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__flow_graph_types_impl_H
#define __TBB__flow_graph_types_impl_H
#ifndef __TBB_flow_graph_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
// included in namespace tbb::flow::interfaceX
namespace internal {
// the change to key_matching (adding a K and KHash template parameter, making it a class)
// means we have to pass this data to the key_matching_port. All the ports have only one
// template parameter, so we have to wrap the following types in a trait:
//
// . K == key_type
// . KHash == hash and compare for Key
// . TtoK == function_body that given an object of T, returns its K
// . T == type accepted by port, and stored in the hash table
//
// The port will have an additional parameter on node construction: a function_body that
// accepts a const T& and returns the K that is that T's key.
template<typename Kp, typename KHashp, typename Tp>
struct KeyTrait {
typedef Kp K;
typedef Tp T;
typedef internal::type_to_key_function_body<T,K> TtoK;
typedef KHashp KHash;
};
// wrap each element of a tuple in a template, and make a tuple of the result.
template<int N, template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements;
// A wrapper that generates the traits needed for each port of a key-matching join,
// and the type of the tuple of input ports.
template<int N, template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements;
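// For example, wrap_tuple_elements<2, PT, tuple<int, float> >::type is
// tuple< PT<int>, PT<float> >, and wrap_key_tuple_elements additionally wraps each element
// type in a KeyTrait carrying the join's key type and hash-compare before applying PT.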
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_VARIADIC_TUPLE_PRESENT
template<int N, template<class> class PT, typename... Args>
struct wrap_tuple_elements<N, PT, tbb::flow::tuple<Args...> >{
typedef typename tbb::flow::tuple<PT<Args>... > type;
};
template<int N, template<class> class PT, typename KeyTraits, typename... Args>
struct wrap_key_tuple_elements<N, PT, KeyTraits, tbb::flow::tuple<Args...> > {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef typename tbb::flow::tuple<PT<KeyTrait<K, KHash, Args> >... > type;
};
#else
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<1, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<1, PT, KeyTraits, TypeTuple > {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef typename tbb::flow::tuple< PT<KeyTrait0> > type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<2, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<2, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1> > type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<3, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<3, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2> > type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<4, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<4, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>,
PT<KeyTrait3> > type;
};
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<5, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<5, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>,
PT<KeyTrait3>, PT<KeyTrait4> > type;
};
#if __TBB_VARIADIC_MAX >= 6
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<6, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<6, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<5,TypeTuple>::type> KeyTrait5;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>, PT<KeyTrait3>,
PT<KeyTrait4>, PT<KeyTrait5> > type;
};
#endif
#if __TBB_VARIADIC_MAX >= 7
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<7, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<7, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<5,TypeTuple>::type> KeyTrait5;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<6,TypeTuple>::type> KeyTrait6;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>, PT<KeyTrait3>,
PT<KeyTrait4>, PT<KeyTrait5>, PT<KeyTrait6> > type;
};
#endif
#if __TBB_VARIADIC_MAX >= 8
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<8, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<8, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<5,TypeTuple>::type> KeyTrait5;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<6,TypeTuple>::type> KeyTrait6;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<7,TypeTuple>::type> KeyTrait7;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>, PT<KeyTrait3>,
PT<KeyTrait4>, PT<KeyTrait5>, PT<KeyTrait6>, PT<KeyTrait7> > type;
};
#endif
#if __TBB_VARIADIC_MAX >= 9
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<9, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<9, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<5,TypeTuple>::type> KeyTrait5;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<6,TypeTuple>::type> KeyTrait6;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<7,TypeTuple>::type> KeyTrait7;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<8,TypeTuple>::type> KeyTrait8;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>, PT<KeyTrait3>,
PT<KeyTrait4>, PT<KeyTrait5>, PT<KeyTrait6>, PT<KeyTrait7>, PT<KeyTrait8> > type;
};
#endif
#if __TBB_VARIADIC_MAX >= 10
template<template<class> class PT, typename TypeTuple>
struct wrap_tuple_elements<10, PT, TypeTuple> {
typedef typename tbb::flow::tuple<
PT<typename tbb::flow::tuple_element<0,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<1,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<2,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<3,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<4,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<5,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<6,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<7,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<8,TypeTuple>::type>,
PT<typename tbb::flow::tuple_element<9,TypeTuple>::type> >
type;
};
template<template<class> class PT, typename KeyTraits, typename TypeTuple>
struct wrap_key_tuple_elements<10, PT, KeyTraits, TypeTuple> {
typedef typename KeyTraits::key_type K;
typedef typename KeyTraits::hash_compare_type KHash;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<0,TypeTuple>::type> KeyTrait0;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<1,TypeTuple>::type> KeyTrait1;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<2,TypeTuple>::type> KeyTrait2;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<3,TypeTuple>::type> KeyTrait3;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<4,TypeTuple>::type> KeyTrait4;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<5,TypeTuple>::type> KeyTrait5;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<6,TypeTuple>::type> KeyTrait6;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<7,TypeTuple>::type> KeyTrait7;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<8,TypeTuple>::type> KeyTrait8;
typedef KeyTrait<K, KHash, typename tbb::flow::tuple_element<9,TypeTuple>::type> KeyTrait9;
typedef typename tbb::flow::tuple< PT<KeyTrait0>, PT<KeyTrait1>, PT<KeyTrait2>, PT<KeyTrait3>,
PT<KeyTrait4>, PT<KeyTrait5>, PT<KeyTrait6>, PT<KeyTrait7>, PT<KeyTrait8>,
PT<KeyTrait9> > type;
};
#endif
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_VARIADIC_TUPLE_PRESENT */
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template< int... S > class sequence {};
template< int N, int... S >
struct make_sequence : make_sequence < N - 1, N - 1, S... > {};
template< int... S >
struct make_sequence < 0, S... > {
typedef sequence<S...> type;
};
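// For example, make_sequence<3> derives from make_sequence<2,2>, then make_sequence<1,1,2>,
// then make_sequence<0,0,1,2>, whose specialization defines type as sequence<0,1,2>.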
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#if __TBB_INITIALIZER_LISTS_PRESENT
// Until C++14, std::initializer_list does not guarantee the lifetime of the contained objects.
template <typename T>
class initializer_list_wrapper {
public:
typedef T value_type;
typedef const T& reference;
typedef const T& const_reference;
typedef size_t size_type;
typedef T* iterator;
typedef const T* const_iterator;
initializer_list_wrapper( std::initializer_list<T> il ) __TBB_NOEXCEPT( true ) : my_begin( static_cast<T*>(malloc( il.size()*sizeof( T ) )) ) {
iterator dst = my_begin;
for ( typename std::initializer_list<T>::const_iterator src = il.begin(); src != il.end(); ++src )
new (dst++) T( *src );
my_end = dst;
}
initializer_list_wrapper( const initializer_list_wrapper<T>& ilw ) __TBB_NOEXCEPT( true ) : my_begin( static_cast<T*>(malloc( ilw.size()*sizeof( T ) )) ) {
iterator dst = my_begin;
for ( typename std::initializer_list<T>::const_iterator src = ilw.begin(); src != ilw.end(); ++src )
new (dst++) T( *src );
my_end = dst;
}
#if __TBB_CPP11_RVALUE_REF_PRESENT
initializer_list_wrapper( initializer_list_wrapper<T>&& ilw ) __TBB_NOEXCEPT( true ) : my_begin( ilw.my_begin ), my_end( ilw.my_end ) {
ilw.my_begin = ilw.my_end = NULL;
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
~initializer_list_wrapper() {
if ( my_begin )
free( my_begin );
}
const_iterator begin() const __TBB_NOEXCEPT(true) { return my_begin; }
const_iterator end() const __TBB_NOEXCEPT(true) { return my_end; }
size_t size() const __TBB_NOEXCEPT(true) { return (size_t)(my_end - my_begin); }
private:
iterator my_begin;
iterator my_end;
};
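// Usage sketch (for exposition only): the wrapper copies the elements so they remain valid
// after the std::initializer_list's backing array goes away, e.g.
//     initializer_list_wrapper<int> w( {1, 2, 3} );
//     for( initializer_list_wrapper<int>::const_iterator it = w.begin(); it != w.end(); ++it ) { /* use *it */ }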
#endif /* __TBB_INITIALIZER_LISTS_PRESENT */
//! Type mimicking std::pair, but with trailing fill to ensure each element of an array
//! will have the correct alignment.
template<typename T1, typename T2, size_t REM>
struct type_plus_align {
char first[sizeof(T1)];
T2 second;
char fill1[REM];
};
template<typename T1, typename T2>
struct type_plus_align<T1,T2,0> {
char first[sizeof(T1)];
T2 second;
};
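// Pre-C++11 way to compute an alignment requirement: the compiler pads 'char t' up to the
// alignment of U, so sizeof(test_alignment) - sizeof(U) equals U's alignment.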
template<class U> struct alignment_of {
typedef struct { char t; U padded; } test_alignment;
static const size_t value = sizeof(test_alignment) - sizeof(U);
};
// T1 and T2 are the actual types stored. The space defined for T1 in the returned type is a
// char array of the correct size. T2 should be trivially constructible; T1 must be
// explicitly managed.
template<typename T1, typename T2>
struct aligned_pair {
static const size_t t1_align = alignment_of<T1>::value;
static const size_t t2_align = alignment_of<T2>::value;
typedef type_plus_align<T1, T2, 0 > just_pair;
static const size_t max_align = t1_align < t2_align ? t2_align : t1_align;
static const size_t extra_bytes = sizeof(just_pair) % max_align;
static const size_t remainder = extra_bytes ? max_align - extra_bytes : 0;
public:
typedef type_plus_align<T1,T2,remainder> type;
}; // aligned_pair
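// For example, aligned_pair<T1,T2>::type has 'first', sizeof(T1) raw bytes for a manually
// constructed T1, and 'second', a T2; the trailing fill rounds the struct size up to a
// multiple of the stricter of the two alignments so successive array elements stay aligned.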
// support for variant type
// type we use when we're not storing a value
struct default_constructed { };
// internal::Wrapper<T> contains another type, can be tested for what type it contains, and
// gives a read-only reference to it; its CopyTo( void *newSpace ) builds a Wrapper<T> copy
// of itself in newSpace. WrapperBase below is the base struct that allows us to copy
// objects and test their type.
struct WrapperBase {
virtual ~WrapperBase() {}
virtual void CopyTo(void* /*newSpace*/) const { }
};
// Wrapper<T> contains a T, with the ability to test what T is. The Wrapper<T> can be
// constructed from a T, can be copy-constructed from another Wrapper<T>, and can be
// examined via value(), but not modified.
template<typename T>
struct Wrapper: public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
private:
T value_space;
public:
const value_type &value() const { return value_space; }
private:
Wrapper();
// on exception will ensure the Wrapper will contain only a trivially-constructed object
struct _unwind_space {
pointer_type space;
_unwind_space(pointer_type p) : space(p) {}
~_unwind_space() {
if(space) (void) new (space) Wrapper<default_constructed>(default_constructed());
}
};
public:
explicit Wrapper( const T& other ) : value_space(other) { }
explicit Wrapper(const Wrapper& other) : value_space(other.value_space) { }
void CopyTo(void* newSpace) const __TBB_override {
_unwind_space guard((pointer_type)newSpace);
(void) new(newSpace) Wrapper(value_space);
guard.space = NULL;
}
~Wrapper() { }
};
// specialization for array objects
template<typename T, size_t N>
struct Wrapper<T[N]> : public WrapperBase {
typedef T value_type;
typedef T* pointer_type;
// space must be untyped.
typedef T ArrayType[N];
private:
// The space is not of type T[N] because when copy-constructing, it would be
// default-initialized and then copied to in some fashion, resulting in two
// constructions and one destruction per element. If the type is char[ ], we
// placement new into each element, resulting in one construction per element.
static const size_t space_size = sizeof(ArrayType) / sizeof(char);
char value_space[space_size];
// on exception will ensure the already-built objects will be destructed
// (the value_space is a char array, so it is already trivially-destructible.)
struct _unwind_class {
pointer_type space;
int already_built;
_unwind_class(pointer_type p) : space(p), already_built(0) {}
~_unwind_class() {
if(space) {
for(size_t i = already_built; i > 0 ; --i ) space[i-1].~value_type();
(void) new(space) Wrapper<default_constructed>(default_constructed());
}
}
};
public:
const ArrayType &value() const {
char *vp = const_cast<char *>(value_space);
return reinterpret_cast<ArrayType &>(*vp);
}
private:
Wrapper();
public:
// have to explicitly construct because other decays to a const value_type*
explicit Wrapper(const ArrayType& other) {
_unwind_class guard((pointer_type)value_space);
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = 0; i < N; ++i ) {
(void) new(vp++) value_type(other[i]);
++(guard.already_built);
}
guard.space = NULL;
}
explicit Wrapper(const Wrapper& other) : WrapperBase() {
// we have to do the heavy lifting to copy contents
_unwind_class guard((pointer_type)value_space);
pointer_type dp = reinterpret_cast<pointer_type>(value_space);
pointer_type sp = reinterpret_cast<pointer_type>(const_cast<char *>(other.value_space));
for(size_t i = 0; i < N; ++i, ++dp, ++sp) {
(void) new(dp) value_type(*sp);
++(guard.already_built);
}
guard.space = NULL;
}
void CopyTo(void* newSpace) const __TBB_override {
(void) new(newSpace) Wrapper(*this); // exceptions handled in copy constructor
}
~Wrapper() {
// have to destroy explicitly in reverse order
pointer_type vp = reinterpret_cast<pointer_type>(&value_space);
for(size_t i = N; i > 0 ; --i ) vp[i-1].~value_type();
}
};
// Given a tuple, find the type of the element with the strictest alignment requirement. Given
// that type and the tuple, compute how many elements of that type are needed to provide
// storage at least as large as the largest element in the tuple.
template<bool, class T1, class T2> struct pick_one;
template<class T1, class T2> struct pick_one<true , T1, T2> { typedef T1 type; };
template<class T1, class T2> struct pick_one<false, T1, T2> { typedef T2 type; };
template< template<class> class Selector, typename T1, typename T2 >
struct pick_max {
typedef typename pick_one< (Selector<T1>::value > Selector<T2>::value), T1, T2 >::type type;
};
template<typename T> struct size_of { static const int value = sizeof(T); };
template< size_t N, class Tuple, template<class> class Selector > struct pick_tuple_max {
typedef typename pick_tuple_max<N-1, Tuple, Selector>::type LeftMaxType;
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type ThisType;
typedef typename pick_max<Selector, LeftMaxType, ThisType>::type type;
};
template< class Tuple, template<class> class Selector > struct pick_tuple_max<0, Tuple, Selector> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type type;
};
// is the specified type included in a tuple?
template<class Q, size_t N, class Tuple>
struct is_element_of {
typedef typename tbb::flow::tuple_element<N-1, Tuple>::type T_i;
static const bool value = tbb::internal::is_same_type<Q,T_i>::value || is_element_of<Q,N-1,Tuple>::value;
};
template<class Q, class Tuple>
struct is_element_of<Q,0,Tuple> {
typedef typename tbb::flow::tuple_element<0, Tuple>::type T_i;
static const bool value = tbb::internal::is_same_type<Q,T_i>::value;
};
// Allow construction only of types that are listed in the tuple. If construction of a
// disallowed type is attempted, a method that references the undefined template below is
// instantiated, so a compilation error naming that template is generated.
template<typename T> struct ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple;
template<typename T, bool BUILD_IT> struct do_if;
template<typename T>
struct do_if<T, true> {
static void construct(void *mySpace, const T& x) {
(void) new(mySpace) Wrapper<T>(x);
}
};
template<typename T>
struct do_if<T, false> {
static void construct(void * /*mySpace*/, const T& x) {
// This method is instantiated when the type T does not match any of the
// element types in the Tuple in variant<Tuple>.
ERROR_Type_Not_allowed_In_Tagged_Msg_Not_Member_Of_Tuple<T>::bad_type(x);
}
};
// Tuple tells us the allowed types that variant can hold. It determines the alignment of the space in
// Wrapper, and how big Wrapper is.
//
// the object can only be tested for type, and a read-only reference can be fetched by cast_to<T>().
using tbb::internal::punned_cast;
struct tagged_null_type {};
template<typename TagType, typename T0, typename T1=tagged_null_type, typename T2=tagged_null_type, typename T3=tagged_null_type,
typename T4=tagged_null_type, typename T5=tagged_null_type, typename T6=tagged_null_type,
typename T7=tagged_null_type, typename T8=tagged_null_type, typename T9=tagged_null_type>
class tagged_msg {
typedef tbb::flow::tuple<T0, T1, T2, T3, T4
//TODO: Should we reject lists longer than a tuple can hold?
#if __TBB_VARIADIC_MAX >= 6
, T5
#endif
#if __TBB_VARIADIC_MAX >= 7
, T6
#endif
#if __TBB_VARIADIC_MAX >= 8
, T7
#endif
#if __TBB_VARIADIC_MAX >= 9
, T8
#endif
#if __TBB_VARIADIC_MAX >= 10
, T9
#endif
> Tuple;
private:
class variant {
static const size_t N = tbb::flow::tuple_size<Tuple>::value;
typedef typename pick_tuple_max<N, Tuple, alignment_of>::type AlignType;
typedef typename pick_tuple_max<N, Tuple, size_of>::type MaxSizeType;
static const size_t MaxNBytes = (sizeof(Wrapper<MaxSizeType>)+sizeof(AlignType)-1);
static const size_t MaxNElements = MaxNBytes/sizeof(AlignType);
typedef typename tbb::aligned_space<AlignType, MaxNElements> SpaceType;
SpaceType my_space;
static const size_t MaxSize = sizeof(SpaceType);
public:
variant() { (void) new(&my_space) Wrapper<default_constructed>(default_constructed()); }
template<typename T>
variant( const T& x ) {
do_if<T, is_element_of<T, N, Tuple>::value>::construct(&my_space,x);
}
variant(const variant& other) {
const WrapperBase * h = punned_cast<const WrapperBase *>(&(other.my_space));
h->CopyTo(&my_space);
}
// assignment must destroy and re-create the Wrapper type, as there is no way
// to create a Wrapper-to-Wrapper assign even if we find they agree in type.
void operator=( const variant& rhs ) {
if(&rhs != this) {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
const WrapperBase *ch = punned_cast<const WrapperBase *>(&(rhs.my_space));
ch->CopyTo(&my_space);
}
}
template<typename U>
const U& variant_cast_to() const {
const Wrapper<U> *h = dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space));
if(!h) {
tbb::internal::throw_exception(tbb::internal::eid_bad_tagged_msg_cast);
}
return h->value();
}
template<typename U>
bool variant_is_a() const { return dynamic_cast<const Wrapper<U>*>(punned_cast<const WrapperBase *>(&my_space)) != NULL; }
bool variant_is_default_constructed() const {return variant_is_a<default_constructed>();}
~variant() {
WrapperBase *h = punned_cast<WrapperBase *>(&my_space);
h->~WrapperBase();
}
}; //class variant
TagType my_tag;
variant my_msg;
public:
tagged_msg(): my_tag(TagType(~0)), my_msg(){}
template<typename T, typename R>
tagged_msg(T const &index, R const &value) : my_tag(index), my_msg(value) {}
#if __TBB_CONST_REF_TO_ARRAY_TEMPLATE_PARAM_BROKEN
template<typename T, typename R, size_t N>
tagged_msg(T const &index, R (&value)[N]) : my_tag(index), my_msg(value) {}
#endif
void set_tag(TagType const &index) {my_tag = index;}
TagType tag() const {return my_tag;}
template<typename V>
const V& cast_to() const {return my_msg.template variant_cast_to<V>();}
template<typename V>
bool is_a() const {return my_msg.template variant_is_a<V>();}
bool is_default_constructed() const {return my_msg.variant_is_default_constructed();}
}; //class tagged_msg
// template to simplify cast and test for tagged_msg in template contexts
template<typename V, typename T>
const V& cast_to(T const &t) { return t.template cast_to<V>(); }
template<typename V, typename T>
bool is_a(T const &t) { return t.template is_a<V>(); }
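// Usage sketch (for exposition only; the tag values are arbitrary):
//     tagged_msg<size_t, int, float> msg( size_t(0), 42 );   // stores an int, tag 0
//     if( is_a<int>(msg) ) { int x = cast_to<int>(msg); }    // read-only access
// cast_to to a type other than the one currently stored reports eid_bad_tagged_msg_cast.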
enum op_stat { WAIT = 0, SUCCEEDED, FAILED };
} // namespace internal
#endif /* __TBB__flow_graph_types_impl_H */

View File

@@ -0,0 +1,98 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_mutex_padding_H
#define __TBB_mutex_padding_H
// Wrapper that pads a mutex so it sits alone on a cache line, without requiring that it be
// allocated from a pool. Because padded mutexes may be defined anywhere, each must span two cache lines.
namespace tbb {
namespace interface7 {
namespace internal {
static const size_t cache_line_size = 64;
// Pad a mutex to occupy a number of full cache lines sufficient to avoid false sharing
// with other data; space overhead is up to 2*cache_line_size-1.
template<typename Mutex, bool is_rw> class padded_mutex;
template<typename Mutex>
class padded_mutex<Mutex,false> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m ) : my_scoped_lock(*m.impl()) { }
~scoped_lock() { }
void acquire( padded_mutex& m ) { my_scoped_lock.acquire(*m.impl()); }
bool try_acquire( padded_mutex& m ) { return my_scoped_lock.try_acquire(*m.impl()); }
void release() { my_scoped_lock.release(); }
};
};
template<typename Mutex>
class padded_mutex<Mutex,true> : tbb::internal::mutex_copy_deprecated_and_disabled {
typedef long pad_type;
pad_type my_pad[((sizeof(Mutex)+cache_line_size-1)/cache_line_size+1)*cache_line_size/sizeof(pad_type)];
Mutex *impl() { return (Mutex *)((uintptr_t(this)|(cache_line_size-1))+1);}
public:
static const bool is_rw_mutex = Mutex::is_rw_mutex;
static const bool is_recursive_mutex = Mutex::is_recursive_mutex;
static const bool is_fair_mutex = Mutex::is_fair_mutex;
padded_mutex() { new(impl()) Mutex(); }
~padded_mutex() { impl()->~Mutex(); }
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
typename Mutex::scoped_lock my_scoped_lock;
public:
scoped_lock() : my_scoped_lock() {}
scoped_lock( padded_mutex& m, bool write = true ) : my_scoped_lock(*m.impl(),write) { }
~scoped_lock() { }
void acquire( padded_mutex& m, bool write = true ) { my_scoped_lock.acquire(*m.impl(),write); }
bool try_acquire( padded_mutex& m, bool write = true ) { return my_scoped_lock.try_acquire(*m.impl(),write); }
bool upgrade_to_writer() { return my_scoped_lock.upgrade_to_writer(); }
bool downgrade_to_reader() { return my_scoped_lock.downgrade_to_reader(); }
void release() { my_scoped_lock.release(); }
};
};
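// Usage sketch (for exposition only):
//     padded_mutex<tbb::spin_mutex, /*is_rw=*/false> m;
//     padded_mutex<tbb::spin_mutex, false>::scoped_lock lock( m );
// The pad array is over-sized so that impl() can round 'this' up to the next cache-line
// boundary and construct the real mutex there with placement new.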
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* __TBB_mutex_padding_H */

View File

@@ -0,0 +1,168 @@
/*
Copyright (c) 2019-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_node_handle_H
#define __TBB_node_handle_H
#include "_allocator_traits.h"
#include "../tbb_config.h"
namespace tbb {
// These classes must be declared here to establish the correct friend relationships.
// TODO: Consider creating some internal class to access node_handle private fields without any friend classes.
namespace interface5 {
namespace internal {
template <typename T, typename Allocator>
class split_ordered_list;
template <typename Traits>
class concurrent_unordered_base;
}
}
namespace interface10{
namespace internal {
template<typename Traits>
class concurrent_skip_list;
}
}
namespace internal {
template<typename Value, typename Node, typename Allocator>
class node_handle_base {
public:
typedef Allocator allocator_type;
protected:
typedef Node node;
typedef tbb::internal::allocator_traits<allocator_type> traits_type;
public:
node_handle_base() : my_node(NULL), my_allocator() {}
node_handle_base(node_handle_base&& nh) : my_node(nh.my_node),
my_allocator(std::move(nh.my_allocator)) {
nh.my_node = NULL;
}
bool empty() const { return my_node == NULL; }
explicit operator bool() const { return my_node != NULL; }
~node_handle_base() { internal_destroy(); }
node_handle_base& operator=(node_handle_base&& nh) {
internal_destroy();
my_node = nh.my_node;
typedef typename traits_type::propagate_on_container_move_assignment pocma_type;
tbb::internal::allocator_move_assignment(my_allocator, nh.my_allocator, pocma_type());
nh.deactivate();
return *this;
}
void swap(node_handle_base& nh) {
std::swap(my_node, nh.my_node);
typedef typename traits_type::propagate_on_container_swap pocs_type;
tbb::internal::allocator_swap(my_allocator, nh.my_allocator, pocs_type());
}
allocator_type get_allocator() const {
return my_allocator;
}
protected:
node_handle_base(node* n) : my_node(n) {}
void internal_destroy() {
if(my_node) {
traits_type::destroy(my_allocator, my_node->storage());
typename tbb::internal::allocator_rebind<allocator_type, node>::type node_allocator;
node_allocator.deallocate(my_node, 1);
}
}
void deactivate() { my_node = NULL; }
node* my_node;
allocator_type my_allocator;
};
// node handle for maps
template<typename Key, typename Value, typename Node, typename Allocator>
class node_handle : public node_handle_base<Value, Node, Allocator> {
typedef node_handle_base<Value, Node, Allocator> base_type;
public:
typedef Key key_type;
typedef typename Value::second_type mapped_type;
typedef typename base_type::allocator_type allocator_type;
node_handle() : base_type() {}
key_type& key() const {
__TBB_ASSERT(!this->empty(), "Cannot get key from the empty node_type object");
return *const_cast<key_type*>(&(this->my_node->value().first));
}
mapped_type& mapped() const {
__TBB_ASSERT(!this->empty(), "Cannot get mapped value from the empty node_type object");
return this->my_node->value().second;
}
private:
template<typename T, typename A>
friend class tbb::interface5::internal::split_ordered_list;
template<typename Traits>
friend class tbb::interface5::internal::concurrent_unordered_base;
template<typename Traits>
friend class tbb::interface10::internal::concurrent_skip_list;
node_handle(typename base_type::node* n) : base_type(n) {}
};
// node handle for sets
template<typename Key, typename Node, typename Allocator>
class node_handle<Key, Key, Node, Allocator> : public node_handle_base<Key, Node, Allocator> {
typedef node_handle_base<Key, Node, Allocator> base_type;
public:
typedef Key value_type;
typedef typename base_type::allocator_type allocator_type;
node_handle() : base_type() {}
value_type& value() const {
__TBB_ASSERT(!this->empty(), "Cannot get value from the empty node_type object");
return *const_cast<value_type*>(&(this->my_node->value()));
}
private:
template<typename T, typename A>
friend class tbb::interface5::internal::split_ordered_list;
template<typename Traits>
friend class tbb::interface5::internal::concurrent_unordered_base;
template<typename Traits>
friend class tbb::interface10::internal::concurrent_skip_list;
node_handle(typename base_type::node* n) : base_type(n) {}
};
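// A node handle is created by the owning container when a node is extracted (for example by
// the unsafe_extract members of the concurrent unordered containers); it owns the node until
// the handle is either moved back into a container by insert() or destroyed, at which point
// internal_destroy() destroys the stored value and deallocates the node.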
}// namespace internal
}// namespace tbb
#endif /*__TBB_node_handle_H*/

View File

@@ -0,0 +1,66 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_range_iterator_H
#define __TBB_range_iterator_H
#include "../tbb_stddef.h"
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
#include <iterator>
#endif
namespace tbb {
// iterators to first and last elements of container
namespace internal {
#if __TBB_CPP11_STD_BEGIN_END_PRESENT && __TBB_CPP11_AUTO_PRESENT && __TBB_CPP11_DECLTYPE_PRESENT
using std::begin;
using std::end;
template<typename Container>
auto first(Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto first(const Container& c)-> decltype(begin(c)) {return begin(c);}
template<typename Container>
auto last(Container& c)-> decltype(begin(c)) {return end(c);}
template<typename Container>
auto last(const Container& c)-> decltype(begin(c)) {return end(c);}
#else
template<typename Container>
typename Container::iterator first(Container& c) {return c.begin();}
template<typename Container>
typename Container::const_iterator first(const Container& c) {return c.begin();}
template<typename Container>
typename Container::iterator last(Container& c) {return c.end();}
template<typename Container>
typename Container::const_iterator last(const Container& c) {return c.end();}
#endif
template<typename T, size_t size>
T* first(T (&arr) [size]) {return arr;}
template<typename T, size_t size>
T* last(T (&arr) [size]) {return arr + size;}
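// first()/last() give a uniform begin/end interface: for containers they forward to
// begin()/end() (via std::begin/std::end when available), and for built-in arrays they
// return pointers to the first element and one past the last element.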
} //namespace internal
} //namespace tbb
#endif // __TBB_range_iterator_H

View File

@@ -0,0 +1,105 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// must be included outside namespaces.
#ifndef __TBB_tbb_hash_compare_impl_H
#define __TBB_tbb_hash_compare_impl_H
#include <string>
namespace tbb {
namespace interface5 {
namespace internal {
// Template class for hash compare
template<typename Key, typename Hasher, typename Key_equality>
class hash_compare
{
public:
typedef Hasher hasher;
typedef Key_equality key_equal;
hash_compare() {}
hash_compare(Hasher a_hasher) : my_hash_object(a_hasher) {}
hash_compare(Hasher a_hasher, Key_equality a_keyeq) : my_hash_object(a_hasher), my_key_compare_object(a_keyeq) {}
size_t operator()(const Key& key) const {
return ((size_t)my_hash_object(key));
}
bool operator()(const Key& key1, const Key& key2) const {
// TODO: get rid of the result inversion
return (!my_key_compare_object(key1, key2));
}
Hasher my_hash_object; // The hash object
Key_equality my_key_compare_object; // The equality comparator object
};
//! Hash multiplier
static const size_t hash_multiplier = tbb::internal::select_size_t_constant<2654435769U, 11400714819323198485ULL>::value;
} // namespace internal
//! Hasher functions
template<typename T>
__TBB_DEPRECATED_MSG("tbb::tbb_hasher is deprecated, use std::hash") inline size_t tbb_hasher( const T& t ) {
return static_cast<size_t>( t ) * internal::hash_multiplier;
}
template<typename P>
__TBB_DEPRECATED_MSG("tbb::tbb_hasher is deprecated, use std::hash") inline size_t tbb_hasher( P* ptr ) {
size_t const h = reinterpret_cast<size_t>( ptr );
return (h >> 3) ^ h;
}
template<typename E, typename S, typename A>
__TBB_DEPRECATED_MSG("tbb::tbb_hasher is deprecated, use std::hash") inline size_t tbb_hasher( const std::basic_string<E,S,A>& s ) {
size_t h = 0;
for( const E* c = s.c_str(); *c; ++c )
h = static_cast<size_t>(*c) ^ (h * internal::hash_multiplier);
return h;
}
template<typename F, typename S>
__TBB_DEPRECATED_MSG("tbb::tbb_hasher is deprecated, use std::hash") inline size_t tbb_hasher( const std::pair<F,S>& p ) {
return tbb_hasher(p.first) ^ tbb_hasher(p.second);
}
} // namespace interface5
using interface5::tbb_hasher;
// Template class for hash compare
template<typename Key>
class __TBB_DEPRECATED_MSG("tbb::tbb_hash is deprecated, use std::hash") tbb_hash
{
public:
tbb_hash() {}
size_t operator()(const Key& key) const
{
return tbb_hasher(key);
}
};
//! hash_compare that is default argument for concurrent_hash_map
template<typename Key>
struct tbb_hash_compare {
static size_t hash( const Key& a ) { return tbb_hasher(a); }
static bool equal( const Key& a, const Key& b ) { return a == b; }
};
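// Usage sketch (for exposition only; MyKey is a hypothetical user type with an 'id' field):
//     struct MyKeyHashCompare {
//         static size_t hash( const MyKey& k ) { return tbb::tbb_hasher( k.id ); }
//         static bool equal( const MyKey& a, const MyKey& b ) { return a.id == b.id; }
//     };
//     tbb::concurrent_hash_map<MyKey, int, MyKeyHashCompare> table;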
} // namespace tbb
#endif /* __TBB_tbb_hash_compare_impl_H */

View File

@@ -0,0 +1,79 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
TBB_STRING_RESOURCE(FLOW_BROADCAST_NODE, "broadcast_node")
TBB_STRING_RESOURCE(FLOW_BUFFER_NODE, "buffer_node")
TBB_STRING_RESOURCE(FLOW_CONTINUE_NODE, "continue_node")
TBB_STRING_RESOURCE(FLOW_FUNCTION_NODE, "function_node")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_QUEUEING, "join_node (queueing)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_RESERVING, "join_node (reserving)")
TBB_STRING_RESOURCE(FLOW_JOIN_NODE_TAG_MATCHING, "join_node (tag_matching)")
TBB_STRING_RESOURCE(FLOW_LIMITER_NODE, "limiter_node")
TBB_STRING_RESOURCE(FLOW_MULTIFUNCTION_NODE, "multifunction_node")
TBB_STRING_RESOURCE(FLOW_OR_NODE, "or_node") //no longer in use, kept for backward compatibility
TBB_STRING_RESOURCE(FLOW_OVERWRITE_NODE, "overwrite_node")
TBB_STRING_RESOURCE(FLOW_PRIORITY_QUEUE_NODE, "priority_queue_node")
TBB_STRING_RESOURCE(FLOW_QUEUE_NODE, "queue_node")
TBB_STRING_RESOURCE(FLOW_SEQUENCER_NODE, "sequencer_node")
TBB_STRING_RESOURCE(FLOW_SOURCE_NODE, "source_node")
TBB_STRING_RESOURCE(FLOW_SPLIT_NODE, "split_node")
TBB_STRING_RESOURCE(FLOW_WRITE_ONCE_NODE, "write_once_node")
TBB_STRING_RESOURCE(FLOW_BODY, "body")
TBB_STRING_RESOURCE(FLOW_GRAPH, "graph")
TBB_STRING_RESOURCE(FLOW_NODE, "node")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT, "input_port")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_0, "input_port_0")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_1, "input_port_1")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_2, "input_port_2")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_3, "input_port_3")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_4, "input_port_4")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_5, "input_port_5")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_6, "input_port_6")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_7, "input_port_7")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_8, "input_port_8")
TBB_STRING_RESOURCE(FLOW_INPUT_PORT_9, "input_port_9")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT, "output_port")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_0, "output_port_0")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_1, "output_port_1")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_2, "output_port_2")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_3, "output_port_3")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_4, "output_port_4")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_5, "output_port_5")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_6, "output_port_6")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_7, "output_port_7")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_8, "output_port_8")
TBB_STRING_RESOURCE(FLOW_OUTPUT_PORT_9, "output_port_9")
TBB_STRING_RESOURCE(FLOW_OBJECT_NAME, "object_name")
TBB_STRING_RESOURCE(FLOW_NULL, "null")
TBB_STRING_RESOURCE(FLOW_INDEXER_NODE, "indexer_node")
TBB_STRING_RESOURCE(FLOW_COMPOSITE_NODE, "composite_node")
TBB_STRING_RESOURCE(FLOW_ASYNC_NODE, "async_node")
TBB_STRING_RESOURCE(FLOW_OPENCL_NODE, "opencl_node")
TBB_STRING_RESOURCE(ALGORITHM, "tbb_algorithm")
TBB_STRING_RESOURCE(PARALLEL_FOR, "tbb_parallel_for")
TBB_STRING_RESOURCE(PARALLEL_DO, "tbb_parallel_do")
TBB_STRING_RESOURCE(PARALLEL_INVOKE, "tbb_parallel_invoke")
TBB_STRING_RESOURCE(PARALLEL_REDUCE, "tbb_parallel_reduce")
TBB_STRING_RESOURCE(PARALLEL_SCAN, "tbb_parallel_scan")
TBB_STRING_RESOURCE(PARALLEL_SORT, "tbb_parallel_sort")
TBB_STRING_RESOURCE(CUSTOM_CTX, "tbb_custom")
TBB_STRING_RESOURCE(FLOW_TASKS, "tbb_flow_graph")
TBB_STRING_RESOURCE(PARALLEL_FOR_TASK, "tbb_parallel_for_task")
// TODO: Drop the following string prefix "fgt_" here and in FGA's collector
TBB_STRING_RESOURCE(USER_EVENT, "fgt_user_event")
#if __TBB_CPF_BUILD || (TBB_PREVIEW_FLOW_GRAPH_TRACE && TBB_USE_THREADING_TOOLS)
TBB_STRING_RESOURCE(CODE_ADDRESS, "code_address")
#endif

View File

@@ -0,0 +1,55 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef _FGT_TBB_TRACE_IMPL_H
#define _FGT_TBB_TRACE_IMPL_H
#include "../tbb_profiling.h"
namespace tbb {
namespace internal {
#if TBB_PREVIEW_ALGORITHM_TRACE
static inline void fgt_algorithm( string_index t, void *algorithm, void *parent ) {
itt_make_task_group( ITT_DOMAIN_FLOW, algorithm, ALGORITHM, parent, ALGORITHM, t );
}
static inline void fgt_begin_algorithm( string_index t, void *algorithm ) {
itt_task_begin( ITT_DOMAIN_FLOW, algorithm, ALGORITHM, NULL, FLOW_NULL, t );
}
static inline void fgt_end_algorithm( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
static inline void fgt_alg_begin_body( string_index t, void *body, void *algorithm ) {
itt_task_begin( ITT_DOMAIN_FLOW, body, FLOW_BODY, algorithm, ALGORITHM, t );
}
static inline void fgt_alg_end_body( void * ) {
itt_task_end( ITT_DOMAIN_FLOW );
}
#else // TBB_PREVIEW_ALGORITHM_TRACE
static inline void fgt_algorithm( string_index /*t*/, void * /*algorithm*/, void * /*parent*/ ) { }
static inline void fgt_begin_algorithm( string_index /*t*/, void * /*algorithm*/ ) { }
static inline void fgt_end_algorithm( void * ) { }
static inline void fgt_alg_begin_body( string_index /*t*/, void * /*body*/, void * /*algorithm*/ ) { }
static inline void fgt_alg_end_body( void * ) { }
#endif // TBB_PREVIEW_ALGORITHM_TRACE
} // namespace internal
} // namespace tbb
#endif

View File

@@ -0,0 +1,69 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_tbb_windef_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif /* __TBB_tbb_windef_H */
// Check that the target Windows version has all API calls required for TBB.
// Do not increase the version in condition beyond 0x0501 without prior discussion!
#if defined(_WIN32_WINNT) && _WIN32_WINNT<0x0501
#error TBB is unable to run on old Windows versions; _WIN32_WINNT must be 0x0501 or greater.
#endif
#if !defined(_MT)
#error TBB requires linkage with multithreaded C/C++ runtime library. \
Choose multithreaded DLL runtime in project settings, or use /MD[d] compiler switch.
#endif
// Workaround for the problem with MSVC headers failing to define namespace std
namespace std {
using ::size_t; using ::ptrdiff_t;
}
#define __TBB_STRING_AUX(x) #x
#define __TBB_STRING(x) __TBB_STRING_AUX(x)
// Default setting of TBB_USE_DEBUG
#ifdef TBB_USE_DEBUG
# if TBB_USE_DEBUG
# if !defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MDd if compiling with TBB_USE_DEBUG!=0")
# endif
# else
# if defined(_DEBUG)
# pragma message(__FILE__ "(" __TBB_STRING(__LINE__) ") : Warning: Recommend using /MD if compiling with TBB_USE_DEBUG==0")
# endif
# endif
#endif
#if (__TBB_BUILD || __TBBMALLOC_BUILD || __TBBBIND_BUILD) && !defined(__TBB_NO_IMPLICIT_LINKAGE)
#define __TBB_NO_IMPLICIT_LINKAGE 1
#endif
#if _MSC_VER
#if !__TBB_NO_IMPLICIT_LINKAGE
#ifdef __TBB_LIB_NAME
#pragma comment(lib, __TBB_STRING(__TBB_LIB_NAME))
#else
#ifdef _DEBUG
#pragma comment(lib, "tbb_debug.lib")
#else
#pragma comment(lib, "tbb.lib")
#endif
#endif
#endif
#endif

View File

@@ -0,0 +1,284 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_template_helpers_H
#define __TBB_template_helpers_H
#include <utility>
#include <cstddef>
#include "../tbb_config.h"
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT
#include <type_traits>
#endif
#if __TBB_CPP11_PRESENT
#include <iterator>
#include <memory> // allocator_traits
#endif
namespace tbb { namespace internal {
//! Enables one or the other code branch
template<bool Condition, typename T = void> struct enable_if {};
template<typename T> struct enable_if<true, T> { typedef T type; };
//! Strips its template type argument from cv- and ref-qualifiers
template<typename T> struct strip { typedef T type; };
template<typename T> struct strip<const T> { typedef T type; };
template<typename T> struct strip<volatile T> { typedef T type; };
template<typename T> struct strip<const volatile T> { typedef T type; };
template<typename T> struct strip<T&> { typedef T type; };
template<typename T> struct strip<const T&> { typedef T type; };
template<typename T> struct strip<volatile T&> { typedef T type; };
template<typename T> struct strip<const volatile T&> { typedef T type; };
//! Specialization for function pointers
template<typename T> struct strip<T(&)()> { typedef T(*type)(); };
#if __TBB_CPP11_RVALUE_REF_PRESENT
template<typename T> struct strip<T&&> { typedef T type; };
template<typename T> struct strip<const T&&> { typedef T type; };
template<typename T> struct strip<volatile T&&> { typedef T type; };
template<typename T> struct strip<const volatile T&&> { typedef T type; };
#endif
//! Specialization for arrays converts to a corresponding pointer
template<typename T, std::size_t N> struct strip<T(&)[N]> { typedef T* type; };
template<typename T, std::size_t N> struct strip<const T(&)[N]> { typedef const T* type; };
template<typename T, std::size_t N> struct strip<volatile T(&)[N]> { typedef volatile T* type; };
template<typename T, std::size_t N> struct strip<const volatile T(&)[N]> { typedef const volatile T* type; };
//! Detects whether two given types are the same
template<class U, class V> struct is_same_type { static const bool value = false; };
template<class W> struct is_same_type<W,W> { static const bool value = true; };
template<typename T> struct is_ref { static const bool value = false; };
template<typename U> struct is_ref<U&> { static const bool value = true; };
//! Partial support for std::is_integral
template<typename T> struct is_integral_impl { static const bool value = false; };
template<> struct is_integral_impl<bool> { static const bool value = true; };
template<> struct is_integral_impl<char> { static const bool value = true; };
#if __TBB_CPP11_PRESENT
template<> struct is_integral_impl<char16_t> { static const bool value = true; };
template<> struct is_integral_impl<char32_t> { static const bool value = true; };
#endif
template<> struct is_integral_impl<wchar_t> { static const bool value = true; };
template<> struct is_integral_impl<short> { static const bool value = true; };
template<> struct is_integral_impl<int> { static const bool value = true; };
template<> struct is_integral_impl<long> { static const bool value = true; };
template<> struct is_integral_impl<long long> { static const bool value = true; };
template<typename T>
struct is_integral : is_integral_impl<typename strip<T>::type> {};
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
//! std::void_t internal implementation (to avoid GCC < 4.7 "template aliases" absence)
template<typename...> struct void_t { typedef void type; };
#endif
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT
// Generic SFINAE helper for expression checks, based on the idea demonstrated in ISO C++ paper n4502
template<typename T, typename, template<typename> class... Checks>
struct supports_impl { typedef std::false_type type; };
template<typename T, template<typename> class... Checks>
struct supports_impl<T, typename void_t<Checks<T>...>::type, Checks...> { typedef std::true_type type; };
template<typename T, template<typename> class... Checks>
using supports = typename supports_impl<T, void, Checks...>::type;
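// Illustrative sketch of how supports<> is typically used (has_size is a hypothetical check, not part of this header):
//     template<typename T> using has_size = decltype( std::declval<T>().size() );
//     // supports<std::vector<int>, has_size>::value is true; supports<int, has_size>::value is false.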
#endif /* __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT */
#if __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
//! Allows storing a function parameter pack as a variable and later passing it to another function
template< typename... Types >
struct stored_pack;
template<>
struct stored_pack<>
{
typedef stored_pack<> pack_type;
stored_pack() {}
// Friend front-end functions
template< typename F, typename Pack > friend void call( F&& f, Pack&& p );
template< typename Ret, typename F, typename Pack > friend Ret call_and_return( F&& f, Pack&& p );
protected:
// Ideally, ref-qualified non-static methods would be used,
// but that would greatly reduce the set of compilers where it works.
template< typename Ret, typename F, typename... Preceding >
static Ret call( F&& f, const pack_type& /*pack*/, Preceding&&... params ) {
return std::forward<F>(f)( std::forward<Preceding>(params)... );
}
template< typename Ret, typename F, typename... Preceding >
static Ret call( F&& f, pack_type&& /*pack*/, Preceding&&... params ) {
return std::forward<F>(f)( std::forward<Preceding>(params)... );
}
};
template< typename T, typename... Types >
struct stored_pack<T, Types...> : stored_pack<Types...>
{
typedef stored_pack<T, Types...> pack_type;
typedef stored_pack<Types...> pack_remainder;
// Since lifetime of original values is out of control, copies should be made.
// Thus references should be stripped away from the deduced type.
typename strip<T>::type leftmost_value;
// Here rvalue references act in the same way as forwarding references,
// as long as class template parameters were deduced via forwarding references.
stored_pack( T&& t, Types&&... types )
: pack_remainder(std::forward<Types>(types)...), leftmost_value(std::forward<T>(t)) {}
// Friend front-end functions
template< typename F, typename Pack > friend void call( F&& f, Pack&& p );
template< typename Ret, typename F, typename Pack > friend Ret call_and_return( F&& f, Pack&& p );
protected:
template< typename Ret, typename F, typename... Preceding >
static Ret call( F&& f, pack_type& pack, Preceding&&... params ) {
return pack_remainder::template call<Ret>(
std::forward<F>(f), static_cast<pack_remainder&>(pack),
std::forward<Preceding>(params)... , pack.leftmost_value
);
}
template< typename Ret, typename F, typename... Preceding >
static Ret call( F&& f, const pack_type& pack, Preceding&&... params ) {
return pack_remainder::template call<Ret>(
std::forward<F>(f), static_cast<const pack_remainder&>(pack),
std::forward<Preceding>(params)... , pack.leftmost_value
);
}
template< typename Ret, typename F, typename... Preceding >
static Ret call( F&& f, pack_type&& pack, Preceding&&... params ) {
return pack_remainder::template call<Ret>(
std::forward<F>(f), static_cast<pack_remainder&&>(pack),
std::forward<Preceding>(params)... , std::move(pack.leftmost_value)
);
}
};
//! Calls the given function with arguments taken from a stored_pack
template< typename F, typename Pack >
void call( F&& f, Pack&& p ) {
strip<Pack>::type::template call<void>( std::forward<F>(f), std::forward<Pack>(p) );
}
template< typename Ret, typename F, typename Pack >
Ret call_and_return( F&& f, Pack&& p ) {
return strip<Pack>::type::template call<Ret>( std::forward<F>(f), std::forward<Pack>(p) );
}
template< typename... Types >
stored_pack<Types...> save_pack( Types&&... types ) {
return stored_pack<Types...>( std::forward<Types>(types)... );
}
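// Minimal usage sketch (illustrative; the lambda and the stored values are arbitrary):
//     auto pack = save_pack( 42, std::string("x") );   // copies both arguments into the pack
//     call( []( int i, const std::string& s ) { /* use i and s */ }, std::move(pack) );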
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT && __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT */
#if __TBB_CPP14_INTEGER_SEQUENCE_PRESENT
using std::index_sequence;
using std::make_index_sequence;
#elif __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT && __TBB_CPP11_TEMPLATE_ALIASES_PRESENT
template<std::size_t... S> class index_sequence {};
template<std::size_t N, std::size_t... S>
struct make_index_sequence_impl : make_index_sequence_impl < N - 1, N - 1, S... > {};
template<std::size_t... S>
struct make_index_sequence_impl <0, S...> {
using type = index_sequence<S...>;
};
template<std::size_t N>
using make_index_sequence = typename tbb::internal::make_index_sequence_impl<N>::type;
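// For example, make_index_sequence<3> denotes index_sequence<0,1,2>; it is typically expanded as
// std::get<Is>(t)... inside a helper that takes index_sequence<Is...> (see make_references in tbb/iterators.h).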
#endif /* __TBB_CPP14_INTEGER_SEQUENCE_PRESENT */
#if __TBB_CPP11_VARIADIC_TEMPLATES_PRESENT
template<typename... Args>
struct conjunction;
template<typename First, typename... Args>
struct conjunction<First, Args...>
: std::conditional<bool(First::value), conjunction<Args...>, First>::type {};
template<typename T>
struct conjunction<T> : T {};
template<>
struct conjunction<> : std::true_type {};
#endif
#if __TBB_CPP11_PRESENT
template< typename Iter >
using iterator_value_t = typename std::iterator_traits<Iter>::value_type;
template< typename Iter >
using iterator_key_t = typename std::remove_const<typename iterator_value_t<Iter>::first_type>::type;
template< typename Iter >
using iterator_mapped_t = typename iterator_value_t<Iter>::second_type;
template< typename A > using value_type = typename A::value_type;
template< typename A > using alloc_ptr_t = typename std::allocator_traits<A>::pointer;
template< typename A > using has_allocate = decltype(std::declval<alloc_ptr_t<A>&>() = std::declval<A>().allocate(0));
template< typename A > using has_deallocate = decltype(std::declval<A>().deallocate(std::declval<alloc_ptr_t<A>>(), 0));
// value_type should be checked first because it can be used in other checks (via allocator_traits)
template< typename T >
using is_allocator = supports<T, value_type, has_allocate, has_deallocate>;
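// For instance (illustrative): is_allocator< std::allocator<int> >::value is true,
// while is_allocator<int>::value is false.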
#if __TBB_CPP14_VARIABLE_TEMPLATES_PRESENT
template< typename T >
static constexpr bool is_allocator_v = is_allocator<T>::value;
#endif /*__TBB_CPP14_VARIABLE_TEMPLATES */
template< std::size_t N, typename... Args >
struct pack_element {
using type = void;
};
template< std::size_t N, typename T, typename... Args >
struct pack_element<N, T, Args...> {
using type = typename pack_element<N - 1, Args...>::type;
};
template< typename T, typename... Args >
struct pack_element<0, T, Args...> {
using type = T;
};
template< std::size_t N, typename... Args >
using pack_element_t = typename pack_element<N, Args...>::type;
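// For example, pack_element_t<0, char, int, double> is char and pack_element_t<2, char, int, double> is double.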
// Helper alias for heterogeneous lookup functions in containers
// template parameter K and std::conditional are needed to provide immediate context
// and postpone getting is_transparent from the compare functor until method instantiation.
template <typename Comp, typename K>
using is_transparent = typename std::conditional<true, Comp, K>::type::is_transparent;
#endif /* __TBB_CPP11_PRESENT */
} } // namespace internal, namespace tbb
#endif /* __TBB_template_helpers_H */

View File

@@ -0,0 +1,27 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES
#if __INTEL_COMPILER || _MSC_VER
#pragma warning( pop )
#elif __GNUC__
#pragma GCC diagnostic pop
#elif __clang__
#pragma clang diagnostic pop
#endif
#endif // __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES

View File

@@ -0,0 +1,32 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "../tbb_config.h"
#if __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES
#if _MSC_VER || __INTEL_COMPILER
#pragma warning( push )
#pragma warning( disable: 4996 )
#elif __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#elif __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#endif
#endif // __TBB_SUPPRESS_INTERNAL_DEPRECATED_MESSAGES

View File

@@ -0,0 +1,144 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__x86_eliding_mutex_impl_H
#define __TBB__x86_eliding_mutex_impl_H
#ifndef __TBB_spin_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ( __TBB_x86_32 || __TBB_x86_64 )
namespace tbb {
namespace interface7 {
namespace internal {
template<typename Mutex, bool is_rw>
class padded_mutex;
//! An eliding lock that occupies a single byte.
/** An x86_eliding_mutex is an HLE-enabled spin mutex. It is recommended to
put the mutex on a cache line that is not shared by the data it protects.
It should be used for locking short critical sections where the lock is
contended but the data it protects are not. If zero-initialized, the
mutex is considered unheld.
@ingroup synchronization */
class x86_eliding_mutex : tbb::internal::mutex_copy_deprecated_and_disabled {
//! 0 if lock is released, 1 if lock is acquired.
__TBB_atomic_flag flag;
friend class padded_mutex<x86_eliding_mutex, false>;
public:
//! Construct unacquired lock.
/** Equivalent to zero-initialization of *this. */
x86_eliding_mutex() : flag(0) {}
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration above.
// Make the scoped_lock public in that case.
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// padded_mutex template.
private:
#endif
// scoped_lock in padded_mutex<> is the interface to use.
//! Represents acquisition of a mutex.
class scoped_lock : tbb::internal::no_copy {
private:
//! Points to currently held mutex, or NULL if no lock is held.
x86_eliding_mutex* my_mutex;
public:
//! Construct without acquiring a mutex.
scoped_lock() : my_mutex(NULL) {}
//! Construct and acquire lock on a mutex.
scoped_lock( x86_eliding_mutex& m ) : my_mutex(NULL) { acquire(m); }
//! Acquire lock.
void acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
my_mutex=&m;
my_mutex->lock();
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_acquire( x86_eliding_mutex& m ) {
__TBB_ASSERT( !my_mutex, "already holding a lock" );
bool result = m.try_lock();
if( result ) {
my_mutex = &m;
}
return result;
}
//! Release lock
void release() {
__TBB_ASSERT( my_mutex, "release on scoped_lock that is not holding a lock" );
my_mutex->unlock();
my_mutex = NULL;
}
//! Destroy lock. If holding a lock, releases the lock first.
~scoped_lock() {
if( my_mutex ) {
release();
}
}
};
#if __TBB_USE_X86_ELIDING_MUTEX || __TBB_GCC_VERSION < 40000
#else
public:
#endif /* __TBB_USE_X86_ELIDING_MUTEX */
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
__TBB_LockByteElided(flag);
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
return __TBB_TryLockByteElided(flag);
}
//! Release lock
void unlock() {
__TBB_UnlockByteElided( flag );
}
}; // end of x86_eliding_mutex
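// Illustrative scoped-lock usage sketch. In client code this mutex is normally reached through its
// padded wrapper (e.g. tbb::speculative_spin_mutex) rather than used directly; the pattern is the same:
//     mutex_type m;
//     {
//         mutex_type::scoped_lock lock( m );   // acquires the mutex, possibly eliding the lock via HLE
//         /* short critical section */
//     }                                        // the destructor releases the mutex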
} // namespace internal
} // namespace interface7
} // namespace tbb
#endif /* ( __TBB_x86_32 || __TBB_x86_64 ) */
#endif /* __TBB__x86_eliding_mutex_impl_H */

View File

@@ -0,0 +1,223 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB__x86_rtm_rw_mutex_impl_H
#define __TBB__x86_rtm_rw_mutex_impl_H
#ifndef __TBB_spin_rw_mutex_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if __TBB_TSX_AVAILABLE
#include "../tbb_stddef.h"
#include "../tbb_machine.h"
#include "../tbb_profiling.h"
#include "../spin_rw_mutex.h"
namespace tbb {
namespace interface8 {
namespace internal {
enum RTM_type {
RTM_not_in_mutex,
RTM_transacting_reader,
RTM_transacting_writer,
RTM_real_reader,
RTM_real_writer
};
static const unsigned long speculation_granularity = 64;
//! Fast, unfair, spinning speculation-enabled reader-writer lock with backoff and
// writer-preference
/** @ingroup synchronization */
class x86_rtm_rw_mutex: private spin_rw_mutex {
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
// bug in gcc 3.x.x causes syntax error in spite of the friend declaration below.
// Make the scoped_lock public in that case.
public:
#else
private:
#endif
friend class interface7::internal::padded_mutex<x86_rtm_rw_mutex,true>;
class scoped_lock; // should be private
friend class scoped_lock;
private:
//! @cond INTERNAL
//! Internal construct unacquired mutex.
void __TBB_EXPORTED_METHOD internal_construct();
//! Internal acquire write lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_writer(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal acquire read lock.
// only_speculate == true if we're doing a try_lock, else false.
void __TBB_EXPORTED_METHOD internal_acquire_reader(x86_rtm_rw_mutex::scoped_lock&, bool only_speculate=false);
//! Internal upgrade reader to become a writer.
bool __TBB_EXPORTED_METHOD internal_upgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Out of line code for downgrading a writer to a reader.
bool __TBB_EXPORTED_METHOD internal_downgrade( x86_rtm_rw_mutex::scoped_lock& );
//! Internal try_acquire write lock.
bool __TBB_EXPORTED_METHOD internal_try_acquire_writer( x86_rtm_rw_mutex::scoped_lock& );
//! Internal release lock.
void __TBB_EXPORTED_METHOD internal_release( x86_rtm_rw_mutex::scoped_lock& );
static x86_rtm_rw_mutex* internal_get_mutex( const spin_rw_mutex::scoped_lock& lock )
{
return static_cast<x86_rtm_rw_mutex*>( lock.mutex );
}
static void internal_set_mutex( spin_rw_mutex::scoped_lock& lock, spin_rw_mutex* mtx )
{
lock.mutex = mtx;
}
//! @endcond
public:
//! Construct unacquired mutex.
x86_rtm_rw_mutex() {
w_flag = false;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
#if TBB_USE_ASSERT
//! Empty destructor.
~x86_rtm_rw_mutex() {}
#endif /* TBB_USE_ASSERT */
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
#if __TBB_USE_X86_RTM_RW_MUTEX || __TBB_GCC_VERSION < 40000
#else
// by default we will not provide the scoped_lock interface. The user
// should use the padded version of the mutex. scoped_lock is used in
// padded_mutex template.
private:
#endif
//! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release the lock.
It also nicely provides the "node" for queuing locks. */
// Speculation-enabled scoped lock for spin_rw_mutex
// The idea is to be able to reuse the acquire/release methods of spin_rw_mutex
// and its scoped lock wherever possible. The only way to use a speculative lock is to use
// a scoped_lock. (because transaction_state must be local)
class scoped_lock : tbb::internal::no_copy {
friend class x86_rtm_rw_mutex;
spin_rw_mutex::scoped_lock my_scoped_lock;
RTM_type transaction_state;
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() : my_scoped_lock(), transaction_state(RTM_not_in_mutex) {
}
//! Acquire lock on given mutex.
scoped_lock( x86_rtm_rw_mutex& m, bool write = true ) : my_scoped_lock(),
transaction_state(RTM_not_in_mutex) {
acquire(m, write);
}
//! Release lock (if lock is held).
~scoped_lock() {
if(transaction_state != RTM_not_in_mutex) release();
}
//! Acquire lock on given mutex.
void acquire( x86_rtm_rw_mutex& m, bool write = true ) {
if( write ) m.internal_acquire_writer(*this);
else m.internal_acquire_reader(*this);
}
//! Release lock
void release() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
__TBB_ASSERT( transaction_state!=RTM_not_in_mutex, "lock is not acquired" );
return mutex->internal_release(*this);
}
//! Upgrade reader to become a writer.
/** Returns whether the upgrade happened without releasing and re-acquiring the lock */
bool upgrade_to_writer() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
if (transaction_state == RTM_transacting_writer || transaction_state == RTM_real_writer)
return true; // Already a writer
return mutex->internal_upgrade(*this);
}
//! Downgrade writer to become a reader.
/** Returns whether the downgrade happened without releasing and re-acquiring the lock */
bool downgrade_to_reader() {
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( mutex, "lock is not acquired" );
if (transaction_state == RTM_transacting_reader || transaction_state == RTM_real_reader)
return true; // Already a reader
return mutex->internal_downgrade(*this);
}
//! Attempt to acquire mutex.
/** returns true if successful. */
bool try_acquire( x86_rtm_rw_mutex& m, bool write = true ) {
#if TBB_USE_ASSERT
x86_rtm_rw_mutex* mutex = x86_rtm_rw_mutex::internal_get_mutex(my_scoped_lock);
__TBB_ASSERT( !mutex, "lock is already acquired" );
#endif
// have to assign m to our mutex.
// cannot set the mutex, because try_acquire in spin_rw_mutex depends on it being NULL.
if(write) return m.internal_try_acquire_writer(*this);
// speculatively acquire the lock. If this fails, do try_acquire on the spin_rw_mutex.
m.internal_acquire_reader(*this, /*only_speculate=*/true);
if(transaction_state == RTM_transacting_reader) return true;
if( my_scoped_lock.try_acquire(m, false)) {
transaction_state = RTM_real_reader;
return true;
}
return false;
}
}; // class x86_rtm_rw_mutex::scoped_lock
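// Illustrative reader/writer usage sketch (mutex_type stands for the padded public wrapper,
// e.g. tbb::speculative_spin_rw_mutex, through which this class is normally used):
//     mutex_type m;
//     {
//         mutex_type::scoped_lock lock( m, /*write=*/false );   // speculative or real read lock
//         if( must_modify ) lock.upgrade_to_writer();
//     }                                                         // the destructor releases the lock if still held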
// ISO C++0x compatibility methods not provided because we cannot maintain
// state about whether a thread is in a transaction.
private:
char pad[speculation_granularity-sizeof(spin_rw_mutex)]; // padding
// If true, writer holds the spin_rw_mutex.
tbb::atomic<bool> w_flag; // want this on a separate cache line
}; // x86_rtm_rw_mutex
} // namespace internal
} // namespace interface8
} // namespace tbb
#endif /* __TBB_TSX_AVAILABLE */
#endif /* __TBB__x86_rtm_rw_mutex_impl_H */

View File

@@ -0,0 +1,326 @@
/*
Copyright (c) 2017-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_iterators_H
#define __TBB_iterators_H
#include <iterator>
#include <limits>
#include "tbb_config.h"
#include "tbb_stddef.h"
#if __TBB_CPP11_PRESENT
#include <type_traits>
namespace tbb {
template <typename IntType>
class counting_iterator {
__TBB_STATIC_ASSERT(std::numeric_limits<IntType>::is_integer, "Cannot instantiate counting_iterator with a non-integer type");
public:
typedef typename std::make_signed<IntType>::type difference_type;
typedef IntType value_type;
typedef const IntType* pointer;
typedef const IntType& reference;
typedef std::random_access_iterator_tag iterator_category;
counting_iterator() : my_counter() {}
explicit counting_iterator(IntType init) : my_counter(init) {}
reference operator*() const { return my_counter; }
value_type operator[](difference_type i) const { return *(*this + i); }
difference_type operator-(const counting_iterator& it) const { return my_counter - it.my_counter; }
counting_iterator& operator+=(difference_type forward) { my_counter += forward; return *this; }
counting_iterator& operator-=(difference_type backward) { return *this += -backward; }
counting_iterator& operator++() { return *this += 1; }
counting_iterator& operator--() { return *this -= 1; }
counting_iterator operator++(int) {
counting_iterator it(*this);
++(*this);
return it;
}
counting_iterator operator--(int) {
counting_iterator it(*this);
--(*this);
return it;
}
counting_iterator operator-(difference_type backward) const { return counting_iterator(my_counter - backward); }
counting_iterator operator+(difference_type forward) const { return counting_iterator(my_counter + forward); }
friend counting_iterator operator+(difference_type forward, const counting_iterator it) { return it + forward; }
bool operator==(const counting_iterator& it) const { return *this - it == 0; }
bool operator!=(const counting_iterator& it) const { return !(*this == it); }
bool operator<(const counting_iterator& it) const {return *this - it < 0; }
bool operator>(const counting_iterator& it) const { return it < *this; }
bool operator<=(const counting_iterator& it) const { return !(*this > it); }
bool operator>=(const counting_iterator& it) const { return !(*this < it); }
private:
IntType my_counter;
};
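// Minimal usage sketch (illustrative):
//     tbb::counting_iterator<int> first(0), last(10);
//     std::vector<int> v( first, last );   // v holds 0, 1, ..., 9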
} //namespace tbb
#include <tuple>
#include "internal/_template_helpers.h" // index_sequence, make_index_sequence
namespace tbb {
namespace internal {
template<size_t N>
struct tuple_util {
template<typename TupleType, typename DifferenceType>
static void increment(TupleType& it, DifferenceType forward) {
std::get<N-1>(it) += forward;
tuple_util<N-1>::increment(it, forward);
}
template<typename TupleType, typename DifferenceType>
static bool check_sync(const TupleType& it1, const TupleType& it2, DifferenceType val) {
if(std::get<N-1>(it1) - std::get<N-1>(it2) != val)
return false;
return tuple_util<N-1>::check_sync(it1, it2, val);
}
};
template<>
struct tuple_util<0> {
template<typename TupleType, typename DifferenceType>
static void increment(TupleType&, DifferenceType) {}
template<typename TupleType, typename DifferenceType>
static bool check_sync(const TupleType&, const TupleType&, DifferenceType) { return true;}
};
template <typename TupleReturnType>
struct make_references {
template <typename TupleType, std::size_t... Is>
TupleReturnType operator()(const TupleType& t, tbb::internal::index_sequence<Is...>) {
return std::tie( *std::get<Is>(t)... );
}
};
// A simple wrapper over a tuple of references.
// The class is designed to hold a temporary tuple of references
// after dereferencing a zip_iterator; in particular, it is needed
// to swap these rvalue tuples. Any other usage is not supported.
template<typename... T>
struct tuplewrapper : public std::tuple<typename std::enable_if<std::is_reference<T>::value, T&&>::type...> {
// In the context of this class, T is a reference, so T&& is a "forwarding reference"
typedef std::tuple<T&&...> base_type;
// Construct from the result of std::tie
tuplewrapper(const base_type& in) : base_type(in) {}
#if __INTEL_COMPILER
// ICC cannot generate copy ctor & assignment
tuplewrapper(const tuplewrapper& rhs) : base_type(rhs) {}
tuplewrapper& operator=(const tuplewrapper& rhs) {
*this = base_type(rhs);
return *this;
}
#endif
// Assign any tuple convertible to std::tuple<T&&...>: *it = a_tuple;
template<typename... U>
tuplewrapper& operator=(const std::tuple<U...>& other) {
base_type::operator=(other);
return *this;
}
#if _LIBCPP_VERSION
// (Necessary for libc++ tuples) Convert to a tuple of values: v = *it;
operator std::tuple<typename std::remove_reference<T>::type...>() { return base_type(*this); }
#endif
// Swap rvalue tuples: swap(*it1,*it2);
friend void swap(tuplewrapper&& a, tuplewrapper&& b) {
std::swap<T&&...>(a,b);
}
};
} //namespace internal
template <typename... Types>
class zip_iterator {
__TBB_STATIC_ASSERT(sizeof...(Types)>0, "Cannot instantiate zip_iterator with empty template parameter pack");
static const std::size_t num_types = sizeof...(Types);
typedef std::tuple<Types...> it_types;
public:
typedef typename std::make_signed<std::size_t>::type difference_type;
typedef std::tuple<typename std::iterator_traits<Types>::value_type...> value_type;
#if __INTEL_COMPILER && __INTEL_COMPILER < 1800 && _MSC_VER
typedef std::tuple<typename std::iterator_traits<Types>::reference...> reference;
#else
typedef tbb::internal::tuplewrapper<typename std::iterator_traits<Types>::reference...> reference;
#endif
typedef std::tuple<typename std::iterator_traits<Types>::pointer...> pointer;
typedef std::random_access_iterator_tag iterator_category;
zip_iterator() : my_it() {}
explicit zip_iterator(Types... args) : my_it(std::make_tuple(args...)) {}
zip_iterator(const zip_iterator& input) : my_it(input.my_it) {}
zip_iterator& operator=(const zip_iterator& input) {
my_it = input.my_it;
return *this;
}
reference operator*() const {
return tbb::internal::make_references<reference>()(my_it, tbb::internal::make_index_sequence<num_types>());
}
reference operator[](difference_type i) const { return *(*this + i); }
difference_type operator-(const zip_iterator& it) const {
__TBB_ASSERT(internal::tuple_util<num_types>::check_sync(my_it, it.my_it, std::get<0>(my_it) - std::get<0>(it.my_it)),
"Components of zip_iterator are not synchronous");
return std::get<0>(my_it) - std::get<0>(it.my_it);
}
zip_iterator& operator+=(difference_type forward) {
internal::tuple_util<num_types>::increment(my_it, forward);
return *this;
}
zip_iterator& operator-=(difference_type backward) { return *this += -backward; }
zip_iterator& operator++() { return *this += 1; }
zip_iterator& operator--() { return *this -= 1; }
zip_iterator operator++(int) {
zip_iterator it(*this);
++(*this);
return it;
}
zip_iterator operator--(int) {
zip_iterator it(*this);
--(*this);
return it;
}
zip_iterator operator-(difference_type backward) const {
zip_iterator it(*this);
return it -= backward;
}
zip_iterator operator+(difference_type forward) const {
zip_iterator it(*this);
return it += forward;
}
friend zip_iterator operator+(difference_type forward, const zip_iterator& it) { return it + forward; }
bool operator==(const zip_iterator& it) const {
return *this - it == 0;
}
it_types base() const { return my_it; }
bool operator!=(const zip_iterator& it) const { return !(*this == it); }
bool operator<(const zip_iterator& it) const { return *this - it < 0; }
bool operator>(const zip_iterator& it) const { return it < *this; }
bool operator<=(const zip_iterator& it) const { return !(*this > it); }
bool operator>=(const zip_iterator& it) const { return !(*this < it); }
private:
it_types my_it;
};
template<typename... T>
zip_iterator<T...> make_zip_iterator(T... args) { return zip_iterator<T...>(args...); }
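// Minimal usage sketch (illustrative; a and b are assumed to be equally sized containers):
//     auto first = tbb::make_zip_iterator( a.begin(), b.begin() );
//     auto last  = tbb::make_zip_iterator( a.end(), b.end() );
//     // *first is a tuple of references: std::get<0>(*first) refers to a[0] and std::get<1>(*first) to b[0].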
template <typename UnaryFunc, typename Iter>
class transform_iterator {
public:
typedef typename std::iterator_traits<Iter>::value_type value_type;
typedef typename std::iterator_traits<Iter>::difference_type difference_type;
#if __TBB_CPP17_INVOKE_RESULT_PRESENT
typedef typename std::invoke_result<UnaryFunc, typename std::iterator_traits<Iter>::reference>::type reference;
#else
typedef typename std::result_of<UnaryFunc(typename std::iterator_traits<Iter>::reference)>::type reference;
#endif
typedef typename std::iterator_traits<Iter>::pointer pointer;
typedef typename std::random_access_iterator_tag iterator_category;
transform_iterator(Iter it, UnaryFunc unary_func) : my_it(it), my_unary_func(unary_func) {
__TBB_STATIC_ASSERT((std::is_same<typename std::iterator_traits<Iter>::iterator_category,
std::random_access_iterator_tag>::value), "Random access iterator required.");
}
transform_iterator(const transform_iterator& input) : my_it(input.my_it), my_unary_func(input.my_unary_func) { }
transform_iterator& operator=(const transform_iterator& input) {
my_it = input.my_it;
return *this;
}
reference operator*() const {
return my_unary_func(*my_it);
}
reference operator[](difference_type i) const {
return *(*this + i);
}
transform_iterator& operator++() {
++my_it;
return *this;
}
transform_iterator& operator--() {
--my_it;
return *this;
}
transform_iterator operator++(int) {
transform_iterator it(*this);
++(*this);
return it;
}
transform_iterator operator--(int) {
transform_iterator it(*this);
--(*this);
return it;
}
transform_iterator operator+(difference_type forward) const {
return { my_it + forward, my_unary_func };
}
transform_iterator operator-(difference_type backward) const {
return { my_it - backward, my_unary_func };
}
transform_iterator& operator+=(difference_type forward) {
my_it += forward;
return *this;
}
transform_iterator& operator-=(difference_type backward) {
my_it -= backward;
return *this;
}
friend transform_iterator operator+(difference_type forward, const transform_iterator& it) {
return it + forward;
}
difference_type operator-(const transform_iterator& it) const {
return my_it - it.my_it;
}
bool operator==(const transform_iterator& it) const { return *this - it == 0; }
bool operator!=(const transform_iterator& it) const { return !(*this == it); }
bool operator<(const transform_iterator& it) const { return *this - it < 0; }
bool operator>(const transform_iterator& it) const { return it < *this; }
bool operator<=(const transform_iterator& it) const { return !(*this > it); }
bool operator>=(const transform_iterator& it) const { return !(*this < it); }
Iter base() const { return my_it; }
private:
Iter my_it;
const UnaryFunc my_unary_func;
};
template<typename UnaryFunc, typename Iter>
transform_iterator<UnaryFunc, Iter> make_transform_iterator(Iter it, UnaryFunc unary_func) {
return transform_iterator<UnaryFunc, Iter>(it, unary_func);
}
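// Minimal usage sketch (illustrative):
//     std::vector<int> v = { 1, 2, 3 };
//     auto square = []( int x ) { return x * x; };
//     auto first = tbb::make_transform_iterator( v.begin(), square );
//     // *first == 1 and first[2] == 9; the underlying iterator is available via first.base().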
} //namespace tbb
#endif //__TBB_CPP11_PRESENT
#endif /* __TBB_iterators_H */

View File

@@ -0,0 +1,216 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/*
Platform isolation layer for the ARMv7-a architecture.
*/
#ifndef __TBB_machine_H
#error Do not include this file directly; include tbb_machine.h instead
#endif
#if __ARM_ARCH_7A__
#include <sys/param.h>
#include <unistd.h>
#define __TBB_WORDSIZE 4
// Traditionally ARM is little-endian.
// Note that, since only the layout of aligned 32-bit words is of interest,
// any apparent PDP-endianness of 32-bit words at half-word alignment or
// any little-endian ordering of big-endian 32-bit words in 64-bit quantities
// may be disregarded for this setting.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__("dmb ish": : :"memory")
#define __TBB_control_consistency_helper() __TBB_full_memory_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
 * Atomic CAS for 32 bit values: if *ptr==comparand then *ptr=value; returns the value originally at *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t oldval, res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"ldrex %1, [%3]\n"
"mov %0, #0\n"
"cmp %1, %4\n"
"it eq\n"
"strexeq %0, %5, [%3]\n"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int32_t*)ptr)
: "r" ((volatile int32_t *)ptr), "Ir" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
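// Illustrative sketch of how callers typically build other read-modify-write operations on top of
// this CAS primitive (fetch_and_increment below is a hypothetical helper, not part of this header):
//     static inline int32_t fetch_and_increment( volatile int32_t* p ) {
//         int32_t old_val;
//         do {
//             old_val = *p;
//         } while( __TBB_machine_cmpswp4( p, old_val + 1, old_val ) != old_val );
//         return old_val;
//     }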
/**
 * Atomic CAS for 64 bit values: if *ptr==comparand then *ptr=value; returns the value originally at *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t oldval;
int32_t res;
__TBB_full_memory_fence();
do {
__asm__ __volatile__(
"mov %0, #0\n"
"ldrexd %1, %H1, [%3]\n"
"cmp %1, %4\n"
"it eq\n"
"cmpeq %H1, %H4\n"
"it eq\n"
"strexdeq %0, %5, %H5, [%3]"
: "=&r" (res), "=&r" (oldval), "+Qo" (*(volatile int64_t*)ptr)
: "r" ((volatile int64_t *)ptr), "r" (comparand), "r" (value)
: "cc");
} while (res);
__TBB_full_memory_fence();
return oldval;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void* ptr, int32_t addend)
{
unsigned long tmp;
int32_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrex %0, [%4]\n"
" add %3, %0, %5\n"
" strex %1, %3, [%4]\n"
" cmp %1, #0\n"
" bne 1b\n"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int32_t*)ptr), "=&r"(tmp2)
: "r" ((volatile int32_t *)ptr), "Ir" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
unsigned long tmp;
int64_t result, tmp2;
__TBB_full_memory_fence();
__asm__ __volatile__(
"1: ldrexd %0, %H0, [%4]\n"
" adds %3, %0, %5\n"
" adc %H3, %H0, %H5\n"
" strexd %1, %3, %H3, [%4]\n"
" cmp %1, #0\n"
" bne 1b"
: "=&r" (result), "=&r" (tmp), "+Qo" (*(volatile int64_t*)ptr), "=&r"(tmp2)
: "r" ((volatile int64_t *)ptr), "r" (addend)
: "cc");
__TBB_full_memory_fence();
return result;
}
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
}} // namespaces internal, tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
// Use generics for some things
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#elif defined __aarch64__
// Generic gcc implementations are fine for ARMv8-a except __TBB_PAUSE.
#include "gcc_generic.h"
#else
#error compilation requires an ARMv7-a or ARMv8-a architecture.
#endif // __ARM_ARCH_7A__
inline void __TBB_machine_pause (int32_t delay)
{
while(delay>0)
{
__asm__ __volatile__("yield" ::: "memory");
delay--;
}
}
#define __TBB_Pause(V) __TBB_machine_pause(V)

View File

@@ -0,0 +1,233 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_generic_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE __SIZEOF_POINTER__
#if __TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
#define __TBB_64BIT_ATOMICS 0
#endif
/** FPU control setting not available for non-Intel architectures on Android **/
#if __ANDROID__ && __TBB_generic_arch
#define __TBB_CPU_CTL_ENV_PRESENT 0
#endif
// __BYTE_ORDER__ is used in accordance with http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html,
// but __BIG_ENDIAN__ or __LITTLE_ENDIAN__ may be more commonly found instead.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
#if __TBB_GCC_VERSION < 40700
// Use __sync_* builtins
/** As this generic implementation has absolutely no information about underlying
hardware, its performance most likely will be sub-optimal because of full memory
fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platform,
consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_acquire_consistency_helper() __sync_synchronize()
#define __TBB_release_consistency_helper() __sync_synchronize()
#define __TBB_full_memory_fence() __sync_synchronize()
#define __TBB_control_consistency_helper() __sync_synchronize()
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
return __sync_val_compare_and_swap(reinterpret_cast<volatile T *>(ptr),comparand,value); \
} \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return __sync_fetch_and_add(reinterpret_cast<volatile T *>(ptr),value); \
}
#define __TBB_USE_GENERIC_FETCH_STORE 1
#else
// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
// Acquire and release fence intrinsics in GCC might miss compiler fence.
// Adding it on both sides of an intrinsic, as we do not know what reordering can occur.
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_ACQUIRE); __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence(); __atomic_thread_fence(__ATOMIC_RELEASE); __TBB_compiler_fence()
#define __TBB_full_memory_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
#define __TBB_control_consistency_helper() __TBB_acquire_consistency_helper()
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
(void)__atomic_compare_exchange_n(reinterpret_cast<volatile T *>(ptr), &comparand, value, \
false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \
return comparand; \
} \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return __atomic_fetch_add(reinterpret_cast<volatile T *>(ptr), value, __ATOMIC_SEQ_CST); \
} \
inline T __TBB_machine_fetchstore##S( volatile void *ptr, T value ) { \
return __atomic_exchange_n(reinterpret_cast<volatile T *>(ptr), value, __ATOMIC_SEQ_CST); \
}
#endif // __TBB_GCC_VERSION < 40700
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t)
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t)
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t)
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t)
#undef __TBB_MACHINE_DEFINE_ATOMICS
typedef unsigned char __TBB_Flag;
typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
#if __TBB_GCC_VERSION < 40700
// Use __sync_* builtins
// Use generic machine_load_store functions if there are no builtin atomics
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) {
__sync_fetch_and_and(reinterpret_cast<volatile uintptr_t *>(ptr),addend);
}
inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) {
return __sync_lock_test_and_set(&flag,1)==0;
}
inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
__sync_lock_release(&flag);
}
#else
// __TBB_GCC_VERSION >= 40700; use __atomic_* builtins available since gcc 4.7
static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) {
__atomic_fetch_or(reinterpret_cast<volatile uintptr_t *>(ptr),addend,__ATOMIC_SEQ_CST);
}
static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) {
__atomic_fetch_and(reinterpret_cast<volatile uintptr_t *>(ptr),addend,__ATOMIC_SEQ_CST);
}
inline bool __TBB_machine_try_lock_byte( __TBB_atomic_flag &flag ) {
return !__atomic_test_and_set(&flag,__ATOMIC_ACQUIRE);
}
inline void __TBB_machine_unlock_byte( __TBB_atomic_flag &flag ) {
__atomic_clear(&flag,__ATOMIC_RELEASE);
}
namespace tbb { namespace internal {
/** GCC atomic operation intrinsics might miss compiler fence.
Adding it after load-with-acquire, before store-with-release, and
on both sides of sequentially consistent operations is sufficient for correctness. **/
template <typename T, int MemOrder>
inline T __TBB_machine_atomic_load( const volatile T& location) {
if (MemOrder == __ATOMIC_SEQ_CST) __TBB_compiler_fence();
T value = __atomic_load_n(&location, MemOrder);
if (MemOrder != __ATOMIC_RELAXED) __TBB_compiler_fence();
return value;
}
template <typename T, int MemOrder>
inline void __TBB_machine_atomic_store( volatile T& location, T value) {
if (MemOrder != __ATOMIC_RELAXED) __TBB_compiler_fence();
__atomic_store_n(&location, value, MemOrder);
if (MemOrder == __ATOMIC_SEQ_CST) __TBB_compiler_fence();
}
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
return __TBB_machine_atomic_load<T, __ATOMIC_ACQUIRE>(location);
}
static void store_with_release ( volatile T &location, T value ) {
__TBB_machine_atomic_store<T, __ATOMIC_RELEASE>(location, value);
}
};
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
return __TBB_machine_atomic_load<T, __ATOMIC_RELAXED>(location);
}
static inline void store ( volatile T& location, T value ) {
__TBB_machine_atomic_store<T, __ATOMIC_RELAXED>(location, value);
}
};
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
return __TBB_machine_atomic_load<T, __ATOMIC_SEQ_CST>(location);
}
static void store ( volatile T &location, T value ) {
__TBB_machine_atomic_store<T, __ATOMIC_SEQ_CST>(location, value);
}
};
}} // namespace tbb::internal
#endif // __TBB_GCC_VERSION < 40700
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_TryLockByte __TBB_machine_try_lock_byte
#define __TBB_UnlockByte __TBB_machine_unlock_byte
// __builtin_clz counts the number of leading zeroes
namespace tbb{ namespace internal { namespace gcc_builtins {
inline int clz(unsigned int x){ return __builtin_clz(x); }
inline int clz(unsigned long int x){ return __builtin_clzl(x); }
inline int clz(unsigned long long int x){ return __builtin_clzll(x); }
}}}
// logarithm is the index of the most significant non-zero bit
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
// If P is a power of 2 and x<P, then (P-1)-x == (P-1) XOR x
return (sizeof(x)*8 - 1) ^ tbb::internal::gcc_builtins::clz(x);
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
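// For example, __TBB_Log2(1) == 0, __TBB_Log2(8) == 3, and __TBB_Log2(10) == 3
// (the index of the most significant set bit).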
#if __TBB_WORDSIZE==4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#if __TBB_x86_32 || __TBB_x86_64
#include "gcc_ia32_common.h"
#endif

View File

@@ -0,0 +1,109 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_machine_gcc_ia32_common_H
#define __TBB_machine_gcc_ia32_common_H
#ifndef __TBB_Log2
//TODO: Add a higher-level function, e.g. tbb::internal::log2(), to tbb_stddef.h that
//uses __TBB_Log2 and contains the assert; then remove the assert from here and from
//all other platform-specific headers.
template <typename T>
static inline intptr_t __TBB_machine_lg( T x ) {
__TBB_ASSERT(x>0, "The logarithm of a non-positive value is undefined.");
uintptr_t j, i = x;
__asm__("bsr %1,%0" : "=r"(j) : "r"(i));
return j;
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
#endif /* !__TBB_Log2 */
#ifndef __TBB_Pause
//TODO: check whether raising the ratio of pause instructions to loop-control instructions
//(via e.g. loop unrolling) gives any benefit for HT. E.g., the current implementation
//executes about 2 CPU-consuming instructions for every pause instruction. Perhaps for
//high pause counts it should use an unrolled loop to raise the ratio, and thus free
//up more integer cycles for the other hyperthread. On the other hand, if the loop is
//unrolled too far, it won't fit in the core's loop cache, and thus take away
//instruction decode slots from the other hyperthread.
//TODO: check whether using the GCC __builtin_ia32_pause intrinsic gives better-performing code
static inline void __TBB_machine_pause( int32_t delay ) {
for (int32_t i = 0; i < delay; i++) {
__asm__ __volatile__("pause;");
}
return;
}
#define __TBB_Pause(V) __TBB_machine_pause(V)
#endif /* !__TBB_Pause */
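/* Illustrative sketch (hypothetical usage): an exponential back-off spin loop built on
   __TBB_Pause, similar in spirit to TBB's internal atomic_backoff. `is_ready` is a
   made-up predicate.

       int32_t count = 1;
       while( !is_ready() ) {
           __TBB_Pause( count );
           if( count < 16 ) count *= 2;   // cap the back-off so wakeup latency stays bounded
       }
*/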
namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
#if __INTEL_COMPILER
return _rdtsc();
#else
tbb::internal::uint32_t hi, lo;
__asm__ __volatile__("rdtsc" : "=d"(hi), "=a"(lo));
return (tbb::internal::machine_tsc_t( hi ) << 32) | lo;
#endif
}
#define __TBB_time_stamp() __TBB_machine_time_stamp()
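/* Illustrative sketch (hypothetical usage): timing a code region in TSC ticks. Converting
   ticks to seconds requires the TSC frequency, which is not exposed here; `do_work` is a
   made-up workload.

       tbb::internal::machine_tsc_t t0 = __TBB_time_stamp();
       do_work();
       tbb::internal::machine_tsc_t elapsed_ticks = __TBB_time_stamp() - t0;
*/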
// API to retrieve/update FPU control setting
#ifndef __TBB_CPU_CTL_ENV_PRESENT
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
#if __TBB_ICC_12_0_INL_ASM_FSTCW_BROKEN
cpu_ctl_env loc_ctl;
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(loc_ctl.mxcsr), "=m"(loc_ctl.x87cw)
);
*this = loc_ctl;
#else
__asm__ __volatile__ (
"stmxcsr %0\n\t"
"fstcw %1"
: "=m"(mxcsr), "=m"(x87cw)
);
#endif
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const {
__asm__ __volatile__ (
"ldmxcsr %0\n\t"
"fldcw %1"
: : "m"(mxcsr), "m"(x87cw)
);
}
};
} // namespace internal
} // namespace tbb
#endif /* !__TBB_CPU_CTL_ENV_PRESENT */
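/* Illustrative sketch (hypothetical usage): capturing and restoring the FPU control settings
   around code that may change them; `run_third_party_code` is a made-up function that might
   alter rounding or FTZ/DAZ modes.

       tbb::internal::cpu_ctl_env saved;
       saved.get_env();          // snapshot MXCSR control bits and the x87 control word
       run_third_party_code();
       saved.set_env();          // restore the snapshot
*/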
#include "gcc_itsx.h"
#endif /* __TBB_machine_gcc_ia32_common_H */

View File

@@ -0,0 +1,119 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_itsx_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_itsx_H
#define __TBB_OP_XACQUIRE 0xF2
#define __TBB_OP_XRELEASE 0xF3
#define __TBB_OP_LOCK 0xF0
#define __TBB_STRINGIZE_INTERNAL(arg) #arg
#define __TBB_STRINGIZE(arg) __TBB_STRINGIZE_INTERNAL(arg)
#ifdef __TBB_x86_64
#define __TBB_r_out "=r"
#else
#define __TBB_r_out "=q"
#endif
inline static uint8_t __TBB_machine_try_lock_elided( volatile uint8_t* lk )
{
uint8_t value = 1;
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XACQUIRE)"; lock; xchgb %0, %1;"
: __TBB_r_out(value), "=m"(*lk) : "0"(value), "m"(*lk) : "memory" );
return uint8_t(value^1);
}
inline static void __TBB_machine_try_lock_elided_cancel()
{
// 'pause' instruction aborts HLE/RTM transactions
__asm__ volatile ("pause\n" : : : "memory" );
}
inline static void __TBB_machine_unlock_elided( volatile uint8_t* lk )
{
__asm__ volatile (".byte " __TBB_STRINGIZE(__TBB_OP_XRELEASE)"; movb $0, %0"
: "=m"(*lk) : "m"(*lk) : "memory" );
}
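/* Illustrative sketch (hypothetical usage): a minimal HLE-elided spin lock built on the
   helpers above; `lock_byte` is a made-up flag (0 = free, 1 = held).

       static volatile uint8_t lock_byte = 0;

       void elided_acquire() {
           while( !__TBB_machine_try_lock_elided( &lock_byte ) ) {
               // Spinning inside a started transaction would only make it abort repeatedly,
               // so abort speculation explicitly and wait until the lock looks free.
               __TBB_machine_try_lock_elided_cancel();
               while( lock_byte ) {}
           }
       }
       void elided_release() { __TBB_machine_unlock_elided( &lock_byte ); }
*/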
#if __TBB_TSX_INTRINSICS_PRESENT
#include <immintrin.h>
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
#define __TBB_machine_transaction_conflict_abort() _xabort(0xff)
#else
/*!
* Check if the instruction is executed in a transaction or not
*/
inline static bool __TBB_machine_is_in_transaction()
{
int8_t res = 0;
#if __TBB_x86_32
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=q"(res) : : "memory" );
#else
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD6;\n"
"setz %0" : "=r"(res) : : "memory" );
#endif
return res==0;
}
/*!
* Enter speculative execution mode.
* @return -1 on success
* abort cause ( or 0 ) on abort
*/
inline static uint32_t __TBB_machine_begin_transaction()
{
uint32_t res = ~uint32_t(0); // success value
__asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" // XBEGIN <abort-offset>
" .long 2f-1b-6\n" // 2f-1b == difference in addresses of start
// of XBEGIN and the MOVL
// 2f - 1b - 6 == that difference minus the size of the
// XBEGIN instruction. This is the abort offset to
// 2: below.
" jmp 3f\n" // success (leave -1 in res)
"2: movl %%eax,%0\n" // store failure code in res
"3:"
:"=r"(res):"0"(res):"memory","%eax");
return res;
}
/*!
* Attempt to commit/end transaction
*/
inline static void __TBB_machine_end_transaction()
{
__asm__ volatile (".byte 0x0F; .byte 0x01; .byte 0xD5" :::"memory"); // XEND
}
/*
* aborts with code 0xFF (lock already held)
*/
inline static void __TBB_machine_transaction_conflict_abort()
{
__asm__ volatile (".byte 0xC6; .byte 0xF8; .byte 0xFF" :::"memory");
}
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
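/* Illustrative sketch (hypothetical usage): the usual RTM pattern for the wrappers above;
   the fallback-lock helpers and critical_section() are made-up names.

       uint32_t status = __TBB_machine_begin_transaction();
       if( status == ~uint32_t(0) ) {            // success value: now running speculatively
           if( fallback_lock_is_held() )         // reading the lock adds it to the read set
               __TBB_machine_transaction_conflict_abort();
           critical_section();
           __TBB_machine_end_transaction();      // try to commit
       } else {                                  // aborted: status holds the abort cause (possibly 0)
           acquire_fallback_lock();
           critical_section();
           release_fallback_lock();
       }
*/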

View File

@@ -0,0 +1,66 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// TODO: revise by comparing with mac_ppc.h
#if !defined(__TBB_machine_H) || defined(__TBB_machine_ibm_aix51_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_ibm_aix51_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG // assumption based on operating system
#include <stdint.h>
#include <unistd.h>
#include <sched.h>
extern "C" {
int32_t __TBB_machine_cas_32 (volatile void* ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cas_64 (volatile void* ptr, int64_t value, int64_t comparand);
void __TBB_machine_flush ();
void __TBB_machine_lwsync ();
void __TBB_machine_isync ();
}
// Mapping of old entry point names retained for the sake of backward binary compatibility
#define __TBB_machine_cmpswp4 __TBB_machine_cas_32
#define __TBB_machine_cmpswp8 __TBB_machine_cas_64
#define __TBB_Yield() sched_yield()
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if __GNUC__
#define __TBB_control_consistency_helper() __asm__ __volatile__( "isync": : :"memory")
#define __TBB_acquire_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_release_consistency_helper() __asm__ __volatile__("lwsync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
#else
// The IBM C++ compiler does not support GNU inline assembly here.
// TODO: GCC inline-assembly syntax has been supported since XL 9.0 (or possibly earlier);
// replace this with a more lightweight implementation (like the one in mac_ppc.h).
#define __TBB_control_consistency_helper() __TBB_machine_isync ()
#define __TBB_acquire_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_release_consistency_helper() __TBB_machine_lwsync ()
#define __TBB_full_memory_fence() __TBB_machine_flush ()
#endif

View File

@@ -0,0 +1,258 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_icc_generic_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_ICC_BUILTIN_ATOMICS_PRESENT
#error "Intel(R) C++ Compiler of at least 12.0 version is needed to use ICC intrinsics port"
#endif
#define __TBB_machine_icc_generic_H
//ICC mimics the "native" target compiler
#if _MSC_VER
#include "msvc_ia32_common.h"
#else
#include "gcc_ia32_common.h"
#endif
//TODO: Make the __TBB_WORDSIZE macro optional for the ICC intrinsics port.
//Since compiler intrinsics are used for all operations, this should be possible.
#if __TBB_x86_32
#define __TBB_WORDSIZE 4
#else
#define __TBB_WORDSIZE 8
#endif
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
//__TBB_compiler_fence() defined just in case, as it seems not to be used on its own anywhere else
#ifndef __TBB_compiler_fence
#if _MSC_VER
        //TODO: is there a way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_ReadWriteBarrier)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#endif
#endif
#ifndef __TBB_full_memory_fence
#if _MSC_VER
        //TODO: is there a way to use the same intrinsics on Windows and Linux?
#pragma intrinsic(_mm_mfence)
#define __TBB_full_memory_fence() _mm_mfence()
#else
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#endif
#ifndef __TBB_control_consistency_helper
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#endif
namespace tbb { namespace internal {
//TODO: is there any way to reuse the memory_order enum definition from ICC instead of copy-pasting it?
//However, it seems unlikely that ICC would silently change the exact enum values, as they are defined
//in the ISO C++ standard exactly like this.
//TODO: add test that exact values of the enum are same as in the ISO C++11
typedef enum memory_order {
memory_order_relaxed, memory_order_consume, memory_order_acquire,
memory_order_release, memory_order_acq_rel, memory_order_seq_cst
} memory_order;
namespace icc_intrinsics_port {
template <typename T>
T convert_argument(T value){
return value;
}
//The overload below is needed to force an explicit conversion of pointers to void* in the argument list.
//Compiler bug?
//TODO: add a corresponding *_BROKEN macro and recheck with ICC 13.0 whether the overload is still needed.
template <typename T>
void* convert_argument(T* value){
return (void*)value;
}
}
//TODO: code below is a bit repetitive, consider simplifying it
template <typename T, size_t S>
struct machine_load_store {
static T load_with_acquire ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_acquire);
}
static void store_with_release ( volatile T &location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
}
};
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const T& location ) {
return __atomic_load_explicit(&location, memory_order_relaxed);
}
static inline void store ( T& location, T value ) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
}
};
template <typename T, size_t S>
struct machine_load_store_seq_cst {
static T load ( const volatile T& location ) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
}
static void store ( volatile T &location, T value ) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
}
};
}} // namespace tbb::internal
namespace tbb{ namespace internal { namespace icc_intrinsics_port{
typedef enum memory_order_map {
relaxed = memory_order_relaxed,
acquire = memory_order_acquire,
release = memory_order_release,
full_fence= memory_order_seq_cst
} memory_order_map;
}}}// namespace tbb::internal
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,M) \
inline T __TBB_machine_cmpswp##S##M( volatile void *ptr, T value, T comparand ) { \
__atomic_compare_exchange_strong_explicit( \
(T*)ptr \
,&comparand \
,value \
, tbb::internal::icc_intrinsics_port::M \
, tbb::internal::icc_intrinsics_port::M); \
return comparand; \
} \
\
inline T __TBB_machine_fetchstore##S##M(volatile void *ptr, T value) { \
return __atomic_exchange_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
} \
\
inline T __TBB_machine_fetchadd##S##M(volatile void *ptr, T value) { \
return __atomic_fetch_add_explicit((T*)ptr, value, tbb::internal::icc_intrinsics_port::M); \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(1,tbb::internal::int8_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(2,tbb::internal::int16_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(4,tbb::internal::int32_t, relaxed)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, full_fence)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, acquire)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, release)
__TBB_MACHINE_DEFINE_ATOMICS(8,tbb::internal::int64_t, relaxed)
#undef __TBB_MACHINE_DEFINE_ATOMICS
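/* Illustrative note: each expansion above produces a family of fenced primitives; e.g. for
   S=4 and M=full_fence, with hypothetical int32_t variables `word`, `expected` and `new_val`:

       int32_t seen    = __TBB_machine_cmpswp4full_fence( &word, new_val, expected );
       int32_t prev    = __TBB_machine_fetchadd4full_fence( &word, 1 );
       int32_t swapped = __TBB_machine_fetchstore4full_fence( &word, new_val );
*/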
#define __TBB_USE_FENCED_ATOMICS 1
namespace tbb { namespace internal {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(full_fence)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(acquire)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(release)
__TBB_MACHINE_DEFINE_LOAD8_GENERIC_FENCED(relaxed)
__TBB_MACHINE_DEFINE_STORE8_GENERIC_FENCED(relaxed)
template <typename T>
struct machine_load_store<T,8> {
static T load_with_acquire ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_acquire);
} else {
return __TBB_machine_generic_load8acquire(&location);
}
}
static void store_with_release ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_release);
} else {
return __TBB_machine_generic_store8release(&location,value);
}
}
};
template <typename T>
struct machine_load_store_relaxed<T,8> {
static T load( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_relaxed);
} else {
return __TBB_machine_generic_load8relaxed(&location);
}
}
static void store( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, icc_intrinsics_port::convert_argument(value), memory_order_relaxed);
} else {
return __TBB_machine_generic_store8relaxed(&location,value);
}
}
};
template <typename T >
struct machine_load_store_seq_cst<T,8> {
static T load ( const volatile T& location ) {
if( tbb::internal::is_aligned(&location,8)) {
return __atomic_load_explicit(&location, memory_order_seq_cst);
} else {
return __TBB_machine_generic_load8full_fence(&location);
}
}
static void store ( volatile T &location, T value ) {
if( tbb::internal::is_aligned(&location,8)) {
__atomic_store_explicit(&location, value, memory_order_seq_cst);
} else {
return __TBB_machine_generic_store8full_fence(&location,value);
}
}
};
#endif
}} // namespace tbb::internal
template <typename T>
inline void __TBB_machine_OR( T *operand, T addend ) {
__atomic_fetch_or_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}
template <typename T>
inline void __TBB_machine_AND( T *operand, T addend ) {
__atomic_fetch_and_explicit(operand, addend, tbb::internal::memory_order_seq_cst);
}

View File

@@ -0,0 +1,105 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#include <sched.h>
#define __TBB_Yield() sched_yield()
#include <unistd.h>
/* Futex definitions */
#include <sys/syscall.h>
#if defined(SYS_futex)
/* This header file is included for Linux and some other systems that may support futexes.*/
#define __TBB_USE_FUTEX 1
#if defined(__has_include)
#define __TBB_has_include __has_include
#else
#define __TBB_has_include(x) 0
#endif
/*
If available, use typical headers where futex API is defined. While Linux and OpenBSD
are known to provide such headers, other systems might have them as well.
*/
#if defined(__linux__) || __TBB_has_include(<linux/futex.h>)
#include <linux/futex.h>
#elif defined(__OpenBSD__) || __TBB_has_include(<sys/futex.h>)
#include <sys/futex.h>
#endif
#include <limits.h>
#include <errno.h>
/*
Some systems might not define the macros or use different names. In such case we expect
the actual parameter values to match Linux: 0 for wait, 1 for wake.
*/
#if defined(FUTEX_WAIT_PRIVATE)
#define __TBB_FUTEX_WAIT FUTEX_WAIT_PRIVATE
#elif defined(FUTEX_WAIT)
#define __TBB_FUTEX_WAIT FUTEX_WAIT
#else
#define __TBB_FUTEX_WAIT 0
#endif
#if defined(FUTEX_WAKE_PRIVATE)
#define __TBB_FUTEX_WAKE FUTEX_WAKE_PRIVATE
#elif defined(FUTEX_WAKE)
#define __TBB_FUTEX_WAKE FUTEX_WAKE
#else
#define __TBB_FUTEX_WAKE 1
#endif
#ifndef __TBB_ASSERT
#error machine specific headers must be included after tbb_stddef.h
#endif
namespace tbb {
namespace internal {
inline int futex_wait( void *futex, int comparand ) {
int r = syscall( SYS_futex,futex,__TBB_FUTEX_WAIT,comparand,NULL,NULL,0 );
#if TBB_USE_ASSERT
int e = errno;
__TBB_ASSERT( r==0||r==EWOULDBLOCK||(r==-1&&(e==EAGAIN||e==EINTR)), "futex_wait failed." );
#endif /* TBB_USE_ASSERT */
return r;
}
inline int futex_wakeup_one( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,1,NULL,NULL,0 );
__TBB_ASSERT( r==0||r==1, "futex_wakeup_one: more than one thread woken up?" );
return r;
}
inline int futex_wakeup_all( void *futex ) {
int r = ::syscall( SYS_futex,futex,__TBB_FUTEX_WAKE,INT_MAX,NULL,NULL,0 );
__TBB_ASSERT( r>=0, "futex_wakeup_all: error in waking up threads" );
return r;
}
} /* namespace internal */
} /* namespace tbb */
#endif /* SYS_futex */
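/* Illustrative sketch (hypothetical usage, only meaningful when SYS_futex is available):
   a one-shot event built on the futex helpers above. `flag` is a made-up int; production
   code would access it with proper atomic loads and stores.

       static int flag = 0;                                     // 0 = not signaled, 1 = signaled

       void wait_for_event() {
           while( flag == 0 )
               tbb::internal::futex_wait( &flag, 0 );           // kernel re-checks flag==0, so wakeups are not lost
       }
       void signal_event() {
           flag = 1;
           tbb::internal::futex_wakeup_all( &flag );
       }
*/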

View File

@@ -0,0 +1,228 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia32_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#if __TBB_ICC_ASM_VOLATILE_BROKEN
#define __TBB_VOLATILE
#else
#define __TBB_VOLATILE volatile
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X,R) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(addend), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: R (result), "=m"(*(__TBB_VOLATILE T*)ptr) \
: "0"(value), "m"(*(__TBB_VOLATILE T*)ptr) \
: "memory"); \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"","=q")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"","=r")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"l","=r")
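/* Illustrative sketch (hypothetical usage): a read-modify-write loop built on the CAS
   defined above; `counter` and saturating_increment() are made-up names.

       int32_t saturating_increment( volatile int32_t& counter, int32_t limit ) {
           int32_t old = counter;
           for(;;) {
               if( old >= limit ) return old;                              // already saturated
               int32_t seen = __TBB_machine_cmpswp4( &counter, old+1, old );
               if( seen == old ) return old+1;                             // CAS succeeded
               old = seen;                                                 // lost the race; retry with the fresh value
           }
       }
*/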
#if __INTEL_COMPILER
#pragma warning( push )
// reference to EBX in a function requiring stack alignment
#pragma warning( disable: 998 )
#endif
#if __TBB_GCC_CAS8_BUILTIN_INLINING_BROKEN
#define __TBB_IA32_CAS8_NOINLINE __attribute__ ((noinline))
#else
#define __TBB_IA32_CAS8_NOINLINE
#endif
static inline __TBB_IA32_CAS8_NOINLINE int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand ) {
//TODO: remove the extra part of condition once __TBB_GCC_BUILTIN_ATOMICS_PRESENT is lowered to gcc version 4.1.2
#if (__TBB_GCC_BUILTIN_ATOMICS_PRESENT || (__TBB_GCC_VERSION >= 40102)) && !__TBB_GCC_64BIT_ATOMIC_BUILTINS_BROKEN
return __sync_val_compare_and_swap( reinterpret_cast<volatile int64_t*>(ptr), comparand, value );
#else /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
//TODO: it looks like ICC 13.0 has some issues with this code; investigate it more deeply
int64_t result;
union {
int64_t i64;
int32_t i32[2];
};
i64 = value;
#if __PIC__
/* compiling position-independent code */
// EBX register preserved for compliance with position-independent code rules on IA32
int32_t tmp;
__asm__ __volatile__ (
"movl %%ebx,%2\n\t"
"movl %5,%%ebx\n\t"
#if __GNUC__==3
"lock\n\t cmpxchg8b %1\n\t"
#else
"lock\n\t cmpxchg8b (%3)\n\t"
#endif
"movl %2,%%ebx"
: "=A"(result)
, "=m"(*(__TBB_VOLATILE int64_t *)ptr)
, "=m"(tmp)
#if __GNUC__==3
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
#else
: "SD"(ptr)
#endif
, "0"(comparand)
, "m"(i32[0]), "c"(i32[1])
: "memory"
#if __INTEL_COMPILER
,"ebx"
#endif
);
#else /* !__PIC__ */
__asm__ __volatile__ (
"lock\n\t cmpxchg8b %1\n\t"
: "=A"(result), "=m"(*(__TBB_VOLATILE int64_t *)ptr)
: "m"(*(__TBB_VOLATILE int64_t *)ptr)
, "0"(comparand)
, "b"(i32[0]), "c"(i32[1])
: "memory"
);
#endif /* __PIC__ */
return result;
#endif /* !__TBB_GCC_BUILTIN_ATOMICS_PRESENT */
}
#undef __TBB_IA32_CAS8_NOINLINE
#if __INTEL_COMPILER
#pragma warning( pop )
#endif // warning 998 is back
static inline void __TBB_machine_or( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\norl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint32_t addend ) {
__asm__ __volatile__("lock\nandl %1,%0" : "=m"(*(__TBB_VOLATILE uint32_t *)ptr) : "r"(addend), "m"(*(__TBB_VOLATILE uint32_t *)ptr) : "memory");
}
//TODO: Check whether it is possible and profitable for the IA-32 architecture (on Linux* and Windows*)
//to use 64-bit load/store via floating point registers together with a full fence
//for sequentially consistent load/store, instead of CAS.
#if __clang__
#define __TBB_fildq "fildll"
#define __TBB_fistpq "fistpll"
#else
#define __TBB_fildq "fildq"
#define __TBB_fistpq "fistpq"
#endif
static inline int64_t __TBB_machine_aligned_load8 (const volatile void *ptr) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_load8 should be used with 8 byte aligned locations only \n");
int64_t result;
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(result) : "m"(*(const __TBB_VOLATILE uint64_t*)ptr) : "memory" );
return result;
}
static inline void __TBB_machine_aligned_store8 (volatile void *ptr, int64_t value ) {
__TBB_ASSERT(tbb::internal::is_aligned(ptr,8),"__TBB_machine_aligned_store8 should be used with 8 byte aligned locations only \n");
// Aligned store
__asm__ __volatile__ ( __TBB_fildq " %1\n\t"
__TBB_fistpq " %0" : "=m"(*(__TBB_VOLATILE int64_t*)ptr) : "m"(value) : "memory" );
}
static inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
return __TBB_machine_aligned_load8(ptr);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned load
return __TBB_machine_cmpswp8(const_cast<void*>(ptr),0,0);
}
#endif
}
//! Handles misaligned 8-byte store
/** Defined in tbb_misc.cpp */
extern "C" void __TBB_machine_store8_slow( volatile void *ptr, int64_t value );
extern "C" void __TBB_machine_store8_slow_perf_warning( volatile void *ptr );
static inline void __TBB_machine_store8(volatile void *ptr, int64_t value) {
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
if( tbb::internal::is_aligned(ptr,8)) {
#endif
__TBB_machine_aligned_store8(ptr,value);
#if __TBB_FORCE_64BIT_ALIGNMENT_BROKEN
} else {
// Unaligned store
#if TBB_USE_PERFORMANCE_WARNINGS
__TBB_machine_store8_slow_perf_warning(ptr);
#endif /* TBB_USE_PERFORMANCE_WARNINGS */
__TBB_machine_store8_slow(ptr,value);
}
#endif
}
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_GENERIC_DWORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_DWORD_FETCH_STORE 1
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,177 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_ia64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_ia64_H
#include <stdint.h>
#include <ia64intrin.h>
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#if __INTEL_COMPILER
#define __TBB_compiler_fence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper()
#define __TBB_release_consistency_helper()
#define __TBB_full_memory_fence() __mf()
#else
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
// Even though GCC imbues volatile loads with acquire semantics, it sometimes moves
// loads over the acquire fence. The following helpers stop such incorrect code motion.
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("mf": : :"memory")
#endif /* !__INTEL_COMPILER */
// Most of the functions will be in a .s file
// TODO: revise dynamic_link, memory pools and etc. if the library dependency is removed.
extern "C" {
int8_t __TBB_machine_fetchadd1__TBB_full_fence (volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1acquire(volatile void *ptr, int8_t addend);
int8_t __TBB_machine_fetchadd1release(volatile void *ptr, int8_t addend);
int16_t __TBB_machine_fetchadd2__TBB_full_fence (volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2acquire(volatile void *ptr, int16_t addend);
int16_t __TBB_machine_fetchadd2release(volatile void *ptr, int16_t addend);
int32_t __TBB_machine_fetchadd4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchadd4acquire(volatile void *ptr, int32_t addend);
int32_t __TBB_machine_fetchadd4release(volatile void *ptr, int32_t addend);
int64_t __TBB_machine_fetchadd8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchadd8acquire(volatile void *ptr, int64_t addend);
int64_t __TBB_machine_fetchadd8release(volatile void *ptr, int64_t addend);
int8_t __TBB_machine_fetchstore1__TBB_full_fence (volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1acquire(volatile void *ptr, int8_t value);
int8_t __TBB_machine_fetchstore1release(volatile void *ptr, int8_t value);
int16_t __TBB_machine_fetchstore2__TBB_full_fence (volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2acquire(volatile void *ptr, int16_t value);
int16_t __TBB_machine_fetchstore2release(volatile void *ptr, int16_t value);
int32_t __TBB_machine_fetchstore4__TBB_full_fence (volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4acquire(volatile void *ptr, int32_t value);
int32_t __TBB_machine_fetchstore4release(volatile void *ptr, int32_t value);
int64_t __TBB_machine_fetchstore8__TBB_full_fence (volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8acquire(volatile void *ptr, int64_t value);
int64_t __TBB_machine_fetchstore8release(volatile void *ptr, int64_t value);
int8_t __TBB_machine_cmpswp1__TBB_full_fence (volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1acquire(volatile void *ptr, int8_t value, int8_t comparand);
int8_t __TBB_machine_cmpswp1release(volatile void *ptr, int8_t value, int8_t comparand);
int16_t __TBB_machine_cmpswp2__TBB_full_fence (volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2acquire(volatile void *ptr, int16_t value, int16_t comparand);
int16_t __TBB_machine_cmpswp2release(volatile void *ptr, int16_t value, int16_t comparand);
int32_t __TBB_machine_cmpswp4__TBB_full_fence (volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4acquire(volatile void *ptr, int32_t value, int32_t comparand);
int32_t __TBB_machine_cmpswp4release(volatile void *ptr, int32_t value, int32_t comparand);
int64_t __TBB_machine_cmpswp8__TBB_full_fence (volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8acquire(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_cmpswp8release(volatile void *ptr, int64_t value, int64_t comparand);
int64_t __TBB_machine_lg(uint64_t value);
void __TBB_machine_pause(int32_t delay);
bool __TBB_machine_trylockbyte( volatile unsigned char &ptr );
int64_t __TBB_machine_lockbyte( volatile unsigned char &ptr );
//! Retrieves the current RSE backing store pointer. IA64 specific.
void* __TBB_get_bsp();
int32_t __TBB_machine_load1_relaxed(const void *ptr);
int32_t __TBB_machine_load2_relaxed(const void *ptr);
int32_t __TBB_machine_load4_relaxed(const void *ptr);
int64_t __TBB_machine_load8_relaxed(const void *ptr);
void __TBB_machine_store1_relaxed(void *ptr, int32_t value);
void __TBB_machine_store2_relaxed(void *ptr, int32_t value);
void __TBB_machine_store4_relaxed(void *ptr, int32_t value);
void __TBB_machine_store8_relaxed(void *ptr, int64_t value);
} // extern "C"
// Mapping old entry points to the names corresponding to the new full_fence identifier.
#define __TBB_machine_fetchadd1full_fence __TBB_machine_fetchadd1__TBB_full_fence
#define __TBB_machine_fetchadd2full_fence __TBB_machine_fetchadd2__TBB_full_fence
#define __TBB_machine_fetchadd4full_fence __TBB_machine_fetchadd4__TBB_full_fence
#define __TBB_machine_fetchadd8full_fence __TBB_machine_fetchadd8__TBB_full_fence
#define __TBB_machine_fetchstore1full_fence __TBB_machine_fetchstore1__TBB_full_fence
#define __TBB_machine_fetchstore2full_fence __TBB_machine_fetchstore2__TBB_full_fence
#define __TBB_machine_fetchstore4full_fence __TBB_machine_fetchstore4__TBB_full_fence
#define __TBB_machine_fetchstore8full_fence __TBB_machine_fetchstore8__TBB_full_fence
#define __TBB_machine_cmpswp1full_fence __TBB_machine_cmpswp1__TBB_full_fence
#define __TBB_machine_cmpswp2full_fence __TBB_machine_cmpswp2__TBB_full_fence
#define __TBB_machine_cmpswp4full_fence __TBB_machine_cmpswp4__TBB_full_fence
#define __TBB_machine_cmpswp8full_fence __TBB_machine_cmpswp8__TBB_full_fence
// Mapping relaxed operations to the entry points implementing them.
/** On IA64 RMW operations implicitly have acquire semantics. Thus one cannot
actually have completely relaxed RMW operation here. **/
#define __TBB_machine_fetchadd1relaxed __TBB_machine_fetchadd1acquire
#define __TBB_machine_fetchadd2relaxed __TBB_machine_fetchadd2acquire
#define __TBB_machine_fetchadd4relaxed __TBB_machine_fetchadd4acquire
#define __TBB_machine_fetchadd8relaxed __TBB_machine_fetchadd8acquire
#define __TBB_machine_fetchstore1relaxed __TBB_machine_fetchstore1acquire
#define __TBB_machine_fetchstore2relaxed __TBB_machine_fetchstore2acquire
#define __TBB_machine_fetchstore4relaxed __TBB_machine_fetchstore4acquire
#define __TBB_machine_fetchstore8relaxed __TBB_machine_fetchstore8acquire
#define __TBB_machine_cmpswp1relaxed __TBB_machine_cmpswp1acquire
#define __TBB_machine_cmpswp2relaxed __TBB_machine_cmpswp2acquire
#define __TBB_machine_cmpswp4relaxed __TBB_machine_cmpswp4acquire
#define __TBB_machine_cmpswp8relaxed __TBB_machine_cmpswp8acquire
#define __TBB_MACHINE_DEFINE_ATOMICS(S,V) \
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load ( const T& location ) { \
return (T)__TBB_machine_load##S##_relaxed(&location); \
} \
static inline void store ( T& location, T value ) { \
__TBB_machine_store##S##_relaxed(&location, (V)value); \
} \
}
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t);
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t);
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t);
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t);
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_ATOMICS
#define __TBB_USE_FENCED_ATOMICS 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
// Definition of Lock functions
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
#define __TBB_LockByte(P) __TBB_machine_lockbyte(P)
// Definition of other utility functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)

View File

@@ -0,0 +1,92 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_linux_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_linux_intel64_H
#include <stdint.h>
#include "gcc_ia32_common.h"
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#define __TBB_compiler_fence() __asm__ __volatile__("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#ifndef __TBB_full_memory_fence
#define __TBB_full_memory_fence() __asm__ __volatile__("mfence": : :"memory")
#endif
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,X) \
static inline T __TBB_machine_cmpswp##S (volatile void *ptr, T value, T comparand ) \
{ \
T result; \
\
__asm__ __volatile__("lock\ncmpxchg" X " %2,%1" \
: "=a"(result), "=m"(*(volatile T*)ptr) \
: "q"(value), "0"(comparand), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S(volatile void *ptr, T addend) \
{ \
T result; \
__asm__ __volatile__("lock\nxadd" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(addend), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
} \
\
static inline T __TBB_machine_fetchstore##S(volatile void *ptr, T value) \
{ \
T result; \
__asm__ __volatile__("lock\nxchg" X " %0,%1" \
: "=r"(result),"=m"(*(volatile T*)ptr) \
: "0"(value), "m"(*(volatile T*)ptr) \
: "memory"); \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1,int8_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(2,int16_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(4,int32_t,"")
__TBB_MACHINE_DEFINE_ATOMICS(8,int64_t,"q")
#undef __TBB_MACHINE_DEFINE_ATOMICS
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\norq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__("lock\nandq %1,%0" : "=m"(*(volatile uint64_t*)ptr) : "r"(value), "m"(*(volatile uint64_t*)ptr) : "memory");
}
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,309 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_gcc_power_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_gcc_power_H
#include <stdint.h>
#include <unistd.h>
// TODO: rename to gcc_power.h?
// This file is for Power Architecture with compilers supporting GNU inline-assembler syntax (currently GNU g++ and IBM XL).
// Note that XL V9.0 (sometimes?) has trouble dealing with empty input and/or clobber lists, so they should be avoided.
#if __powerpc64__ || __ppc64__
// IBM XL documents __powerpc64__ (and __PPC64__).
// Apple documents __ppc64__ (with __ppc__ only on 32-bit).
#define __TBB_WORDSIZE 8
#else
#define __TBB_WORDSIZE 4
#endif
// Traditionally Power Architecture is big-endian.
// Little-endian could be just an address manipulation (compatibility with TBB not verified),
// or normal little-endian (on more recent systems). Embedded PowerPC systems may support
// page-specific endianness, but then one endianness must be hidden from TBB so that it still sees only one.
#if __BIG_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__ || (defined(__BYTE_ORDER__) && __BYTE_ORDER__==__ORDER_LITTLE_ENDIAN__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#elif defined(__BYTE_ORDER__)
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_DETECT
#endif
// On Power Architecture, (lock-free) 64-bit atomics require 64-bit hardware:
#if __TBB_WORDSIZE==8
// Do not change the following definition, because TBB itself will use 64-bit atomics in 64-bit builds.
#define __TBB_64BIT_ATOMICS 1
#elif __bgp__
// Do not change the following definition, because this is known 32-bit hardware.
#define __TBB_64BIT_ATOMICS 0
#else
// To enable 64-bit atomics in 32-bit builds, set the value below to 1 instead of 0.
// You must make certain that the program will only use them on actual 64-bit hardware
// (which typically means that the entire program is only executed on such hardware),
// because their implementation involves machine instructions that are illegal elsewhere.
// The setting can be chosen independently per compilation unit,
// which also means that TBB itself does not need to be rebuilt.
// Alternatively (but only for the current architecture and TBB version),
// override the default as a predefined macro when invoking the compiler.
#ifndef __TBB_64BIT_ATOMICS
#define __TBB_64BIT_ATOMICS 0
#endif
#endif
inline int32_t __TBB_machine_cmpswp4 (volatile void *ptr, int32_t value, int32_t comparand )
{
int32_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"lwarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpw %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stwcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int32_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stwcx. */
);
return result;
}
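/* Illustrative note: the "sync" before the reservation loop and the "isync" after it make
   this CAS fully fenced, which matches TBB's default (full_fence) semantics for
   __TBB_machine_cmpswp4. A caller retries in the usual way, e.g. to atomically increment
   a hypothetical `counter`:

       int32_t old, seen = counter;
       do { old = seen; seen = __TBB_machine_cmpswp4( &counter, old+1, old ); } while( seen != old );
*/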
#if __TBB_WORDSIZE==8
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
__asm__ __volatile__("sync\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"isync"
: [res]"=&r"(result)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr]"r"(ptr)
, [val]"r"(value)
, [cmp]"r"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmp and/or stdcx. */
);
return result;
}
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
inline int64_t __TBB_machine_cmpswp8 (volatile void *ptr, int64_t value, int64_t comparand )
{
int64_t result;
int64_t value_register, comparand_register, result_register; // dummy variables to allocate registers
__asm__ __volatile__("sync\n\t"
"ld %[val],%[valm]\n\t"
"ld %[cmp],%[cmpm]\n"
"0:\n\t"
"ldarx %[res],0,%[ptr]\n\t" /* load w/ reservation */
"cmpd %[res],%[cmp]\n\t" /* compare against comparand */
"bne- 1f\n\t" /* exit if not same */
"stdcx. %[val],0,%[ptr]\n\t" /* store new value */
"bne- 0b\n" /* retry if reservation lost */
"1:\n\t" /* the exit */
"std %[res],%[resm]\n\t"
"isync"
: [resm]"=m"(result)
, [res] "=&r"( result_register)
, [val] "=&r"( value_register)
, [cmp] "=&r"(comparand_register)
, "+m"(* (int64_t*) ptr) /* redundant with "memory" */
: [ptr] "r"(ptr)
, [valm]"m"(value)
, [cmpm]"m"(comparand)
: "memory" /* compiler full fence */
, "cr0" /* clobbered by cmpd and/or stdcx. */
);
return result;
}
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
#define __TBB_MACHINE_DEFINE_LOAD_STORE(S,ldx,stx,cmpx) \
template <typename T> \
struct machine_load_store<T,S> { \
static inline T load_with_acquire(const volatile T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])\n" \
"0:\n\t" \
cmpx " %[res],%[res]\n\t" \
"bne- 0b\n\t" \
"isync" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) /* redundant with "memory" */ \
: "memory" /* compiler acquire fence */ \
, "cr0" /* clobbered by cmpw/cmpd */); \
return result; \
} \
static inline void store_with_release(volatile T &location, T value) { \
__asm__ __volatile__("lwsync\n\t" \
stx " %[val],0(%[ptr])" \
: "=m"(location) /* redundant with "memory" */ \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/); \
} \
}; \
\
template <typename T> \
struct machine_load_store_relaxed<T,S> { \
static inline T load (const __TBB_atomic T& location) { \
T result; \
__asm__ __volatile__(ldx " %[res],0(%[ptr])" \
: [res]"=r"(result) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, "m"(location) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
return result; \
} \
static inline void store (__TBB_atomic T &location, T value) { \
__asm__ __volatile__(stx " %[val],0(%[ptr])" \
: "=m"(location) \
: [ptr]"b"(&location) /* cannot use register 0 here */ \
, [val]"r"(value) \
); /*(no compiler fence)*/ /*(cr0 not affected)*/ \
} \
};
namespace tbb {
namespace internal {
__TBB_MACHINE_DEFINE_LOAD_STORE(1,"lbz","stb","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(2,"lhz","sth","cmpw")
__TBB_MACHINE_DEFINE_LOAD_STORE(4,"lwz","stw","cmpw")
#if __TBB_WORDSIZE==8
__TBB_MACHINE_DEFINE_LOAD_STORE(8,"ld" ,"std","cmpd")
#elif __TBB_64BIT_ATOMICS /* && __TBB_WORDSIZE==4 */
template <typename T>
struct machine_load_store<T,8> {
static inline T load_with_acquire(const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]\n"
"0:\n\t"
"cmpd %[res],%[res]\n\t"
"bne- 0b\n\t"
"isync"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location) /* redundant with "memory" */
: "memory" /* compiler acquire fence */
, "cr0" /* clobbered by cmpd */);
return result;
}
static inline void store_with_release(volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("lwsync\n\t"
"ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location) /* redundant with "memory" */
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
: "memory"/*compiler release fence*/ /*(cr0 not affected)*/);
}
};
template <typename T>
struct machine_load_store_relaxed<T,8> {
static inline T load (const volatile T& location) {
T result;
T result_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[res],0(%[ptr])\n\t"
"std %[res],%[resm]"
: [resm]"=m"(result)
, [res]"=&r"(result_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, "m"(location)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
return result;
}
static inline void store (volatile T &location, T value) {
T value_register; // dummy variable to allocate a register
__asm__ __volatile__("ld %[val],%[valm]\n\t"
"std %[val],0(%[ptr])"
: "=m"(location)
, [val]"=&r"(value_register)
: [ptr]"b"(&location) /* cannot use register 0 here */
, [valm]"m"(value)
); /*(no compiler fence)*/ /*(cr0 not affected)*/
}
};
#define __TBB_machine_load_store_relaxed_8
#endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
}} // namespaces internal, tbb
#undef __TBB_MACHINE_DEFINE_LOAD_STORE
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_control_consistency_helper() __asm__ __volatile__("isync": : :"memory")
#define __TBB_full_memory_fence() __asm__ __volatile__( "sync": : :"memory")
static inline intptr_t __TBB_machine_lg( uintptr_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
// cntlzd/cntlzw starts counting at 2^63/2^31 (ignoring any higher-order bits), and does not affect cr0
#if __TBB_WORDSIZE==8
__asm__ __volatile__ ("cntlzd %0,%0" : "+r"(x));
return 63-static_cast<intptr_t>(x);
#else
__asm__ __volatile__ ("cntlzw %0,%0" : "+r"(x));
return 31-static_cast<intptr_t>(x);
#endif
}
#define __TBB_Log2(V) __TBB_machine_lg(V)
// Assumes implicit alignment for any 32-bit value
typedef uint32_t __TBB_Flag;
#define __TBB_Flag __TBB_Flag
inline bool __TBB_machine_trylockbyte( __TBB_atomic __TBB_Flag &flag ) {
return __TBB_machine_cmpswp4(&flag,1,0)==0;
}
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)

View File

@@ -0,0 +1,129 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_macos_common_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_macos_common_H
#include <sched.h>
#define __TBB_Yield() sched_yield()
// __TBB_HardwareConcurrency
#include <sys/types.h>
#include <sys/sysctl.h>
static inline int __TBB_macos_available_cpu() {
int name[2] = {CTL_HW, HW_AVAILCPU};
int ncpu;
size_t size = sizeof(ncpu);
sysctl( name, 2, &ncpu, &size, NULL, 0 );
return ncpu;
}
#define __TBB_HardwareConcurrency() __TBB_macos_available_cpu()
#ifndef __TBB_full_memory_fence
// TBB has not recognized the architecture (none of the architecture abstraction
// headers was included).
#define __TBB_UnknownArchitecture 1
#endif
#if __TBB_UnknownArchitecture
// Implementation of atomic operations based on OS provided primitives
#include <libkern/OSAtomic.h>
static inline int64_t __TBB_machine_cmpswp8_OsX(volatile void *ptr, int64_t value, int64_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for macOS* atomics");
int64_t* address = (int64_t*)ptr;
while( !OSAtomicCompareAndSwap64Barrier(comparand, value, address) ){
#if __TBB_WORDSIZE==8
int64_t snapshot = *address;
#else
int64_t snapshot = OSAtomicAdd64( 0, address );
#endif
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
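/* Illustrative note: OSAtomicCompareAndSwap64Barrier only reports success or failure, whereas
   TBB's CAS convention returns the value observed in memory. The loop above bridges the two:
   on failure it re-reads the location (via OSAtomicAdd64 on 32-bit builds, where a plain
   64-bit load could tear) and returns that snapshot, unless the location has meanwhile become
   equal to the comparand again, in which case the CAS is simply retried. */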
#define __TBB_machine_cmpswp8 __TBB_machine_cmpswp8_OsX
#endif /* __TBB_UnknownArchitecture */
#if __TBB_UnknownArchitecture
#ifndef __TBB_WORDSIZE
#define __TBB_WORDSIZE __SIZEOF_POINTER__
#endif
#ifdef __TBB_ENDIANNESS
// Already determined based on hardware architecture.
#elif __BIG_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
#elif __LITTLE_ENDIAN__
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#else
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#endif
/** As this generic implementation has absolutely no information about underlying
hardware, its performance most likely will be sub-optimal because of full memory
fence usages where a more lightweight synchronization means (or none at all)
could suffice. Thus if you use this header to enable TBB on a new platform,
consider forking it and relaxing below helpers as appropriate. **/
#define __TBB_control_consistency_helper() OSMemoryBarrier()
#define __TBB_acquire_consistency_helper() OSMemoryBarrier()
#define __TBB_release_consistency_helper() OSMemoryBarrier()
#define __TBB_full_memory_fence() OSMemoryBarrier()
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for macOS atomics");
int32_t* address = (int32_t*)ptr;
while( !OSAtomicCompareAndSwap32Barrier(comparand, value, address) ){
int32_t snapshot = *address;
if( snapshot!=comparand ) return snapshot;
}
return comparand;
}
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,4), "address not properly aligned for macOS atomics");
return OSAtomicAdd32Barrier(addend, (int32_t*)ptr) - addend;
}
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend)
{
__TBB_ASSERT( tbb::internal::is_aligned(ptr,8), "address not properly aligned for macOS atomics");
return OSAtomicAdd64Barrier(addend, (int64_t*)ptr) - addend;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#if __TBB_WORDSIZE == 4
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#endif
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#endif /* __TBB_UnknownArchitecture */

View File

@@ -0,0 +1,53 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_mic_common_H
#define __TBB_mic_common_H
#ifndef __TBB_machine_H
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#if ! __TBB_DEFINE_MIC
#error mic_common.h should be included only when building for Intel(R) Many Integrated Core Architecture
#endif
#ifndef __TBB_PREFETCHING
#define __TBB_PREFETCHING 1
#endif
#if __TBB_PREFETCHING
#include <immintrin.h>
#define __TBB_cl_prefetch(p) _mm_prefetch((const char*)p, _MM_HINT_T1)
#define __TBB_cl_evict(p) _mm_clevict(p, _MM_HINT_T1)
#endif
/** Intel(R) Many Integrated Core Architecture does not support mfence and pause instructions **/
#define __TBB_full_memory_fence() __asm__ __volatile__("lock; addl $0,(%%rsp)":::"memory")
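/* Illustrative note: a LOCK-prefixed read-modify-write of a stack slot ("lock; addl $0,(%rsp)")
   acts as a full memory barrier for ordinary loads and stores, so it is a standard substitute
   for mfence on targets that lack that instruction. */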
#define __TBB_Pause(x) _mm_delay_32(16*(x))
#define __TBB_STEALING_PAUSE 1500/16
#include <sched.h>
#define __TBB_Yield() sched_yield()
/** Specifics **/
#define __TBB_STEALING_ABORT_ON_CONTENTION 1
#define __TBB_YIELD2P 1
#define __TBB_HOARD_NONLOCAL_TASKS 1
#if ! ( __FreeBSD__ || __linux__ )
#error Intel(R) Many Integrated Core Compiler does not define __FreeBSD__ or __linux__ anymore. Check for the __TBB_XXX_BROKEN defined under __FreeBSD__ or __linux__.
#endif /* ! ( __FreeBSD__ || __linux__ ) */
#endif /* __TBB_mic_common_H */

View File

@@ -0,0 +1,167 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_msvc_armv7_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_msvc_armv7_H
#include <intrin.h>
#include <float.h>
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_UNSUPPORTED
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// We can test this on _M_IX86
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#else
//Now __dmb(_ARM_BARRIER_SY) is used for both compiler and memory fences
//This might be changed later after testing
#define __TBB_compiler_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_full_memory_fence() __dmb(_ARM_BARRIER_SY)
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_full_memory_fence()
#define __TBB_release_consistency_helper() __TBB_full_memory_fence()
#endif
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
#define __TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(S,T,F) \
inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \
return _InterlockedCompareExchange##F(reinterpret_cast<volatile T *>(ptr),value,comparand); \
}
#define __TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(S,T,F) \
inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \
return _InterlockedExchangeAdd##F(reinterpret_cast<volatile T *>(ptr),value); \
}
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(1,char,8)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(2,short,16)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(4,long,)
__TBB_MACHINE_DEFINE_ATOMICS_CMPSWP(8,__int64,64)
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(4,long,)
#if defined(TBB_WIN32_USE_CL_BUILTINS)
// No _InterlockedExchangeAdd64 intrinsic on _M_IX86
#define __TBB_64BIT_ATOMICS 0
#else
__TBB_MACHINE_DEFINE_ATOMICS_FETCHADD(8,__int64,64)
#endif
inline void __TBB_machine_pause (int32_t delay )
{
while(delay>0)
{
__TBB_compiler_fence();
delay--;
}
}
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb {
namespace internal {
template <typename T, size_t S>
struct machine_load_store_relaxed {
static inline T load ( const volatile T& location ) {
const T value = location;
/*
* An extra memory barrier is required for errata #761319
* Please see http://infocenter.arm.com/help/topic/com.arm.doc.uan0004a
*/
__TBB_acquire_consistency_helper();
return value;
}
static inline void store ( volatile T& location, T value ) {
location = value;
}
};
class cpu_ctl_env {
private:
unsigned int my_ctl;
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return my_ctl != ctl.my_ctl; }
void get_env() { my_ctl = _control87(0, 0); }
void set_env() const { _control87( my_ctl, ~0U ); }
};
} // namespace internal
} // namespace tbb
// Machine specific atomic operations
#define __TBB_CompareAndSwap4(P,V,C) __TBB_machine_cmpswp4(P,V,C)
#define __TBB_CompareAndSwap8(P,V,C) __TBB_machine_cmpswp8(P,V,C)
#define __TBB_Pause(V) __TBB_machine_pause(V)
// Use generics for some things
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_STORE 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_DWORD_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(TBB_WIN32_USE_CL_BUILTINS)
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#else
#define __TBB_Yield() __yield()
#endif
// Machine specific atomic operations
#define __TBB_AtomicOR(P,V) __TBB_machine_OR(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_AND(P,V)
template <typename T1,typename T2>
inline void __TBB_machine_OR( T1 *operand, T2 addend ) {
_InterlockedOr((long volatile *)operand, (long)addend);
}
template <typename T1,typename T2>
inline void __TBB_machine_AND( T1 *operand, T2 addend ) {
_InterlockedAnd((long volatile *)operand, (long)addend);
}
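/* Illustrative sketch, not part of the original header: compare-and-swap
   primitives such as __TBB_machine_cmpswp4 above are consumed by higher TBB
   layers in retry loops.  The same pattern written against std::atomic
   (sketch_ names are hypothetical): */
#include <atomic>

inline void sketch_atomic_fetch_max(std::atomic<long>& target, long candidate) {
    long observed = target.load();
    // Retry until the stored value is already >= candidate or our
    // compare-exchange installs the new maximum; a failed CAS refreshes
    // 'observed' with the current contents, exactly like re-reading *ptr.
    while (observed < candidate &&
           !target.compare_exchange_weak(observed, candidate)) {
    }
}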

View File

@@ -0,0 +1,275 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_msvc_ia32_common_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_msvc_ia32_common_H
#include <intrin.h>
//TODO: consider moving this macro to tbb_config.h and using where MSVC asm is used
#if !_M_X64 || __INTEL_COMPILER
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 1
#else
//MSVC in x64 mode does not accept inline assembler
#define __TBB_X86_MSVC_INLINE_ASM_AVAILABLE 0
#define __TBB_NO_X86_MSVC_INLINE_ASM_MSG "The compiler being used is not supported (outdated?)"
#endif
#if _M_X64
#define __TBB_r(reg_name) r##reg_name
#define __TBB_W(name) name##64
namespace tbb { namespace internal { namespace msvc_intrinsics {
typedef __int64 word;
}}}
#else
#define __TBB_r(reg_name) e##reg_name
#define __TBB_W(name) name
namespace tbb { namespace internal { namespace msvc_intrinsics {
typedef long word;
}}}
#endif
#if __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
// S is the operand size in bytes, B is the suffix for intrinsics for that size
#define __TBB_MACHINE_DEFINE_ATOMICS(S,B,T,U) \
__pragma(intrinsic( _InterlockedCompareExchange##B )) \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
return _InterlockedCompareExchange##B ( (T*)ptr, value, comparand ); \
} \
__pragma(intrinsic( _InterlockedExchangeAdd##B )) \
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
return _InterlockedExchangeAdd##B ( (T*)ptr, addend ); \
} \
__pragma(intrinsic( _InterlockedExchange##B )) \
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
return _InterlockedExchange##B ( (T*)ptr, value ); \
}
// Atomic intrinsics for 1, 2, and 4 bytes are available for x86 & x64
__TBB_MACHINE_DEFINE_ATOMICS(1,8,char,__int8)
__TBB_MACHINE_DEFINE_ATOMICS(2,16,short,__int16)
__TBB_MACHINE_DEFINE_ATOMICS(4,,long,__int32)
#if __TBB_WORDSIZE==8
__TBB_MACHINE_DEFINE_ATOMICS(8,64,__int64,__int64)
#endif
#undef __TBB_MACHINE_DEFINE_ATOMICS
#endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
#if _MSC_VER>=1300 || __INTEL_COMPILER>=1100
#pragma intrinsic(_ReadWriteBarrier)
#pragma intrinsic(_mm_mfence)
#define __TBB_compiler_fence() _ReadWriteBarrier()
#define __TBB_full_memory_fence() _mm_mfence()
#elif __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#define __TBB_compiler_fence() __asm { __asm nop }
#define __TBB_full_memory_fence() __asm { __asm mfence }
#else
#error Unsupported compiler; define __TBB_{control,acquire,release}_consistency_helper to support it
#endif
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#if (_MSC_VER>=1300) || (__INTEL_COMPILER)
#pragma intrinsic(_mm_pause)
namespace tbb { namespace internal { namespace msvc_intrinsics {
static inline void pause (uintptr_t delay ) {
for (;delay>0; --delay )
_mm_pause();
}
}}}
#define __TBB_Pause(V) tbb::internal::msvc_intrinsics::pause(V)
#define __TBB_SINGLE_PAUSE _mm_pause()
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace msvc_inline_asm {
static inline void pause (uintptr_t delay ) {
_asm
{
mov __TBB_r(ax), delay
__TBB_L1:
pause
add __TBB_r(ax), -1
jne __TBB_L1
}
return;
}
}}}
#define __TBB_Pause(V) tbb::internal::msvc_inline_asm::pause(V)
#define __TBB_SINGLE_PAUSE __asm pause
#endif
#if (_MSC_VER>=1400 && !__INTEL_COMPILER) || (__INTEL_COMPILER>=1200)
// MSVC did not have this intrinsic prior to VC8.
// ICL 11.1 fails to compile a TBB example if __TBB_Log2 uses the intrinsic.
#pragma intrinsic(__TBB_W(_BitScanReverse))
namespace tbb { namespace internal { namespace msvc_intrinsics {
static inline uintptr_t lg_bsr( uintptr_t i ){
unsigned long j;
__TBB_W(_BitScanReverse)( &j, i );
return j;
}
}}}
#define __TBB_Log2(V) tbb::internal::msvc_intrinsics::lg_bsr(V)
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace msvc_inline_asm {
static inline uintptr_t lg_bsr( uintptr_t i ){
uintptr_t j;
__asm
{
bsr __TBB_r(ax), i
mov j, __TBB_r(ax)
}
return j;
}
}}}
#define __TBB_Log2(V) tbb::internal::msvc_inline_asm::lg_bsr(V)
#endif
#if _MSC_VER>=1400
#pragma intrinsic(__TBB_W(_InterlockedOr))
#pragma intrinsic(__TBB_W(_InterlockedAnd))
namespace tbb { namespace internal { namespace msvc_intrinsics {
static inline void lock_or( volatile void *operand, intptr_t addend ){
__TBB_W(_InterlockedOr)((volatile word*)operand, addend);
}
static inline void lock_and( volatile void *operand, intptr_t addend ){
__TBB_W(_InterlockedAnd)((volatile word*)operand, addend);
}
}}}
#define __TBB_AtomicOR(P,V) tbb::internal::msvc_intrinsics::lock_or(P,V)
#define __TBB_AtomicAND(P,V) tbb::internal::msvc_intrinsics::lock_and(P,V)
#else
#if !__TBB_X86_MSVC_INLINE_ASM_AVAILABLE
#error __TBB_NO_X86_MSVC_INLINE_ASM_MSG
#endif
namespace tbb { namespace internal { namespace msvc_inline_asm {
static inline void lock_or( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock or [edx], eax
}
}
static inline void lock_and( volatile void *operand, __int32 addend ) {
__asm
{
mov eax, addend
mov edx, [operand]
lock and [edx], eax
}
}
}}}
#define __TBB_AtomicOR(P,V) tbb::internal::msvc_inline_asm::lock_or(P,V)
#define __TBB_AtomicAND(P,V) tbb::internal::msvc_inline_asm::lock_and(P,V)
#endif
#pragma intrinsic(__rdtsc)
namespace tbb { namespace internal { typedef uint64_t machine_tsc_t; } }
static inline tbb::internal::machine_tsc_t __TBB_machine_time_stamp() {
return __rdtsc();
}
#define __TBB_time_stamp() __TBB_machine_time_stamp()
// API to retrieve/update FPU control setting
#define __TBB_CPU_CTL_ENV_PRESENT 1
namespace tbb { namespace internal { class cpu_ctl_env; } }
#if __TBB_X86_MSVC_INLINE_ASM_AVAILABLE
inline void __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm stmxcsr [__TBB_r(ax)]
__asm fstcw [__TBB_r(ax)+4]
}
}
inline void __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* ctl ) {
__asm {
__asm mov __TBB_r(ax), ctl
__asm ldmxcsr [__TBB_r(ax)]
__asm fldcw [__TBB_r(ax)+4]
}
}
#else
extern "C" {
void __TBB_EXPORTED_FUNC __TBB_get_cpu_ctl_env ( tbb::internal::cpu_ctl_env* );
void __TBB_EXPORTED_FUNC __TBB_set_cpu_ctl_env ( const tbb::internal::cpu_ctl_env* );
}
#endif
namespace tbb {
namespace internal {
class cpu_ctl_env {
private:
int mxcsr;
short x87cw;
static const int MXCSR_CONTROL_MASK = ~0x3f; /* all except last six status bits */
public:
bool operator!=( const cpu_ctl_env& ctl ) const { return mxcsr != ctl.mxcsr || x87cw != ctl.x87cw; }
void get_env() {
__TBB_get_cpu_ctl_env( this );
mxcsr &= MXCSR_CONTROL_MASK;
}
void set_env() const { __TBB_set_cpu_ctl_env( this ); }
};
} // namespace internal
} // namespace tbb
#if !__TBB_WIN8UI_SUPPORT
extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
#define __TBB_Yield() SwitchToThread()
#else
#include<thread>
#define __TBB_Yield() std::this_thread::yield()
#endif
#undef __TBB_r
#undef __TBB_W
#undef __TBB_word
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_try_lock_elided (volatile void* ptr);
void __TBB_EXPORTED_FUNC __TBB_machine_unlock_elided (volatile void* ptr);
// 'pause' instruction aborts HLE/RTM transactions
inline static void __TBB_machine_try_lock_elided_cancel() { __TBB_SINGLE_PAUSE; }
#if __TBB_TSX_INTRINSICS_PRESENT
#define __TBB_machine_is_in_transaction _xtest
#define __TBB_machine_begin_transaction _xbegin
#define __TBB_machine_end_transaction _xend
// The value (0xFF) below comes from the
// Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
#define __TBB_machine_transaction_conflict_abort() _xabort(0xFF)
#else
__int8 __TBB_EXPORTED_FUNC __TBB_machine_is_in_transaction();
unsigned __int32 __TBB_EXPORTED_FUNC __TBB_machine_begin_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_end_transaction();
void __TBB_EXPORTED_FUNC __TBB_machine_transaction_conflict_abort();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
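/* Illustrative sketch, not part of the original header: __TBB_Log2 above is the
   index of the most significant set bit, i.e. floor(log2(x)) for x > 0, whether
   it comes from _BitScanReverse or from the bsr inline-asm fallback.  A portable
   reference version (the sketch_ name is hypothetical), assuming x != 0: */
#include <cstdint>

inline uintptr_t sketch_floor_log2(uintptr_t x) {
    uintptr_t result = 0;
    while (x >>= 1)   // shift until only the top bit has been consumed
        ++result;     // count the shifts: sketch_floor_log2(40) == 5
    return result;
}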

View File

@@ -0,0 +1,199 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_sunos_sparc_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_sunos_sparc_H
#include <stdint.h>
#include <unistd.h>
#define __TBB_WORDSIZE 8
// Big endian is assumed for SPARC.
// While hardware may support page-specific bi-endianness, only big endian pages may be exposed to TBB
#define __TBB_ENDIANNESS __TBB_ENDIAN_BIG
/** Note to those working on SPARC hardware: consider relaxing the acquire and release
consistency helpers to no-ops, since this port covers TSO mode only. **/
#define __TBB_compiler_fence() __asm__ __volatile__ ("": : :"memory")
#define __TBB_control_consistency_helper() __TBB_compiler_fence()
#define __TBB_acquire_consistency_helper() __TBB_compiler_fence()
#define __TBB_release_consistency_helper() __TBB_compiler_fence()
#define __TBB_full_memory_fence() __asm__ __volatile__("membar #LoadLoad|#LoadStore|#StoreStore|#StoreLoad": : : "memory")
//--------------------------------------------------
// Compare and swap
//--------------------------------------------------
/**
* Atomic CAS for 32 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int32_t __TBB_machine_cmpswp4(volatile void *ptr, int32_t value, int32_t comparand ){
int32_t result;
__asm__ __volatile__(
"cas\t[%5],%4,%1"
: "=m"(*(int32_t *)ptr), "=r"(result)
: "m"(*(int32_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
/**
* Atomic CAS for 64 bit values, if *ptr==comparand, then *ptr=value, returns *ptr
* @param ptr pointer to value in memory to be swapped with value if *ptr==comparand
* @param value value to assign *ptr to if *ptr==comparand
* @param comparand value to compare with *ptr
* @return value originally in memory at ptr, regardless of success
*/
static inline int64_t __TBB_machine_cmpswp8(volatile void *ptr, int64_t value, int64_t comparand ){
int64_t result;
__asm__ __volatile__(
"casx\t[%5],%4,%1"
: "=m"(*(int64_t *)ptr), "=r"(result)
: "m"(*(int64_t *)ptr), "1"(value), "r"(comparand), "r"(ptr)
: "memory");
return result;
}
//---------------------------------------------------
// Fetch and add
//---------------------------------------------------
/**
* Atomic fetch and add for 32 bit values, implemented here as a compare-and-swap retry loop
* @param ptr pointer to value to add addend to
* @param addend value to add to *ptr
* @return value at ptr before addend was added
*/
static inline int32_t __TBB_machine_fetchadd4(volatile void *ptr, int32_t addend){
int32_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t cas\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%icc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int32_t *)ptr)
: "r"(ptr), "r"(*(int32_t *)ptr), "r"(addend), "m"(*(int32_t *)ptr)
: "ccr", "memory");
return result;
}
/**
* Atomic fetch and add for 64 bit values, implemented here as a compare-and-swap retry loop
* @param ptr pointer to value to add addend to
* @param addend value to add to *ptr
* @return value at ptr before addend was added
*/
static inline int64_t __TBB_machine_fetchadd8(volatile void *ptr, int64_t addend){
int64_t result;
__asm__ __volatile__ (
"0:\t add\t %3, %4, %0\n" // do addition
"\t casx\t [%2], %3, %0\n" // cas to store result in memory
"\t cmp\t %3, %0\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %0, %3\n" // use branch delay slot to move new value in memory to be added
: "=&r"(result), "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(addend), "m"(*(int64_t *)ptr)
: "ccr", "memory");
return result;
}
//--------------------------------------------------------
// Logarithm (base two, integer)
//--------------------------------------------------------
static inline int64_t __TBB_machine_lg( uint64_t x ) {
__TBB_ASSERT(x, "__TBB_Log2(0) undefined");
uint64_t count;
// one hot encode
x |= (x >> 1);
x |= (x >> 2);
x |= (x >> 4);
x |= (x >> 8);
x |= (x >> 16);
x |= (x >> 32);
// count 1's
__asm__ ("popc %1, %0" : "=r"(count) : "r"(x) );
return count-1;
}
//--------------------------------------------------------
static inline void __TBB_machine_or( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t or\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_and( volatile void *ptr, uint64_t value ) {
__asm__ __volatile__ (
"0:\t and\t %2, %3, %%g1\n" // do operation
"\t casx\t [%1], %2, %%g1\n" // cas to store result in memory
"\t cmp\t %2, %%g1\n" // check if value from memory is original
"\t bne,a,pn\t %%xcc, 0b\n" // if not try again
"\t mov %%g1, %2\n" // use branch delay slot to move new value in memory to be added
: "=m"(*(int64_t *)ptr)
: "r"(ptr), "r"(*(int64_t *)ptr), "r"(value), "m"(*(int64_t *)ptr)
: "ccr", "g1", "memory");
}
static inline void __TBB_machine_pause( int32_t delay ) {
// do nothing, inlined, doesn't matter
}
// put 0xff in memory location, return memory value,
// generic trylockbyte puts 0x01, however this is fine
// because all that matters is that 0 is unlocked
static inline bool __TBB_machine_trylockbyte(unsigned char &flag){
unsigned char result;
__asm__ __volatile__ (
"ldstub\t [%2], %0\n"
: "=r"(result), "=m"(flag)
: "r"(&flag), "m"(flag)
: "memory");
return result == 0;
}
#define __TBB_USE_GENERIC_PART_WORD_CAS 1
#define __TBB_USE_GENERIC_PART_WORD_FETCH_ADD 1
#define __TBB_USE_GENERIC_FETCH_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V)
#define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V)
// Definition of other functions
#define __TBB_Pause(V) __TBB_machine_pause(V)
#define __TBB_Log2(V) __TBB_machine_lg(V)
#define __TBB_TryLockByte(P) __TBB_machine_trylockbyte(P)
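/* Illustrative sketch, not part of the original header: the SPARC fetch-add
   routines above emulate an atomic add with a cas/casx retry loop.  The same
   structure against std::atomic's compare-exchange (the sketch_ name is hypothetical): */
#include <atomic>
#include <cstdint>

inline int64_t sketch_fetchadd_via_cas(std::atomic<int64_t>& location, int64_t addend) {
    int64_t expected = location.load();
    // Try to install expected + addend; on failure 'expected' is reloaded with
    // the current contents and we retry, mirroring the branch back to label 0 above.
    while (!location.compare_exchange_weak(expected, expected + addend)) {
    }
    return expected;  // the value observed just before the successful add
}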

View File

@@ -0,0 +1,65 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_machine_windows_api_H
#define __TBB_machine_windows_api_H
#if _WIN32 || _WIN64
#include <windows.h>
#if _WIN32_WINNT < 0x0600
// The following Windows API functions are declared explicitly;
// otherwise they fail to compile with VS2005.
#if !defined(WINBASEAPI) || (_WIN32_WINNT < 0x0501 && _MSC_VER == 1400)
#define __TBB_WINBASEAPI extern "C"
#else
#define __TBB_WINBASEAPI WINBASEAPI
#endif
__TBB_WINBASEAPI BOOL WINAPI TryEnterCriticalSection( LPCRITICAL_SECTION );
__TBB_WINBASEAPI BOOL WINAPI InitializeCriticalSectionAndSpinCount( LPCRITICAL_SECTION, DWORD );
// Overloading WINBASEAPI macro and using local functions missing in Windows XP/2003
#define InitializeCriticalSectionEx inlineInitializeCriticalSectionEx
#define CreateSemaphoreEx inlineCreateSemaphoreEx
#define CreateEventEx inlineCreateEventEx
inline BOOL WINAPI inlineInitializeCriticalSectionEx( LPCRITICAL_SECTION lpCriticalSection, DWORD dwSpinCount, DWORD )
{
return InitializeCriticalSectionAndSpinCount( lpCriticalSection, dwSpinCount );
}
inline HANDLE WINAPI inlineCreateSemaphoreEx( LPSECURITY_ATTRIBUTES lpSemaphoreAttributes, LONG lInitialCount, LONG lMaximumCount, LPCTSTR lpName, DWORD, DWORD )
{
return CreateSemaphore( lpSemaphoreAttributes, lInitialCount, lMaximumCount, lpName );
}
inline HANDLE WINAPI inlineCreateEventEx( LPSECURITY_ATTRIBUTES lpEventAttributes, LPCTSTR lpName, DWORD dwFlags, DWORD )
{
BOOL manual_reset = dwFlags&0x00000001 ? TRUE : FALSE; // CREATE_EVENT_MANUAL_RESET
BOOL initial_set = dwFlags&0x00000002 ? TRUE : FALSE; // CREATE_EVENT_INITIAL_SET
return CreateEvent( lpEventAttributes, manual_reset, initial_set, lpName );
}
#endif
#if defined(RTL_SRWLOCK_INIT)
#ifndef __TBB_USE_SRWLOCK
// TODO: turn it on when bug 1952 is fixed
#define __TBB_USE_SRWLOCK 0
#endif
#endif
#else
#error tbb/machine/windows_api.h should only be used for Windows based platforms
#endif // _WIN32 || _WIN64
#endif // __TBB_machine_windows_api_H

View File

@@ -0,0 +1,105 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_ia32_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_ia32_H
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings in /Wp64 mode
#pragma warning (push)
#pragma warning (disable: 4244 4267)
#endif
#include "msvc_ia32_common.h"
#define __TBB_WORDSIZE 4
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
extern "C" {
__int64 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value );
void __TBB_EXPORTED_FUNC __TBB_machine_store8 (volatile void *ptr, __int64 value );
__int64 __TBB_EXPORTED_FUNC __TBB_machine_load8 (const volatile void *ptr);
}
#if !__TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
#define __TBB_MACHINE_DEFINE_ATOMICS(S,T,U,A,C) \
static inline T __TBB_machine_cmpswp##S ( volatile void * ptr, U value, U comparand ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov C , value \
__asm mov A , comparand \
__asm lock cmpxchg [edx], C \
__asm mov result, A \
} \
return result; \
} \
\
static inline T __TBB_machine_fetchadd##S ( volatile void * ptr, U addend ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, addend \
__asm lock xadd [edx], A \
__asm mov result, A \
} \
return result; \
}\
\
static inline T __TBB_machine_fetchstore##S ( volatile void * ptr, U value ) { \
T result; \
volatile T *p = (T *)ptr; \
__asm \
{ \
__asm mov edx, p \
__asm mov A, value \
__asm lock xchg [edx], A \
__asm mov result, A \
} \
return result; \
}
__TBB_MACHINE_DEFINE_ATOMICS(1, __int8, __int8, al, cl)
__TBB_MACHINE_DEFINE_ATOMICS(2, __int16, __int16, ax, cx)
__TBB_MACHINE_DEFINE_ATOMICS(4, ptrdiff_t, ptrdiff_t, eax, ecx)
#undef __TBB_MACHINE_DEFINE_ATOMICS
#endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
//TODO: Check whether it is possible and profitable for the IA-32 architecture (on Linux and Windows)
//to use 64-bit load/store via floating point registers together with a full fence
//for sequentially consistent load/store, instead of CAS.
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warnings 4244, 4267 are back
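/* Illustrative sketch, not part of the original header: the fetchstore routines
   above (lock xchg) store unconditionally and hand back the previous contents,
   which is exactly what a test-and-set spin lock needs.  A sketch with
   std::atomic (names are hypothetical): */
#include <atomic>

inline void sketch_spin_acquire(std::atomic<long>& lock_word) {
    // Exchanging in 1 returns 0 exactly once per unlock, so one caller wins.
    while (lock_word.exchange(1) != 0) {
        // spin; a real lock would pause/yield here, as the TBB machine layer does
    }
}

inline void sketch_spin_release(std::atomic<long>& lock_word) {
    lock_word.store(0);  // make the lock available again
}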

View File

@@ -0,0 +1,70 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#if !defined(__TBB_machine_H) || defined(__TBB_machine_windows_intel64_H)
#error Do not #include this internal file directly; use public TBB headers instead.
#endif
#define __TBB_machine_windows_intel64_H
#define __TBB_WORDSIZE 8
#define __TBB_ENDIANNESS __TBB_ENDIAN_LITTLE
#include "msvc_ia32_common.h"
#if !__TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT
#include <intrin.h>
#pragma intrinsic(_InterlockedCompareExchange,_InterlockedExchangeAdd,_InterlockedExchange)
#pragma intrinsic(_InterlockedCompareExchange64,_InterlockedExchangeAdd64,_InterlockedExchange64)
// ATTENTION: if you ever change argument types in machine-specific primitives,
// please take care of atomic_word<> specializations in tbb/atomic.h
extern "C" {
__int8 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp1 (volatile void *ptr, __int8 value, __int8 comparand );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd1 (volatile void *ptr, __int8 addend );
__int8 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore1 (volatile void *ptr, __int8 value );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_cmpswp2 (volatile void *ptr, __int16 value, __int16 comparand );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchadd2 (volatile void *ptr, __int16 addend );
__int16 __TBB_EXPORTED_FUNC __TBB_machine_fetchstore2 (volatile void *ptr, __int16 value );
}
inline long __TBB_machine_cmpswp4 (volatile void *ptr, __int32 value, __int32 comparand ) {
return _InterlockedCompareExchange( (long*)ptr, value, comparand );
}
inline long __TBB_machine_fetchadd4 (volatile void *ptr, __int32 addend ) {
return _InterlockedExchangeAdd( (long*)ptr, addend );
}
inline long __TBB_machine_fetchstore4 (volatile void *ptr, __int32 value ) {
return _InterlockedExchange( (long*)ptr, value );
}
inline __int64 __TBB_machine_cmpswp8 (volatile void *ptr, __int64 value, __int64 comparand ) {
return _InterlockedCompareExchange64( (__int64*)ptr, value, comparand );
}
inline __int64 __TBB_machine_fetchadd8 (volatile void *ptr, __int64 addend ) {
return _InterlockedExchangeAdd64( (__int64*)ptr, addend );
}
inline __int64 __TBB_machine_fetchstore8 (volatile void *ptr, __int64 value ) {
return _InterlockedExchange64( (__int64*)ptr, value );
}
#endif /* __TBB_MSVC_PART_WORD_INTERLOCKED_INTRINSICS_PRESENT */
#define __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE 1
#define __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE 1
#define __TBB_USE_GENERIC_RELAXED_LOAD_STORE 1
#define __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE 1

View File

@@ -0,0 +1,275 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_memory_pool_H
#define __TBB_memory_pool_H
#if !TBB_PREVIEW_MEMORY_POOL
#error Set TBB_PREVIEW_MEMORY_POOL to include memory_pool.h
#endif
/** @file */
#include "scalable_allocator.h"
#include <new> // std::bad_alloc
#include <stdexcept> // std::runtime_error, std::invalid_argument
// required in C++03 to construct std::runtime_error and std::invalid_argument
#include <string>
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#include <utility> // std::forward
#endif
#if __TBB_EXTRA_DEBUG
#define __TBBMALLOC_ASSERT ASSERT
#else
#define __TBBMALLOC_ASSERT(a,b) ((void)0)
#endif
namespace tbb {
namespace interface6 {
//! @cond INTERNAL
namespace internal {
//! Base of thread-safe pool allocator for variable-size requests
class pool_base : tbb::internal::no_copy {
// Pool interface is separate from standard allocator classes because it has
// to maintain internal state, no copy or assignment. Move and swap are possible.
public:
//! Reset pool to reuse its memory (free all objects at once)
void recycle() { rml::pool_reset(my_pool); }
//! The "malloc" analogue to allocate block of memory of size bytes
void *malloc(size_t size) { return rml::pool_malloc(my_pool, size); }
//! The "free" analogue to discard a previously allocated piece of memory.
void free(void* ptr) { rml::pool_free(my_pool, ptr); }
//! The "realloc" analogue complementing pool_malloc.
// Enables some low-level optimization possibilities
void *realloc(void* ptr, size_t size) {
return rml::pool_realloc(my_pool, ptr, size);
}
protected:
//! destroy pool - must be called in a child class
void destroy() { rml::pool_destroy(my_pool); }
rml::MemoryPool *my_pool;
};
} // namespace internal
//! @endcond
#if _MSC_VER && !defined(__INTEL_COMPILER)
// Workaround for erroneous "unreferenced parameter" warning in method destroy.
#pragma warning (push)
#pragma warning (disable: 4100)
#endif
//! Meets "allocator" requirements of ISO C++ Standard, Section 20.1.5
/** @ingroup memory_allocation */
template<typename T, typename P = internal::pool_base>
class memory_pool_allocator {
protected:
typedef P pool_type;
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
public:
typedef typename tbb::internal::allocator_type<T>::value_type value_type;
typedef value_type* pointer;
typedef const value_type* const_pointer;
typedef value_type& reference;
typedef const value_type& const_reference;
typedef size_t size_type;
typedef ptrdiff_t difference_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
explicit memory_pool_allocator(pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator(const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
pointer address(reference x) const { return &x; }
const_pointer address(const_reference x) const { return &x; }
//! Allocate space for n objects.
pointer allocate( size_type n, const void* /*hint*/ = 0) {
pointer p = static_cast<pointer>( my_pool->malloc( n*sizeof(value_type) ) );
if (!p)
tbb::internal::throw_exception(std::bad_alloc());
return p;
}
//! Free previously allocated block of memory.
void deallocate( pointer p, size_type ) {
my_pool->free(p);
}
//! Largest value for which method allocate might succeed.
size_type max_size() const throw() {
size_type max = static_cast<size_type>(-1) / sizeof (value_type);
return (max > 0 ? max : 1);
}
//! Copy-construct value at location pointed to by p.
#if __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
template<typename U, typename... Args>
void construct(U *p, Args&&... args)
{ ::new((void *)p) U(std::forward<Args>(args)...); }
#else // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
#if __TBB_CPP11_RVALUE_REF_PRESENT
void construct( pointer p, value_type&& value ) {::new((void*)(p)) value_type(std::move(value));}
#endif
void construct( pointer p, const value_type& value ) { ::new((void*)(p)) value_type(value); }
#endif // __TBB_ALLOCATOR_CONSTRUCT_VARIADIC
//! Destroy value at location pointed to by p.
void destroy( pointer p ) { p->~value_type(); }
};
#if _MSC_VER && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4100 is back
//! Analogous to std::allocator<void>, as defined in ISO C++ Standard, Section 20.4.1
/** @ingroup memory_allocation */
template<typename P>
class memory_pool_allocator<void, P> {
public:
typedef P pool_type;
typedef void* pointer;
typedef const void* const_pointer;
typedef void value_type;
template<typename U> struct rebind {
typedef memory_pool_allocator<U, P> other;
};
explicit memory_pool_allocator( pool_type &pool) throw() : my_pool(&pool) {}
memory_pool_allocator( const memory_pool_allocator& src) throw() : my_pool(src.my_pool) {}
template<typename U>
memory_pool_allocator(const memory_pool_allocator<U,P>& src) throw() : my_pool(src.my_pool) {}
protected:
pool_type *my_pool;
template<typename U, typename R>
friend class memory_pool_allocator;
template<typename V, typename U, typename R>
friend bool operator==( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
template<typename V, typename U, typename R>
friend bool operator!=( const memory_pool_allocator<V,R>& a, const memory_pool_allocator<U,R>& b);
};
template<typename T, typename U, typename P>
inline bool operator==( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool==b.my_pool;}
template<typename T, typename U, typename P>
inline bool operator!=( const memory_pool_allocator<T,P>& a, const memory_pool_allocator<U,P>& b) {return a.my_pool!=b.my_pool;}
//! Thread-safe growable pool allocator for variable-size requests
template <typename Alloc>
class memory_pool : public internal::pool_base {
Alloc my_alloc; // TODO: base-class optimization
static void *allocate_request(intptr_t pool_id, size_t & bytes);
static int deallocate_request(intptr_t pool_id, void*, size_t raw_bytes);
public:
//! construct pool with underlying allocator
explicit memory_pool(const Alloc &src = Alloc());
//! destroy pool
~memory_pool() { destroy(); } // call the callbacks first and destroy my_alloc later
};
class fixed_pool : public internal::pool_base {
void *my_buffer;
size_t my_size;
inline static void *allocate_request(intptr_t pool_id, size_t & bytes);
public:
//! construct pool with underlying allocator
inline fixed_pool(void *buf, size_t size);
//! destroy pool
~fixed_pool() { destroy(); }
};
//////////////// Implementation ///////////////
template <typename Alloc>
memory_pool<Alloc>::memory_pool(const Alloc &src) : my_alloc(src) {
rml::MemPoolPolicy args(allocate_request, deallocate_request,
sizeof(typename Alloc::value_type));
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if (res!=rml::POOL_OK)
tbb::internal::throw_exception(std::runtime_error("Can't create pool"));
}
template <typename Alloc>
void *memory_pool<Alloc>::allocate_request(intptr_t pool_id, size_t & bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == bytes%unit_size, NULL);
void *ptr;
__TBB_TRY { ptr = self.my_alloc.allocate( bytes/unit_size ); }
__TBB_CATCH(...) { return 0; }
return ptr;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
// Workaround for erroneous "unreachable code" warning in the template below.
// Specific for VC++ 17-18 compiler
#pragma warning (push)
#pragma warning (disable: 4702)
#endif
template <typename Alloc>
int memory_pool<Alloc>::deallocate_request(intptr_t pool_id, void* raw_ptr, size_t raw_bytes) {
memory_pool<Alloc> &self = *reinterpret_cast<memory_pool<Alloc>*>(pool_id);
const size_t unit_size = sizeof(typename Alloc::value_type);
__TBBMALLOC_ASSERT( 0 == raw_bytes%unit_size, NULL);
self.my_alloc.deallocate( static_cast<typename Alloc::value_type*>(raw_ptr), raw_bytes/unit_size );
return 0;
}
#if __TBB_MSVC_UNREACHABLE_CODE_IGNORED
#pragma warning (pop)
#endif
inline fixed_pool::fixed_pool(void *buf, size_t size) : my_buffer(buf), my_size(size) {
if (!buf || !size)
// TODO: improve support for mode with exceptions disabled
tbb::internal::throw_exception(std::invalid_argument("Zero in parameter is invalid"));
rml::MemPoolPolicy args(allocate_request, 0, size, /*fixedPool=*/true);
rml::MemPoolError res = rml::pool_create_v1(intptr_t(this), &args, &my_pool);
if (res!=rml::POOL_OK)
tbb::internal::throw_exception(std::runtime_error("Can't create pool"));
}
inline void *fixed_pool::allocate_request(intptr_t pool_id, size_t & bytes) {
fixed_pool &self = *reinterpret_cast<fixed_pool*>(pool_id);
__TBBMALLOC_ASSERT(0 != self.my_size, "The buffer must not be used twice.");
bytes = self.my_size;
self.my_size = 0; // remember that buffer has been used
return self.my_buffer;
}
} //namespace interface6
using interface6::memory_pool_allocator;
using interface6::memory_pool;
using interface6::fixed_pool;
} //namespace tbb
#undef __TBBMALLOC_ASSERT
#endif// __TBB_memory_pool_H
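/* Illustrative sketch, not part of the original header: typical use of this
   preview feature, assuming TBB_PREVIEW_MEMORY_POOL was defined before
   including tbb/memory_pool.h.  The sketch_ function name is hypothetical. */
#include <memory>
#include <vector>

inline void sketch_memory_pool_usage() {
    // Growable, thread-safe pool backed by std::allocator<char>.
    tbb::memory_pool<std::allocator<char> > pool;
    void* raw = pool.malloc(64);   // pool-local malloc/free
    pool.free(raw);

    {   // An STL container drawing its memory from the pool.
        typedef tbb::memory_pool_allocator<int> pool_alloc_t;
        std::vector<int, pool_alloc_t> numbers((pool_alloc_t(pool)));
        numbers.push_back(42);
    }   // the vector returns its block to the pool here

    pool.recycle();                // or release everything still allocated at once

    // Fixed pool carved out of caller-provided storage; it never grows.
    char buffer[1024];
    tbb::fixed_pool fpool(buffer, sizeof(buffer));
    void* p = fpool.malloc(16);
    fpool.free(p);
}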

View File

@@ -0,0 +1,246 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "internal/_deprecated_header_message_guard.h"
#if !defined(__TBB_show_deprecation_message_mutex_H) && defined(__TBB_show_deprecated_header_message)
#define __TBB_show_deprecation_message_mutex_H
#pragma message("TBB Warning: tbb/mutex.h is deprecated. For details, please see Deprecated Features appendix in the TBB reference manual.")
#endif
#if defined(__TBB_show_deprecated_header_message)
#undef __TBB_show_deprecated_header_message
#endif
#ifndef __TBB_mutex_H
#define __TBB_mutex_H
#define __TBB_mutex_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#if _WIN32||_WIN64
#include "machine/windows_api.h"
#else
#include <pthread.h>
#endif /* _WIN32||_WIN64 */
#include <new>
#include "aligned_space.h"
#include "tbb_stddef.h"
#include "tbb_profiling.h"
namespace tbb {
//! Wrapper around the platform's native lock.
/** @ingroup synchronization */
class __TBB_DEPRECATED_IN_VERBOSE_MODE_MSG("tbb::mutex is deprecated, use std::mutex") mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired mutex.
mutex() {
#if TBB_USE_ASSERT || TBB_USE_THREADING_TOOLS
internal_construct();
#else
#if _WIN32||_WIN64
InitializeCriticalSectionEx(&impl, 4000, 0);
#else
int error_code = pthread_mutex_init(&impl,NULL);
if( error_code )
tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_init failed");
#endif /* _WIN32||_WIN64*/
#endif /* TBB_USE_ASSERT */
};
~mutex() {
#if TBB_USE_ASSERT
internal_destroy();
#else
#if _WIN32||_WIN64
DeleteCriticalSection(&impl);
#else
pthread_mutex_destroy(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
};
class scoped_lock;
friend class scoped_lock;
//! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release lock.
It also nicely provides the "node" for queuing locks. */
class scoped_lock : internal::no_copy {
public:
//! Construct lock that has not acquired a mutex.
scoped_lock() : my_mutex(NULL) {};
//! Acquire lock on given mutex.
scoped_lock( mutex& mutex ) {
acquire( mutex );
}
//! Release lock (if lock is held).
~scoped_lock() {
if( my_mutex )
release();
}
//! Acquire lock on given mutex.
void acquire( mutex& mutex ) {
#if TBB_USE_ASSERT
internal_acquire(mutex);
#else
mutex.lock();
my_mutex = &mutex;
#endif /* TBB_USE_ASSERT */
}
//! Try acquire lock on given mutex.
bool try_acquire( mutex& mutex ) {
#if TBB_USE_ASSERT
return internal_try_acquire (mutex);
#else
bool result = mutex.try_lock();
if( result )
my_mutex = &mutex;
return result;
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void release() {
#if TBB_USE_ASSERT
internal_release ();
#else
my_mutex->unlock();
my_mutex = NULL;
#endif /* TBB_USE_ASSERT */
}
private:
//! Pointer to the mutex held by this scoped_lock (NULL if none)
mutex* my_mutex;
//! All checks from acquire using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_acquire( mutex& m );
//! All checks from try_acquire using mutex.state were moved here
bool __TBB_EXPORTED_METHOD internal_try_acquire( mutex& m );
//! All checks from release using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_release();
friend class mutex;
};
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = false;
// ISO C++0x compatibility methods
//! Acquire lock
void lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
new(tmp.begin()) scoped_lock(*this);
#else
#if _WIN32||_WIN64
EnterCriticalSection(&impl);
#else
int error_code = pthread_mutex_lock(&impl);
if( error_code )
tbb::internal::handle_perror(error_code,"mutex: pthread_mutex_lock failed");
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Try acquiring lock (non-blocking)
/** Return true if lock acquired; false otherwise. */
bool try_lock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = NULL;
return s.internal_try_acquire(*this);
#else
#if _WIN32||_WIN64
return TryEnterCriticalSection(&impl)!=0;
#else
return pthread_mutex_trylock(&impl)==0;
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Release lock
void unlock() {
#if TBB_USE_ASSERT
aligned_space<scoped_lock> tmp;
scoped_lock& s = *tmp.begin();
s.my_mutex = this;
s.internal_release();
#else
#if _WIN32||_WIN64
LeaveCriticalSection(&impl);
#else
pthread_mutex_unlock(&impl);
#endif /* _WIN32||_WIN64 */
#endif /* TBB_USE_ASSERT */
}
//! Return native_handle
#if _WIN32||_WIN64
typedef LPCRITICAL_SECTION native_handle_type;
#else
typedef pthread_mutex_t* native_handle_type;
#endif
native_handle_type native_handle() { return (native_handle_type) &impl; }
enum state_t {
INITIALIZED=0x1234,
DESTROYED=0x789A,
HELD=0x56CD
};
private:
#if _WIN32||_WIN64
CRITICAL_SECTION impl;
enum state_t state;
#else
pthread_mutex_t impl;
#endif /* _WIN32||_WIN64 */
//! All checks from mutex constructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_construct();
//! All checks from mutex destructor using mutex.state were moved here
void __TBB_EXPORTED_METHOD internal_destroy();
#if _WIN32||_WIN64
public:
//! Set the internal state
void set_state( state_t to ) { state = to; }
#endif
};
__TBB_DEFINE_PROFILING_SET_NAME(mutex)
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_mutex_H_include_area
#endif /* __TBB_mutex_H */
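/* Illustrative sketch, not part of the original header: the scoped locking
   pattern described above, plus the C++0x-style lock()/try_lock()/unlock()
   surface.  (The class is deprecated in favour of std::mutex, as the warning
   above says; sketch_ names are hypothetical.) */
inline void sketch_mutex_usage(tbb::mutex& m, int& shared_counter) {
    {
        tbb::mutex::scoped_lock guard(m);  // acquires the mutex here
        ++shared_counter;                  // protected update
    }                                      // released when guard leaves scope

    if (m.try_lock()) {                    // non-blocking attempt
        ++shared_counter;
        m.unlock();
    }
}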

View File

@@ -0,0 +1,50 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_null_mutex_H
#define __TBB_null_mutex_H
#include "tbb_stddef.h"
namespace tbb {
//! A mutex which does nothing
/** A null_mutex does no operation and simulates success.
@ingroup synchronization */
class null_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy {
public:
scoped_lock() {}
scoped_lock( null_mutex& ) {}
~scoped_lock() {}
void acquire( null_mutex& ) {}
bool try_acquire( null_mutex& ) { return true; }
void release() {}
};
null_mutex() {}
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = true;
static const bool is_fair_mutex = true;
};
}
#endif /* __TBB_null_mutex_H */
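/* Illustrative sketch, not part of the original header: null_mutex exists so
   that lock-generic code can be instantiated with locking compiled away, e.g.
   when a structure is known to be touched by a single thread (sketch_ names
   are hypothetical). */
template <typename Mutex>
inline void sketch_guarded_increment(Mutex& m, long& counter) {
    typename Mutex::scoped_lock guard(m);  // a no-op for tbb::null_mutex
    ++counter;
}

inline void sketch_null_mutex_demo(long& counter) {
    tbb::null_mutex no_lock;               // every operation "succeeds" and does nothing
    sketch_guarded_increment(no_lock, counter);
}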

View File

@@ -0,0 +1,52 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_null_rw_mutex_H
#define __TBB_null_rw_mutex_H
#include "tbb_stddef.h"
namespace tbb {
//! A rw mutex which does nothing
/** A null_rw_mutex is a rw mutex that does nothing and simulates successful operation.
@ingroup synchronization */
class null_rw_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Represents acquisition of a mutex.
class scoped_lock : internal::no_copy {
public:
scoped_lock() {}
scoped_lock( null_rw_mutex& , bool = true ) {}
~scoped_lock() {}
void acquire( null_rw_mutex& , bool = true ) {}
bool upgrade_to_writer() { return true; }
bool downgrade_to_reader() { return true; }
bool try_acquire( null_rw_mutex& , bool = true ) { return true; }
void release() {}
};
null_rw_mutex() {}
// Mutex traits
static const bool is_rw_mutex = true;
static const bool is_recursive_mutex = true;
static const bool is_fair_mutex = true;
};
}
#endif /* __TBB_null_rw_mutex_H */

View File

@@ -0,0 +1,553 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_do_H
#define __TBB_parallel_do_H
#define __TBB_parallel_do_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "internal/_range_iterator.h"
#include "internal/_template_helpers.h"
#include "task.h"
#include "aligned_space.h"
#include <iterator>
namespace tbb {
namespace interface9 {
//! @cond INTERNAL
namespace internal {
template<typename Body, typename Item> class parallel_do_feeder_impl;
} // namespace internal
//! @endcond
//! Class the user supplied algorithm body uses to add new tasks
/** \param Item Work item type **/
template<typename Item>
class parallel_do_feeder: ::tbb::internal::no_copy
{
parallel_do_feeder() {}
virtual ~parallel_do_feeder () {}
virtual void internal_add_copy( const Item& item ) = 0;
#if __TBB_CPP11_RVALUE_REF_PRESENT
virtual void internal_add_move( Item&& item ) = 0;
#endif
template<typename Body_, typename Item_> friend class internal::parallel_do_feeder_impl;
public:
//! Add a work item to a running parallel_do.
void add( const Item& item ) {internal_add_copy(item);}
#if __TBB_CPP11_RVALUE_REF_PRESENT
void add( Item&& item ) {internal_add_move(std::move(item));}
#endif
};
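/* Illustrative sketch, not part of the original header: a body of the second
   form accepted by parallel_do -- it handles one item and may feed newly
   discovered work back into the running loop through the feeder (the struct
   below is hypothetical and only shows the expected signature). */
struct sketch_countdown_body {
    void operator()( int item, parallel_do_feeder<int>& feeder ) const {
        if( item > 0 )
            feeder.add( item - 1 );   // dynamically extends the work set
    }
};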
//! @cond INTERNAL
namespace internal {
template<typename Body> class do_group_task;
//! For internal use only.
/** Selects one of the two possible forms of function call member operator.
@ingroup algorithms **/
template<class Body, typename Item>
class parallel_do_operator_selector
{
typedef parallel_do_feeder<Item> Feeder;
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2&, void (Body::*)(CvItem) const ) {
obj(tbb::internal::forward<A1>(arg1));
}
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2, void (Body::*)(CvItem, parallel_do_feeder<Item>&) const ) {
obj(tbb::internal::forward<A1>(arg1), arg2);
}
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2&, void (Body::*)(CvItem&) const ) {
obj(arg1);
}
template<typename A1, typename A2, typename CvItem >
static void internal_call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2, void (Body::*)(CvItem&, parallel_do_feeder<Item>&) const ) {
obj(arg1, arg2);
}
public:
template<typename A1, typename A2>
static void call( const Body& obj, __TBB_FORWARDING_REF(A1) arg1, A2& arg2 )
{
internal_call( obj, tbb::internal::forward<A1>(arg1), arg2, &Body::operator() );
}
};
//! For internal use only.
/** Executes one iteration of a do.
@ingroup algorithms */
template<typename Body, typename Item>
class do_iteration_task: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
Item my_value;
feeder_type& my_feeder;
do_iteration_task( const Item& value, feeder_type& feeder ) :
my_value(value), my_feeder(feeder)
{}
#if __TBB_CPP11_RVALUE_REF_PRESENT
do_iteration_task( Item&& value, feeder_type& feeder ) :
my_value(std::move(value)), my_feeder(feeder)
{}
#endif
task* execute() __TBB_override
{
parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, tbb::internal::move(my_value), my_feeder);
return NULL;
}
template<typename Body_, typename Item_> friend class parallel_do_feeder_impl;
}; // class do_iteration_task
template<typename Iterator, typename Body, typename Item>
class do_iteration_task_iter: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
Iterator my_iter;
feeder_type& my_feeder;
do_iteration_task_iter( const Iterator& iter, feeder_type& feeder ) :
my_iter(iter), my_feeder(feeder)
{}
task* execute() __TBB_override
{
parallel_do_operator_selector<Body, Item>::call(*my_feeder.my_body, *my_iter, my_feeder);
return NULL;
}
template<typename Iterator_, typename Body_, typename Item_> friend class do_group_task_forward;
template<typename Body_, typename Item_> friend class do_group_task_input;
template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
}; // class do_iteration_task_iter
//! For internal use only.
/** Implements new task adding procedure.
@ingroup algorithms **/
template<class Body, typename Item>
class parallel_do_feeder_impl : public parallel_do_feeder<Item>
{
#if __TBB_CPP11_RVALUE_REF_PRESENT
//Avoiding use of copy constructor in a virtual method if the type does not support it
void internal_add_copy_impl(std::true_type, const Item& item) {
typedef do_iteration_task<Body, Item> iteration_type;
iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this);
task::spawn(t);
}
void internal_add_copy_impl(std::false_type, const Item&) {
__TBB_ASSERT(false, "Overloading for r-value reference doesn't work or it's not movable and not copyable object");
}
void internal_add_copy( const Item& item ) __TBB_override
{
#if __TBB_CPP11_IS_COPY_CONSTRUCTIBLE_PRESENT
internal_add_copy_impl(typename std::is_copy_constructible<Item>::type(), item);
#else
internal_add_copy_impl(std::true_type(), item);
#endif
}
void internal_add_move( Item&& item ) __TBB_override
{
typedef do_iteration_task<Body, Item> iteration_type;
iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(std::move(item), *this);
task::spawn(t);
}
#else /* ! __TBB_CPP11_RVALUE_REF_PRESENT */
void internal_add_copy(const Item& item) __TBB_override {
typedef do_iteration_task<Body, Item> iteration_type;
iteration_type& t = *new (task::allocate_additional_child_of(*my_barrier)) iteration_type(item, *this);
task::spawn(t);
}
#endif /* __TBB_CPP11_RVALUE_REF_PRESENT */
public:
const Body* my_body;
empty_task* my_barrier;
parallel_do_feeder_impl()
{
my_barrier = new( task::allocate_root() ) empty_task();
__TBB_ASSERT(my_barrier, "root task allocation failed");
}
#if __TBB_TASK_GROUP_CONTEXT
parallel_do_feeder_impl(tbb::task_group_context &context)
{
my_barrier = new( task::allocate_root(context) ) empty_task();
__TBB_ASSERT(my_barrier, "root task allocation failed");
}
#endif
~parallel_do_feeder_impl()
{
my_barrier->destroy(*my_barrier);
}
}; // class parallel_do_feeder_impl
//! For internal use only
/** Unpacks a block of iterations.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
class do_group_task_forward: public task
{
static const size_t max_arg_size = 4;
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
feeder_type& my_feeder;
Iterator my_first;
size_t my_size;
do_group_task_forward( Iterator first, size_t size, feeder_type& feeder )
: my_feeder(feeder), my_first(first), my_size(size)
{}
task* execute() __TBB_override
{
typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
__TBB_ASSERT( my_size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type( my_first, my_feeder );
++my_first;
if( ++k==my_size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
template<typename Iterator_, typename Body_, typename _Item> friend class do_task_iter;
}; // class do_group_task_forward
template<typename Body, typename Item>
class do_group_task_input: public task
{
static const size_t max_arg_size = 4;
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
feeder_type& my_feeder;
size_t my_size;
aligned_space<Item, max_arg_size> my_arg;
do_group_task_input( feeder_type& feeder )
: my_feeder(feeder), my_size(0)
{}
task* execute() __TBB_override
{
#if __TBB_CPP11_RVALUE_REF_PRESENT
typedef std::move_iterator<Item*> Item_iterator;
#else
typedef Item* Item_iterator;
#endif
typedef do_iteration_task_iter<Item_iterator, Body, Item> iteration_type;
__TBB_ASSERT( my_size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type( Item_iterator(my_arg.begin() + k), my_feeder );
if( ++k==my_size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
~do_group_task_input(){
for( size_t k=0; k<my_size; ++k)
(my_arg.begin() + k)->~Item();
}
template<typename Iterator_, typename Body_, typename Item_> friend class do_task_iter;
}; // class do_group_task_input
//! For internal use only.
/** Gets block of iterations and packages them into a do_group_task.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
class do_task_iter: public task
{
typedef parallel_do_feeder_impl<Body, Item> feeder_type;
public:
do_task_iter( Iterator first, Iterator last , feeder_type& feeder ) :
my_first(first), my_last(last), my_feeder(feeder)
{}
private:
Iterator my_first;
Iterator my_last;
feeder_type& my_feeder;
/* Do not merge the run(xxx) and run_xxx() methods. They are kept separate so that
compilers can eliminate the unused argument of type xxx (that is, avoid placing it
on the stack); the sole purpose of this argument is overload resolution.
An alternative would be template functions, but explicit specialization of member
function templates is not supported for non-specialized class templates. Besides,
template functions would always fall back to the least efficient variant (the one
for input iterators) for iterators whose custom tags derive from the basic ones. */
task* execute() __TBB_override
{
typedef typename std::iterator_traits<Iterator>::iterator_category iterator_tag;
return run( (iterator_tag*)NULL );
}
/** This is the most restricted variant that operates on input iterators or
iterators with unknown tags (tags not derived from the standard ones). **/
inline task* run( void* ) { return run_for_input_iterator(); }
task* run_for_input_iterator() {
typedef do_group_task_input<Body, Item> block_type;
block_type& t = *new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(my_feeder);
size_t k=0;
while( !(my_first == my_last) ) {
// Move semantics are automatically used when supported by the iterator
new (t.my_arg.begin() + k) Item(*my_first);
++my_first;
if( ++k==block_type::max_arg_size ) {
if ( !(my_first == my_last) )
recycle_to_reexecute();
break;
}
}
if( k==0 ) {
destroy(t);
return NULL;
} else {
t.my_size = k;
return &t;
}
}
inline task* run( std::forward_iterator_tag* ) { return run_for_forward_iterator(); }
task* run_for_forward_iterator() {
typedef do_group_task_forward<Iterator, Body, Item> block_type;
Iterator first = my_first;
size_t k=0;
while( !(my_first==my_last) ) {
++my_first;
if( ++k==block_type::max_arg_size ) {
if ( !(my_first==my_last) )
recycle_to_reexecute();
break;
}
}
return k==0 ? NULL : new( allocate_additional_child_of(*my_feeder.my_barrier) ) block_type(first, k, my_feeder);
}
inline task* run( std::random_access_iterator_tag* ) { return run_for_random_access_iterator(); }
task* run_for_random_access_iterator() {
typedef do_group_task_forward<Iterator, Body, Item> block_type;
typedef do_iteration_task_iter<Iterator, Body, Item> iteration_type;
size_t k = static_cast<size_t>(my_last-my_first);
if( k > block_type::max_arg_size ) {
Iterator middle = my_first + k/2;
empty_task& c = *new( allocate_continuation() ) empty_task;
do_task_iter& b = *new( c.allocate_child() ) do_task_iter(middle, my_last, my_feeder);
recycle_as_child_of(c);
my_last = middle;
c.set_ref_count(2);
c.spawn(b);
return this;
}else if( k != 0 ) {
task_list list;
task* t;
size_t k1=0;
for(;;) {
t = new( allocate_child() ) iteration_type(my_first, my_feeder);
++my_first;
if( ++k1==k ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
}
return NULL;
}
}; // class do_task_iter
//! For internal use only.
/** Implements parallel iteration over a range.
@ingroup algorithms */
template<typename Iterator, typename Body, typename Item>
void run_parallel_do( Iterator first, Iterator last, const Body& body
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif
)
{
typedef do_task_iter<Iterator, Body, Item> root_iteration_task;
#if __TBB_TASK_GROUP_CONTEXT
parallel_do_feeder_impl<Body, Item> feeder(context);
#else
parallel_do_feeder_impl<Body, Item> feeder;
#endif
feeder.my_body = &body;
root_iteration_task &t = *new( feeder.my_barrier->allocate_child() ) root_iteration_task(first, last, feeder);
feeder.my_barrier->set_ref_count(2);
feeder.my_barrier->spawn_and_wait_for_all(t);
}
//! For internal use only.
/** Detects types of Body's operator function arguments.
@ingroup algorithms **/
template<typename Iterator, typename Body, typename Item>
void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item) const
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif
)
{
run_parallel_do<Iterator, Body, typename ::tbb::internal::strip<Item>::type>( first, last, body
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif
);
}
//! For internal use only.
/** Detects types of Body's operator function arguments.
@ingroup algorithms **/
template<typename Iterator, typename Body, typename Item, typename _Item>
void select_parallel_do( Iterator first, Iterator last, const Body& body, void (Body::*)(Item, parallel_do_feeder<_Item>&) const
#if __TBB_TASK_GROUP_CONTEXT
, task_group_context& context
#endif
)
{
run_parallel_do<Iterator, Body, typename ::tbb::internal::strip<Item>::type>( first, last, body
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif
);
}
} // namespace internal
} // namespace interface9
//! @endcond
/** \page parallel_do_body_req Requirements on parallel_do body
Class \c Body implementing the concept of parallel_do body must define:
- \code
B::operator()(
cv_item_type item,
parallel_do_feeder<item_type>& feeder
) const
OR
B::operator()( cv_item_type& item ) const
\endcode Process item.
May be invoked concurrently for the same \c this but different \c item.
- \code item_type( const item_type& ) \endcode
Copy a work item.
- \code ~item_type() \endcode Destroy a work item
**/
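/** Editorial sketch (not part of the library): one way a Body meeting the above
    requirements might look. It processes work items and feeds newly discovered ones
    back through the feeder; ListNode, process_node and head_nodes are hypothetical
    names introduced only for illustration.
    \code
    struct ProcessNode {
        void operator()( ListNode* n, tbb::parallel_do_feeder<ListNode*>& feeder ) const {
            process_node( *n );                   // user-defined work on the item
            if( n->next ) feeder.add( n->next );  // add more work discovered on the fly
        }
    };
    // Possible call site:
    // tbb::parallel_do( head_nodes.begin(), head_nodes.end(), ProcessNode() );
    \endcode
**/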
/** \name parallel_do
See also requirements on \ref parallel_do_body_req "parallel_do Body". **/
//@{
//! Parallel iteration over a range, with optional addition of more work.
/** @ingroup algorithms */
template<typename Iterator, typename Body>
void parallel_do( Iterator first, Iterator last, const Body& body )
{
if ( first == last )
return;
#if __TBB_TASK_GROUP_CONTEXT
task_group_context context(internal::PARALLEL_DO);
#endif
interface9::internal::select_parallel_do( first, last, body, &Body::operator()
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif
);
}
template<typename Range, typename Body>
void parallel_do(Range& rng, const Body& body) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body);
}
template<typename Range, typename Body>
void parallel_do(const Range& rng, const Body& body) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration over a range, with optional addition of more work and user-supplied context
/** @ingroup algorithms */
template<typename Iterator, typename Body>
void parallel_do( Iterator first, Iterator last, const Body& body, task_group_context& context )
{
if ( first == last )
return;
interface9::internal::select_parallel_do( first, last, body, &Body::operator(), context );
}
template<typename Range, typename Body>
void parallel_do(Range& rng, const Body& body, task_group_context& context) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context);
}
template<typename Range, typename Body>
void parallel_do(const Range& rng, const Body& body, task_group_context& context) {
parallel_do(tbb::internal::first(rng), tbb::internal::last(rng), body, context);
}
#endif // __TBB_TASK_GROUP_CONTEXT
//@}
using interface9::parallel_do_feeder;
} // namespace
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_do_H_include_area
#endif /* __TBB_parallel_do_H */


@@ -0,0 +1,425 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_for_H
#define __TBB_parallel_for_H
#define __TBB_parallel_for_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include <new>
#include "task.h"
#include "partitioner.h"
#include "blocked_range.h"
#include "tbb_exception.h"
#include "internal/_tbb_trace_impl.h"
namespace tbb {
namespace interface9 {
//! @cond INTERNAL
namespace internal {
//! allocate right task with new parent
void* allocate_sibling(task* start_for_task, size_t bytes);
//! Task type used in parallel_for
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
class start_for: public task {
Range my_range;
const Body my_body;
typename Partitioner::task_partition_type my_partition;
task* execute() __TBB_override;
//! Update affinity info, if any.
void note_affinity( affinity_id id ) __TBB_override {
my_partition.note_affinity( id );
}
public:
//! Constructor for root task.
start_for( const Range& range, const Body& body, Partitioner& partitioner ) :
my_range(range),
my_body(body),
my_partition(partitioner)
{
tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, NULL);
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_for( start_for& parent_, typename Partitioner::split_type& split_obj) :
my_range(parent_.my_range, split_obj),
my_body(parent_.my_body),
my_partition(parent_.my_partition, split_obj)
{
my_partition.set_affinity(*this);
tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
}
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_for( start_for& parent_, const Range& r, depth_t d ) :
my_range(r),
my_body(parent_.my_body),
my_partition(parent_.my_partition, split())
{
my_partition.set_affinity(*this);
my_partition.align_depth( d );
tbb::internal::fgt_algorithm(tbb::internal::PARALLEL_FOR_TASK, this, (void *)&parent_);
}
static void run( const Range& range, const Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
start_for& a = *new(task::allocate_root()) start_for(range,body,partitioner);
#else
// A bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_for in a try-block.
task_group_context context(PARALLEL_FOR);
start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
// REGION BEGIN
fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
task::spawn_root_and_wait(a);
fgt_end_algorithm( (void*)&context );
// REGION END
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, const Body& body, Partitioner& partitioner, task_group_context& context ) {
if( !range.empty() ) {
start_for& a = *new(task::allocate_root(context)) start_for(range,body,partitioner);
// REGION BEGIN
fgt_begin_algorithm( tbb::internal::PARALLEL_FOR_TASK, (void*)&context );
task::spawn_root_and_wait(a);
fgt_end_algorithm( (void*)&context );
// END REGION
}
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Run body for range, serves as callback for partitioner
void run_body( Range &r ) {
fgt_alg_begin_body( tbb::internal::PARALLEL_FOR_TASK, (void *)const_cast<Body*>(&(this->my_body)), (void*)this );
my_body( r );
fgt_alg_end_body( (void *)const_cast<Body*>(&(this->my_body)) );
}
//! spawn right task, serves as callback for partitioner
void offer_work(typename Partitioner::split_type& split_obj) {
spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, split_obj) );
}
//! spawn right task, serves as callback for partitioner
void offer_work(const Range& r, depth_t d = 0) {
spawn( *new( allocate_sibling(static_cast<task*>(this), sizeof(start_for)) ) start_for(*this, r, d) );
}
};
//! allocate right task with new parent
// TODO: 'inline' here is to avoid a multiple-definition error, but for the sake of code size this should not be inlined
inline void* allocate_sibling(task* start_for_task, size_t bytes) {
task* parent_ptr = new( start_for_task->allocate_continuation() ) flag_task();
start_for_task->set_parent(parent_ptr);
parent_ptr->set_ref_count(2);
return &parent_ptr->allocate_child().allocate(bytes);
}
//! execute task for parallel_for
template<typename Range, typename Body, typename Partitioner>
task* start_for<Range,Body,Partitioner>::execute() {
my_partition.check_being_stolen( *this );
my_partition.execute(*this, my_range);
return NULL;
}
} // namespace internal
//! @endcond
} // namespace interface9
//! @cond INTERNAL
namespace internal {
using interface9::internal::start_for;
//! Calls the function with values from range [begin, end) with a step provided
template<typename Function, typename Index>
class parallel_for_body : internal::no_assign {
const Function &my_func;
const Index my_begin;
const Index my_step;
public:
parallel_for_body( const Function& _func, Index& _begin, Index& _step )
: my_func(_func), my_begin(_begin), my_step(_step) {}
void operator()( const tbb::blocked_range<Index>& r ) const {
// A set of local variables to help the compiler with vectorization of the following loop.
Index b = r.begin();
Index e = r.end();
Index ms = my_step;
Index k = my_begin + b*ms;
#if __INTEL_COMPILER
#pragma ivdep
#if __TBB_ASSERT_ON_VECTORIZATION_FAILURE
#pragma vector always assert
#endif
#endif
for ( Index i = b; i < e; ++i, k += ms ) {
my_func( k );
}
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_for_body_req Requirements on parallel_for body
Class \c Body implementing the concept of parallel_for body must define:
- \code Body::Body( const Body& ); \endcode Copy constructor
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ) const; \endcode Function call operator applying the body to range \c r.
**/
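/** Editorial sketch (not part of the library): a minimal Body satisfying the above
    requirements. The float array and its length n are assumptions for illustration.
    \code
    struct ScaleBody {
        float* my_a;
        explicit ScaleBody( float* a ) : my_a(a) {}
        // Copy constructor and destructor are implicitly provided.
        void operator()( const tbb::blocked_range<size_t>& r ) const {
            for( size_t i = r.begin(); i != r.end(); ++i )
                my_a[i] *= 2.0f;   // apply the work to each index in the subrange
        }
    };
    \endcode
**/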
/** \name parallel_for
See also requirements on \ref range_req "Range" and \ref parallel_for_body_req "parallel_for Body". **/
//@{
//! Parallel iteration over range with default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body ) {
internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
//! Parallel iteration over range with simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner ) {
internal::start_for<Range,Body,const simple_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with auto_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner ) {
internal::start_for<Range,Body,const auto_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with static_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner ) {
internal::start_for<Range,Body,const static_partitioner>::run(range,body,partitioner);
}
//! Parallel iteration over range with affinity_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner ) {
internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration over range with default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, task_group_context& context ) {
internal::start_for<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run(range, body, __TBB_DEFAULT_PARTITIONER(), context);
}
//! Parallel iteration over range with simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,const simple_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration over range with auto_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,const auto_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration over range with static_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, const static_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,const static_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration over range with affinity_partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_for( const Range& range, const Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
internal::start_for<Range,Body,affinity_partitioner>::run(range,body,partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
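/** Usage sketch (editorial, hedged): calling the range-based overloads above with the
    illustrative ScaleBody from the requirements example; data, n and the grain size of
    1024 are assumptions.
    \code
    // The default partitioner chooses chunk sizes automatically.
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n), ScaleBody(data) );
    // simple_partitioner honors the explicit grain size of the range.
    tbb::parallel_for( tbb::blocked_range<size_t>(0, n, 1024), ScaleBody(data),
                       tbb::simple_partitioner() );
    \endcode
**/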
namespace strict_ppl {
//@{
//! Implementation of parallel iteration over stepped range of integers with explicit step and partitioner
template <typename Index, typename Function, typename Partitioner>
void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner) {
if (step <= 0 )
internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
else if (last > first) {
// Above "else" avoids "potential divide by zero" warning on some platforms
Index end = (last - first - Index(1)) / step + Index(1);
tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, partitioner);
}
}
//! Parallel iteration over a range of integers with a step provided and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner());
}
//! Parallel iteration over a range of integers with a step provided and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a step provided and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a step provided and static partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a step provided and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner) {
parallel_for_impl(first, last, step, f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner());
}
//! Parallel iteration over a range of integers with a default step value and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and static partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner);
}
//! Parallel iteration over a range of integers with a default step value and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner) {
parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner);
}
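/** Usage sketch (editorial, hedged) for the compact integer overloads above; n, step
    and the lambda bodies are assumptions for illustration (lambdas require C++11).
    \code
    // Default step of 1: i takes the values 0, 1, ..., n-1.
    tbb::parallel_for( size_t(0), n, []( size_t i ) { /* work on index i */ } );
    // Explicit step: i takes the values 0, step, 2*step, ... below n.
    tbb::parallel_for( size_t(0), n, step, []( size_t i ) { /* work on index i */ } );
    \endcode
**/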
#if __TBB_TASK_GROUP_CONTEXT
//! Implementation of parallel iteration over stepped range of integers with explicit step, task group context, and partitioner
template <typename Index, typename Function, typename Partitioner>
void parallel_for_impl(Index first, Index last, Index step, const Function& f, Partitioner& partitioner, tbb::task_group_context &context) {
if (step <= 0 )
internal::throw_exception(internal::eid_nonpositive_step); // throws std::invalid_argument
else if (last > first) {
// Above "else" avoids "potential divide by zero" warning on some platforms
Index end = (last - first - Index(1)) / step + Index(1);
tbb::blocked_range<Index> range(static_cast<Index>(0), end);
internal::parallel_for_body<Function, Index> body(f, first, step);
tbb::parallel_for(range, body, partitioner, context);
}
}
//! Parallel iteration over a range of integers with explicit step, task group context, and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, auto_partitioner(), context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and static partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with explicit step, task group context, and affinity partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, Index step, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl(first, last, step, f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and default partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, auto_partitioner(), context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and simple partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const simple_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const simple_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and auto partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const auto_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const auto_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and static partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, const static_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl<Index,Function,const static_partitioner>(first, last, static_cast<Index>(1), f, partitioner, context);
}
//! Parallel iteration over a range of integers with a default step value, explicit task group context, and affinity_partitioner
template <typename Index, typename Function>
void parallel_for(Index first, Index last, const Function& f, affinity_partitioner& partitioner, tbb::task_group_context &context) {
parallel_for_impl(first, last, static_cast<Index>(1), f, partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
} // namespace strict_ppl
using strict_ppl::parallel_for;
} // namespace tbb
#if TBB_PREVIEW_SERIAL_SUBSET
#define __TBB_NORMAL_EXECUTION
#include "../serial/tbb/parallel_for.h"
#undef __TBB_NORMAL_EXECUTION
#endif
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_for_H_include_area
#endif /* __TBB_parallel_for_H */


@@ -0,0 +1,133 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_for_each_H
#define __TBB_parallel_for_each_H
#include "parallel_do.h"
#include "parallel_for.h"
namespace tbb {
//! @cond INTERNAL
namespace internal {
// The class calls user function in operator()
template <typename Function, typename Iterator>
class parallel_for_each_body_do : internal::no_assign {
const Function &my_func;
public:
parallel_for_each_body_do(const Function &_func) : my_func(_func) {}
void operator()(typename std::iterator_traits<Iterator>::reference value) const {
my_func(value);
}
};
// The class calls user function in operator()
template <typename Function, typename Iterator>
class parallel_for_each_body_for : internal::no_assign {
const Function &my_func;
public:
parallel_for_each_body_for(const Function &_func) : my_func(_func) {}
void operator()(tbb::blocked_range<Iterator> range) const {
#if __INTEL_COMPILER
#pragma ivdep
#endif
for(Iterator it = range.begin(), end = range.end(); it != end; ++it) {
my_func(*it);
}
}
};
template<typename Iterator, typename Function, typename Generic>
struct parallel_for_each_impl {
#if __TBB_TASK_GROUP_CONTEXT
static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) {
internal::parallel_for_each_body_do<Function, Iterator> body(f);
tbb::parallel_do(first, last, body, context);
}
#endif
static void doit(Iterator first, Iterator last, const Function& f) {
internal::parallel_for_each_body_do<Function, Iterator> body(f);
tbb::parallel_do(first, last, body);
}
};
template<typename Iterator, typename Function>
struct parallel_for_each_impl<Iterator, Function, std::random_access_iterator_tag> {
#if __TBB_TASK_GROUP_CONTEXT
static void doit(Iterator first, Iterator last, const Function& f, task_group_context &context) {
internal::parallel_for_each_body_for<Function, Iterator> body(f);
tbb::parallel_for(tbb::blocked_range<Iterator>(first, last), body, context);
}
#endif
static void doit(Iterator first, Iterator last, const Function& f) {
internal::parallel_for_each_body_for<Function, Iterator> body(f);
tbb::parallel_for(tbb::blocked_range<Iterator>(first, last), body);
}
};
} // namespace internal
//! @endcond
/** \name parallel_for_each
**/
//@{
//! Calls function f for all items from [first, last) interval using user-supplied context
/** @ingroup algorithms */
#if __TBB_TASK_GROUP_CONTEXT
template<typename Iterator, typename Function>
void parallel_for_each(Iterator first, Iterator last, const Function& f, task_group_context &context) {
internal::parallel_for_each_impl<Iterator, Function, typename std::iterator_traits<Iterator>::iterator_category>::doit(first, last, f, context);
}
//! Calls function f for all items from rng using user-supplied context
/** @ingroup algorithms */
template<typename Range, typename Function>
void parallel_for_each(Range& rng, const Function& f, task_group_context& context) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context);
}
//! Calls function f for all items from const rng using user-supplied context
/** @ingroup algorithms */
template<typename Range, typename Function>
void parallel_for_each(const Range& rng, const Function& f, task_group_context& context) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Uses default context
template<typename Iterator, typename Function>
void parallel_for_each(Iterator first, Iterator last, const Function& f) {
internal::parallel_for_each_impl<Iterator, Function, typename std::iterator_traits<Iterator>::iterator_category>::doit(first, last, f);
}
//! Uses default context
template<typename Range, typename Function>
void parallel_for_each(Range& rng, const Function& f) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f);
}
//! Uses default context
template<typename Range, typename Function>
void parallel_for_each(const Range& rng, const Function& f) {
parallel_for_each(tbb::internal::first(rng), tbb::internal::last(rng), f);
}
//@}
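/** Usage sketch (editorial, hedged): applying a function to every element of a container
    through the overloads above; items and do_work are assumptions for illustration.
    \code
    std::vector<int> items( 1000, 1 );
    tbb::parallel_for_each( items.begin(), items.end(),
                            []( int& x ) { do_work( x ); } );
    \endcode
**/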
} // namespace
#endif /* __TBB_parallel_for_each_H */


@@ -0,0 +1,460 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_invoke_H
#define __TBB_parallel_invoke_H
#define __TBB_parallel_invoke_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "task.h"
#include "tbb_profiling.h"
#if __TBB_VARIADIC_PARALLEL_INVOKE
#include <utility> // std::forward
#endif
namespace tbb {
#if !__TBB_TASK_GROUP_CONTEXT
/** Dummy to avoid cluttering the bulk of the header with an enormous number of ifdefs. **/
struct task_group_context {
task_group_context(tbb::internal::string_index){}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! @cond INTERNAL
namespace internal {
// Simple task object, executing user method
template<typename function>
class function_invoker : public task{
public:
function_invoker(const function& _function) : my_function(_function) {}
private:
const function &my_function;
task* execute() __TBB_override
{
my_function();
return NULL;
}
};
// The class spawns two or three child tasks
template <size_t N, typename function1, typename function2, typename function3>
class spawner : public task {
private:
const function1& my_func1;
const function2& my_func2;
const function3& my_func3;
bool is_recycled;
task* execute () __TBB_override {
if(is_recycled){
return NULL;
}else{
__TBB_ASSERT(N==2 || N==3, "Number of arguments passed to spawner is wrong");
set_ref_count(N);
recycle_as_safe_continuation();
internal::function_invoker<function2>* invoker2 = new (allocate_child()) internal::function_invoker<function2>(my_func2);
__TBB_ASSERT(invoker2, "Child task allocation failed");
spawn(*invoker2);
size_t n = N; // To prevent compiler warnings
if (n>2) {
internal::function_invoker<function3>* invoker3 = new (allocate_child()) internal::function_invoker<function3>(my_func3);
__TBB_ASSERT(invoker3, "Child task allocation failed");
spawn(*invoker3);
}
my_func1();
is_recycled = true;
return NULL;
}
} // execute
public:
spawner(const function1& _func1, const function2& _func2, const function3& _func3) : my_func1(_func1), my_func2(_func2), my_func3(_func3), is_recycled(false) {}
};
// Creates and spawns child tasks
class parallel_invoke_helper : public empty_task {
public:
// Dummy functor class
class parallel_invoke_noop {
public:
void operator() () const {}
};
// Creates a helper object with user-defined number of children expected
parallel_invoke_helper(int number_of_children)
{
set_ref_count(number_of_children + 1);
}
#if __TBB_VARIADIC_PARALLEL_INVOKE
void add_children() {}
void add_children(tbb::task_group_context&) {}
template <typename function>
void add_children(function&& _func)
{
internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(std::forward<function>(_func));
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn(*invoker);
}
template<typename function>
void add_children(function&& _func, tbb::task_group_context&)
{
add_children(std::forward<function>(_func));
}
// Adds child(ren) task(s) and spawns them
template <typename function1, typename function2, typename... function>
void add_children(function1&& _func1, function2&& _func2, function&&... _func)
{
// The third argument is a dummy; it is ignored.
parallel_invoke_noop noop;
typedef internal::spawner<2, function1, function2, parallel_invoke_noop> spawner_type;
spawner_type & sub_root = *new(allocate_child()) spawner_type(std::forward<function1>(_func1), std::forward<function2>(_func2), noop);
spawn(sub_root);
add_children(std::forward<function>(_func)...);
}
#else
// Adds child task and spawns it
template <typename function>
void add_children (const function &_func)
{
internal::function_invoker<function>* invoker = new (allocate_child()) internal::function_invoker<function>(_func);
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn(*invoker);
}
// Adds a task with multiple child tasks and spawns it
// two arguments
template <typename function1, typename function2>
void add_children (const function1& _func1, const function2& _func2)
{
// The third argument is a dummy; it is ignored.
parallel_invoke_noop noop;
internal::spawner<2, function1, function2, parallel_invoke_noop>& sub_root = *new(allocate_child())internal::spawner<2, function1, function2, parallel_invoke_noop>(_func1, _func2, noop);
spawn(sub_root);
}
// three arguments
template <typename function1, typename function2, typename function3>
void add_children (const function1& _func1, const function2& _func2, const function3& _func3)
{
internal::spawner<3, function1, function2, function3>& sub_root = *new(allocate_child())internal::spawner<3, function1, function2, function3>(_func1, _func2, _func3);
spawn(sub_root);
}
#endif // __TBB_VARIADIC_PARALLEL_INVOKE
// Waits for all child tasks
template <typename F0>
void run_and_finish(const F0& f0)
{
internal::function_invoker<F0>* invoker = new (allocate_child()) internal::function_invoker<F0>(f0);
__TBB_ASSERT(invoker, "Child task allocation failed");
spawn_and_wait_for_all(*invoker);
}
};
// The class destroys the root task whether an exception occurred or not
class parallel_invoke_cleaner: internal::no_copy {
public:
#if __TBB_TASK_GROUP_CONTEXT
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context& context)
: root(*new(task::allocate_root(context)) internal::parallel_invoke_helper(number_of_children))
#else
parallel_invoke_cleaner(int number_of_children, tbb::task_group_context&)
: root(*new(task::allocate_root()) internal::parallel_invoke_helper(number_of_children))
#endif /* !__TBB_TASK_GROUP_CONTEXT */
{}
~parallel_invoke_cleaner(){
root.destroy(root);
}
internal::parallel_invoke_helper& root;
};
#if __TBB_VARIADIC_PARALLEL_INVOKE
// Determine whether the last parameter in a pack is task_group_context
template<typename... T> struct impl_selector; // to work around a GCC bug
template<typename T1, typename... T> struct impl_selector<T1, T...> {
typedef typename impl_selector<T...>::type type;
};
template<typename T> struct impl_selector<T> {
typedef false_type type;
};
template<> struct impl_selector<task_group_context&> {
typedef true_type type;
};
// Select task_group_context parameter from the back of a pack
inline task_group_context& get_context( task_group_context& tgc ) { return tgc; }
template<typename T1, typename... T>
task_group_context& get_context( T1&& /*ignored*/, T&&... t )
{ return get_context( std::forward<T>(t)... ); }
// task_group_context is known to be at the back of the parameter pack
template<typename F0, typename F1, typename... F>
void parallel_invoke_impl(true_type, F0&& f0, F1&& f1, F&&... f) {
__TBB_STATIC_ASSERT(sizeof...(F)>0, "Variadic parallel_invoke implementation broken?");
// Number of child tasks: f0, f1, and one task per two elements of the pack, not counting the trailing context
const size_t number_of_children = 2 + sizeof...(F)/2;
parallel_invoke_cleaner cleaner(number_of_children, get_context(std::forward<F>(f)...));
parallel_invoke_helper& root = cleaner.root;
root.add_children(std::forward<F>(f)...);
root.add_children(std::forward<F1>(f1));
root.run_and_finish(std::forward<F0>(f0));
}
// task_group_context is not in the pack, needs to be added
template<typename F0, typename F1, typename... F>
void parallel_invoke_impl(false_type, F0&& f0, F1&& f1, F&&... f) {
tbb::task_group_context context(PARALLEL_INVOKE);
// Add context to the arguments, and redirect to the other overload
parallel_invoke_impl(true_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)..., context);
}
#endif
} // namespace internal
//! @endcond
/** \name parallel_invoke
**/
//@{
//! Executes a list of tasks in parallel and waits for all tasks to complete.
/** @ingroup algorithms */
#if __TBB_VARIADIC_PARALLEL_INVOKE
// parallel_invoke for two or more arguments via variadic templates
// presence of task_group_context is defined automatically
template<typename F0, typename F1, typename... F>
void parallel_invoke(F0&& f0, F1&& f1, F&&... f) {
typedef typename internal::impl_selector<internal::false_type, F...>::type selector_type;
internal::parallel_invoke_impl(selector_type(), std::forward<F0>(f0), std::forward<F1>(f1), std::forward<F>(f)...);
}
#else
// parallel_invoke with user-defined context
// two arguments
template<typename F0, typename F1 >
void parallel_invoke(const F0& f0, const F1& f1, tbb::task_group_context& context) {
internal::parallel_invoke_cleaner cleaner(2, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f1);
root.run_and_finish(f0);
}
// three arguments
template<typename F0, typename F1, typename F2 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, tbb::task_group_context& context) {
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f2);
root.add_children(f1);
root.run_and_finish(f0);
}
// four arguments
template<typename F0, typename F1, typename F2, typename F3>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f3);
root.add_children(f2);
root.add_children(f1);
root.run_and_finish(f0);
}
// five arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// six arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f5, f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// seven arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(3, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f6, f5, f4);
root.add_children(f3, f2, f1);
root.run_and_finish(f0);
}
// eight arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f7, f6, f5);
root.add_children(f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// nine arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f8, f7, f6);
root.add_children(f5, f4, f3);
root.add_children(f2, f1);
root.run_and_finish(f0);
}
// ten arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8, typename F9>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9,
tbb::task_group_context& context)
{
internal::parallel_invoke_cleaner cleaner(4, context);
internal::parallel_invoke_helper& root = cleaner.root;
root.add_children(f9, f8, f7);
root.add_children(f6, f5, f4);
root.add_children(f3, f2, f1);
root.run_and_finish(f0);
}
// two arguments
template<typename F0, typename F1>
void parallel_invoke(const F0& f0, const F1& f1) {
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1>(f0, f1, context);
}
// three arguments
template<typename F0, typename F1, typename F2>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2) {
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2>(f0, f1, f2, context);
}
// four arguments
template<typename F0, typename F1, typename F2, typename F3 >
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3) {
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3>(f0, f1, f2, f3, context);
}
// five arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4) {
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4>(f0, f1, f2, f3, f4, context);
}
// six arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4, const F5& f5) {
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4, F5>(f0, f1, f2, f3, f4, f5, context);
}
// seven arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4, typename F5, typename F6>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6)
{
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4, F5, F6>(f0, f1, f2, f3, f4, f5, f6, context);
}
// eight arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7)
{
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7>(f0, f1, f2, f3, f4, f5, f6, f7, context);
}
// nine arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8)
{
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8>(f0, f1, f2, f3, f4, f5, f6, f7, f8, context);
}
// ten arguments
template<typename F0, typename F1, typename F2, typename F3, typename F4,
typename F5, typename F6, typename F7, typename F8, typename F9>
void parallel_invoke(const F0& f0, const F1& f1, const F2& f2, const F3& f3, const F4& f4,
const F5& f5, const F6& f6, const F7& f7, const F8& f8, const F9& f9)
{
task_group_context context(internal::PARALLEL_INVOKE);
parallel_invoke<F0, F1, F2, F3, F4, F5, F6, F7, F8, F9>(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, context);
}
#endif // __TBB_VARIADIC_PARALLEL_INVOKE
//@}
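/** Usage sketch (editorial, hedged): running three independent functions concurrently;
    f, g and h are assumptions for illustration.
    \code
    tbb::parallel_invoke(
        []{ f(); },
        []{ g(); },
        []{ h(); }
    );   // returns only after all three have completed
    \endcode
**/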
} // namespace
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_invoke_H_include_area
#endif /* __TBB_parallel_invoke_H */

View File

@@ -0,0 +1,657 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H
#define __TBB_parallel_reduce_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include <new>
#include "task.h"
#include "aligned_space.h"
#include "partitioner.h"
#include "tbb_profiling.h"
namespace tbb {
namespace interface9 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal;
/** Values for reduction_context. */
enum {
root_task, left_child, right_child
};
/** Represented as a char, not enum, for compactness. */
typedef char reduction_context;
//! Task type used to combine the partial results of parallel_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_reduce: public flag_task {
//! True if the right child was stolen and constructed its Body in zombie_space.
bool has_right_zombie;
const reduction_context my_context;
//! Pointer to the body, or NULL if the left child has not yet finished.
Body* my_body;
aligned_space<Body> zombie_space;
finish_reduce( reduction_context context_ ) :
has_right_zombie(false), // TODO: substitute by flag_task::child_stolen?
my_context(context_),
my_body(NULL)
{
}
~finish_reduce() {
if( has_right_zombie )
zombie_space.begin()->~Body();
}
task* execute() __TBB_override {
if( has_right_zombie ) {
// Right child was stolen.
Body* s = zombie_space.begin();
my_body->join( *s );
// Body::join() won't be called if canceled. Defer destruction to destructor
}
if( my_context==left_child )
itt_store_word_with_release( static_cast<finish_reduce*>(parent())->my_body, my_body );
return NULL;
}
template<typename Range,typename Body_, typename Partitioner>
friend class start_reduce;
};
//! allocate right task with new parent
void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes);
//! Task type used to split the work of parallel_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
class start_reduce: public task {
typedef finish_reduce<Body> finish_type;
Body* my_body;
Range my_range;
typename Partitioner::task_partition_type my_partition;
reduction_context my_context;
task* execute() __TBB_override;
//! Update affinity info, if any
void note_affinity( affinity_id id ) __TBB_override {
my_partition.note_affinity( id );
}
template<typename Body_>
friend class finish_reduce;
public:
//! Constructor used for root task
start_reduce( const Range& range, Body* body, Partitioner& partitioner ) :
my_body(body),
my_range(range),
my_partition(partitioner),
my_context(root_task)
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, typename Partitioner::split_type& split_obj ) :
my_body(parent_.my_body),
my_range(parent_.my_range, split_obj),
my_partition(parent_.my_partition, split_obj),
my_context(right_child)
{
my_partition.set_affinity(*this);
parent_.my_context = left_child;
}
//! Construct right child from the given range as response to the demand.
/** parent_ remains left child. Newly constructed object is right child. */
start_reduce( start_reduce& parent_, const Range& r, depth_t d ) :
my_body(parent_.my_body),
my_range(r),
my_partition(parent_.my_partition, split()),
my_context(right_child)
{
my_partition.set_affinity(*this);
my_partition.align_depth( d ); // TODO: move into constructor of partitioner
parent_.my_context = left_child;
}
static void run( const Range& range, Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) start_reduce(range,&body,partitioner) );
#else
// A bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_reduce in a try-block.
task_group_context context(PARALLEL_REDUCE);
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
if( !range.empty() )
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_reduce(range,&body,partitioner) );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Run body for range
void run_body( Range &r ) { (*my_body)( r ); }
//! spawn right task, serves as callback for partitioner
// TODO: remove code duplication from 'offer_work' methods
void offer_work(typename Partitioner::split_type& split_obj) {
task *tasks[2];
allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
new((void*)tasks[0]) finish_type(my_context);
new((void*)tasks[1]) start_reduce(*this, split_obj);
spawn(*tasks[1]);
}
//! spawn right task, serves as callback for partitioner
void offer_work(const Range& r, depth_t d = 0) {
task *tasks[2];
allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_reduce), sizeof(finish_type));
new((void*)tasks[0]) finish_type(my_context);
new((void*)tasks[1]) start_reduce(*this, r, d);
spawn(*tasks[1]);
}
};
//! allocate right task with new parent
// TODO: 'inline' here is to avoid a multiple-definition error, but for the sake of code size this should not be inlined
inline void allocate_sibling(task* start_reduce_task, task *tasks[], size_t start_bytes, size_t finish_bytes) {
tasks[0] = &start_reduce_task->allocate_continuation().allocate(finish_bytes);
start_reduce_task->set_parent(tasks[0]);
tasks[0]->set_ref_count(2);
tasks[1] = &tasks[0]->allocate_child().allocate(start_bytes);
}
template<typename Range, typename Body, typename Partitioner>
task* start_reduce<Range,Body,Partitioner>::execute() {
my_partition.check_being_stolen( *this );
if( my_context==right_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent());
if( !itt_load_word_with_acquire(parent_ptr->my_body) ) { // TODO: replace by is_stolen_task() or by parent_ptr->ref_count() == 2???
my_body = new( parent_ptr->zombie_space.begin() ) Body(*my_body,split());
parent_ptr->has_right_zombie = true;
}
} else __TBB_ASSERT(my_context==root_task,NULL); // because the left leaf spawns right leaves without recycling
my_partition.execute(*this, my_range);
if( my_context==left_child ) {
finish_type* parent_ptr = static_cast<finish_type*>(parent());
__TBB_ASSERT(my_body!=parent_ptr->zombie_space.begin(),NULL);
itt_store_word_with_release(parent_ptr->my_body, my_body );
}
return NULL;
}
//! Task type used to combine the partial results of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Body>
class finish_deterministic_reduce: public task {
Body &my_left_body;
Body my_right_body;
finish_deterministic_reduce( Body &body ) :
my_left_body( body ),
my_right_body( body, split() )
{
}
task* execute() __TBB_override {
my_left_body.join( my_right_body );
return NULL;
}
template<typename Range,typename Body_, typename Partitioner>
friend class start_deterministic_reduce;
};
//! Task type used to split the work of parallel_deterministic_reduce.
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner>
class start_deterministic_reduce: public task {
typedef finish_deterministic_reduce<Body> finish_type;
Body &my_body;
Range my_range;
typename Partitioner::task_partition_type my_partition;
task* execute() __TBB_override;
//! Constructor used for root task
start_deterministic_reduce( const Range& range, Body& body, Partitioner& partitioner ) :
my_body( body ),
my_range( range ),
my_partition( partitioner )
{
}
//! Splitting constructor used to generate children.
/** parent_ becomes left child. Newly constructed object is right child. */
start_deterministic_reduce( start_deterministic_reduce& parent_, finish_type& c, typename Partitioner::split_type& split_obj ) :
my_body( c.my_right_body ),
my_range( parent_.my_range, split_obj ),
my_partition( parent_.my_partition, split_obj )
{
}
public:
static void run( const Range& range, Body& body, Partitioner& partitioner ) {
if( !range.empty() ) {
#if !__TBB_TASK_GROUP_CONTEXT || TBB_JOIN_OUTER_TASK_GROUP
task::spawn_root_and_wait( *new(task::allocate_root()) start_deterministic_reduce(range,body,partitioner) );
#else
// A bound context prevents exceptions thrown from the body from affecting nesting or sibling algorithms,
// and allows users to handle exceptions safely by wrapping parallel_deterministic_reduce in a try-block.
task_group_context context(PARALLEL_REDUCE);
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) );
#endif /* __TBB_TASK_GROUP_CONTEXT && !TBB_JOIN_OUTER_TASK_GROUP */
}
}
#if __TBB_TASK_GROUP_CONTEXT
static void run( const Range& range, Body& body, Partitioner& partitioner, task_group_context& context ) {
if( !range.empty() )
task::spawn_root_and_wait( *new(task::allocate_root(context)) start_deterministic_reduce(range,body,partitioner) );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
void offer_work( typename Partitioner::split_type& split_obj) {
task* tasks[2];
allocate_sibling(static_cast<task*>(this), tasks, sizeof(start_deterministic_reduce), sizeof(finish_type));
new((void*)tasks[0]) finish_type(my_body);
new((void*)tasks[1]) start_deterministic_reduce(*this, *static_cast<finish_type*>(tasks[0]), split_obj);
spawn(*tasks[1]);
}
void run_body( Range &r ) { my_body(r); }
};
template<typename Range, typename Body, typename Partitioner>
task* start_deterministic_reduce<Range,Body, Partitioner>::execute() {
my_partition.execute(*this, my_range);
return NULL;
}
} // namespace internal
//! @endcond
} //namespace interfaceX
//! @cond INTERNAL
namespace internal {
using interface9::internal::start_reduce;
using interface9::internal::start_deterministic_reduce;
//! Auxiliary class for parallel_reduce; for internal use only.
/** The adaptor class that implements the \ref parallel_reduce_body_req "parallel_reduce Body" concept
    using the given \ref parallel_reduce_lambda_req "anonymous function objects".
**/
/** @ingroup algorithms */
template<typename Range, typename Value, typename RealBody, typename Reduction>
class lambda_reduce_body {
//FIXME: decide if my_real_body, my_reduction, and identity_element should be copied or referenced
// (might require some performance measurements)
const Value& identity_element;
const RealBody& my_real_body;
const Reduction& my_reduction;
Value my_value;
lambda_reduce_body& operator= ( const lambda_reduce_body& other );
public:
lambda_reduce_body( const Value& identity, const RealBody& body, const Reduction& reduction )
: identity_element(identity)
, my_real_body(body)
, my_reduction(reduction)
, my_value(identity)
{ }
lambda_reduce_body( const lambda_reduce_body& other )
: identity_element(other.identity_element)
, my_real_body(other.my_real_body)
, my_reduction(other.my_reduction)
, my_value(other.my_value)
{ }
lambda_reduce_body( lambda_reduce_body& other, tbb::split )
: identity_element(other.identity_element)
, my_real_body(other.my_real_body)
, my_reduction(other.my_reduction)
, my_value(other.identity_element)
{ }
void operator()(Range& range) {
my_value = my_real_body(range, const_cast<const Value&>(my_value));
}
void join( lambda_reduce_body& rhs ) {
my_value = my_reduction(const_cast<const Value&>(my_value), const_cast<const Value&>(rhs.my_value));
}
Value result() const {
return my_value;
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_reduce_body_req Requirements on parallel_reduce body
Class \c Body implementing the concept of parallel_reduce body must define:
- \code Body::Body( Body&, split ); \endcode Splitting constructor.
Must be able to run concurrently with operator() and method \c join
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( Range& r ); \endcode Function call operator applying body to range \c r
and accumulating the result
- \code void Body::join( Body& b ); \endcode Join results.
The result in \c b should be merged into the result of \c this
**/
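/** \par Example (illustrative sketch, not part of the original header)
    A minimal Body satisfying the requirements above, summing a plain array of floats.
    The names \c SumBody and \c parallel_sum_example are assumptions of the example:
    \code
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"

    struct SumBody {
        const float* my_array;   // input data, assumed to outlive the reduction
        float my_sum;
        SumBody( const float* a ) : my_array(a), my_sum(0) {}
        SumBody( SumBody& other, tbb::split ) : my_array(other.my_array), my_sum(0) {}
        void operator()( const tbb::blocked_range<size_t>& r ) {
            for( size_t i=r.begin(); i!=r.end(); ++i )   // accumulate over the subrange
                my_sum += my_array[i];
        }
        void join( SumBody& rhs ) { my_sum += rhs.my_sum; }   // merge a right sibling's result
    };

    float parallel_sum_example( const float* data, size_t n ) {
        SumBody body(data);
        tbb::parallel_reduce( tbb::blocked_range<size_t>(0,n), body );
        return body.my_sum;
    }
    \endcode
**/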
/** \page parallel_reduce_lambda_req Requirements on parallel_reduce anonymous function objects (lambda functions)
    The functional (lambda) form of parallel_reduce takes an identity value and two function objects:
    - \code Value RealBody::operator()( const Range& r, const Value& x ) const; \endcode
    Accumulate the result over subrange \c r, starting from the value \c x
    - \code Value Reduction::operator()( const Value& x, const Value& y ) const; \endcode
    Combine the results \c x and \c y
**/
/** \name parallel_reduce
See also requirements on \ref range_req "Range" and \ref parallel_reduce_body_req "parallel_reduce Body". **/
//@{
//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body ) {
internal::start_reduce<Range,Body, const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER() );
}
//! Parallel iteration with reduction and simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner );
}
//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner ) {
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner );
}
//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner );
}
//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner ) {
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner );
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, task_group_context& context ) {
internal::start_reduce<Range,Body,const __TBB_DEFAULT_PARTITIONER>::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
}
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,const simple_partitioner>::run( range, body, partitioner, context );
}
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const auto_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,const auto_partitioner>::run( range, body, partitioner, context );
}
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,const static_partitioner>::run( range, body, partitioner, context );
}
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_reduce( const Range& range, Body& body, affinity_partitioner& partitioner, task_group_context& context ) {
internal::start_reduce<Range,Body,affinity_partitioner>::run( range, body, partitioner, context );
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
/** parallel_reduce overloads that work with anonymous function objects
(see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
//! Parallel iteration with reduction and default partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
::run(range, body, __TBB_DEFAULT_PARTITIONER() );
return body.result();
}
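/** \par Example (illustrative sketch, not part of the original header)
    A minimal use of the functional overload above, summing a std::vector<double>;
    the function name \c parallel_sum is an assumption of the example:
    \code
    #include <vector>
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"

    double parallel_sum( const std::vector<double>& v ) {
        return tbb::parallel_reduce(
            tbb::blocked_range<size_t>(0, v.size()),
            0.0,                                               // identity element
            [&]( const tbb::blocked_range<size_t>& r, double running ) {
                for( size_t i=r.begin(); i!=r.end(); ++i )     // accumulate over the subrange
                    running += v[i];
                return running;
            },
            []( double x, double y ) { return x + y; } );      // combine partial sums
    }
    \endcode
**/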
//! Parallel iteration with reduction and simple_partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const simple_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run(range, body, partitioner );
return body.result();
}
//! Parallel iteration with reduction and auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const auto_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner );
return body.result();
}
//! Parallel iteration with reduction and static_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const static_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
::run( range, body, partitioner );
return body.result();
}
//! Parallel iteration with reduction and affinity_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
affinity_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner );
return body.result();
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with reduction, default partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const __TBB_DEFAULT_PARTITIONER>
::run( range, body, __TBB_DEFAULT_PARTITIONER(), context );
return body.result();
}
//! Parallel iteration with reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const simple_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const simple_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
//! Parallel iteration with reduction, auto_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const auto_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const auto_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
//! Parallel iteration with reduction, static_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const static_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,const static_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
//! Parallel iteration with reduction, affinity_partitioner and user-supplied context
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
affinity_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>,affinity_partitioner>
::run( range, body, partitioner, context );
return body.result();
}
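/** \par Example (illustrative sketch, not part of the original header)
    Passing an explicit task_group_context lets another thread cancel the reduction via
    context.cancel_group_execution(); a cancelled call returns early and its result should
    be treated as incomplete. The function name \c cancellable_sum is an assumption:
    \code
    #include <vector>
    #include "tbb/parallel_reduce.h"   // also makes task_group_context available
    #include "tbb/blocked_range.h"

    double cancellable_sum( const std::vector<double>& v, tbb::task_group_context& ctx ) {
        return tbb::parallel_reduce(
            tbb::blocked_range<size_t>(0, v.size()),
            0.0,
            [&]( const tbb::blocked_range<size_t>& r, double s ) {
                for( size_t i=r.begin(); i!=r.end(); ++i ) s += v[i];
                return s;
            },
            []( double x, double y ) { return x + y; },
            tbb::auto_partitioner(), ctx );
    }
    \endcode
**/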
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Parallel iteration with deterministic reduction and default simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body ) {
internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, simple_partitioner());
}
//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner ) {
internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner);
}
//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner ) {
internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner);
}
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, task_group_context& context ) {
internal::start_deterministic_reduce<Range,Body, const simple_partitioner>::run( range, body, simple_partitioner(), context );
}
//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const simple_partitioner& partitioner, task_group_context& context ) {
internal::start_deterministic_reduce<Range, Body, const simple_partitioner>::run(range, body, partitioner, context);
}
//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_deterministic_reduce( const Range& range, Body& body, const static_partitioner& partitioner, task_group_context& context ) {
internal::start_deterministic_reduce<Range, Body, const static_partitioner>::run(range, body, partitioner, context);
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
/** parallel_reduce overloads that work with anonymous function objects
(see also \ref parallel_reduce_lambda_req "requirements on parallel_reduce anonymous function objects"). **/
//! Parallel iteration with deterministic reduction and default simple partitioner.
// TODO: consider making static_partitioner the default
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction ) {
return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner());
}
//! Parallel iteration with deterministic reduction and simple partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const simple_partitioner& partitioner ) {
internal::lambda_reduce_body<Range,Value,RealBody,Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range,internal::lambda_reduce_body<Range,Value,RealBody,Reduction>, const simple_partitioner>
::run(range, body, partitioner);
return body.result();
}
//! Parallel iteration with deterministic reduction and static partitioner.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction, const static_partitioner& partitioner ) {
internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
::run(range, body, partitioner);
return body.result();
}
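/** \par Example (illustrative sketch, not part of the original header)
    With parallel_deterministic_reduce the way subranges are split and joined does not depend
    on task scheduling or stealing, so floating-point results are reproducible run to run.
    A minimal use of the functional overloads above; \c deterministic_sum is an assumption:
    \code
    #include <vector>
    #include "tbb/parallel_reduce.h"
    #include "tbb/blocked_range.h"

    double deterministic_sum( const std::vector<double>& v ) {
        return tbb::parallel_deterministic_reduce(
            tbb::blocked_range<size_t>(0, v.size(), /*grainsize=*/1000),
            0.0,
            [&]( const tbb::blocked_range<size_t>& r, double running ) {
                for( size_t i=r.begin(); i!=r.end(); ++i )
                    running += v[i];
                return running;
            },
            []( double x, double y ) { return x + y; } );
    }
    \endcode
**/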
#if __TBB_TASK_GROUP_CONTEXT
//! Parallel iteration with deterministic reduction, default simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
task_group_context& context ) {
return parallel_deterministic_reduce(range, identity, real_body, reduction, simple_partitioner(), context);
}
//! Parallel iteration with deterministic reduction, simple partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const simple_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const simple_partitioner>
::run(range, body, partitioner, context);
return body.result();
}
//! Parallel iteration with deterministic reduction, static partitioner and user-supplied context.
/** @ingroup algorithms **/
template<typename Range, typename Value, typename RealBody, typename Reduction>
Value parallel_deterministic_reduce( const Range& range, const Value& identity, const RealBody& real_body, const Reduction& reduction,
const static_partitioner& partitioner, task_group_context& context ) {
internal::lambda_reduce_body<Range, Value, RealBody, Reduction> body(identity, real_body, reduction);
internal::start_deterministic_reduce<Range, internal::lambda_reduce_body<Range, Value, RealBody, Reduction>, const static_partitioner>
::run(range, body, partitioner, context);
return body.result();
}
#endif /* __TBB_TASK_GROUP_CONTEXT */
//@}
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_reduce_H_include_area
#endif /* __TBB_parallel_reduce_H */

View File

@@ -0,0 +1,416 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_scan_H
#define __TBB_parallel_scan_H
#define __TBB_parallel_scan_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "task.h"
#include "aligned_space.h"
#include <new>
#include "partitioner.h"
namespace tbb {
//! Used to indicate that the initial scan is being performed.
/** @ingroup algorithms */
struct pre_scan_tag {
static bool is_final_scan() {return false;}
operator bool() {return is_final_scan();}
};
//! Used to indicate that the final scan is being performed.
/** @ingroup algorithms */
struct final_scan_tag {
static bool is_final_scan() {return true;}
operator bool() {return is_final_scan();}
};
//! @cond INTERNAL
namespace internal {
//! Performs final scan for a leaf
/** @ingroup algorithms */
template<typename Range, typename Body>
class final_sum: public task {
public:
Body my_body;
private:
aligned_space<Range> my_range;
//! Where to put result of last subrange, or NULL if not last subrange.
Body* my_stuff_last;
public:
final_sum( Body& body_ ) :
my_body(body_,split())
{
poison_pointer(my_stuff_last);
}
~final_sum() {
my_range.begin()->~Range();
}
void finish_construction( const Range& range_, Body* stuff_last_ ) {
new( my_range.begin() ) Range(range_);
my_stuff_last = stuff_last_;
}
private:
task* execute() __TBB_override {
my_body( *my_range.begin(), final_scan_tag() );
if( my_stuff_last )
my_stuff_last->assign(my_body);
return NULL;
}
};
//! Split work to be done in the scan.
/** @ingroup algorithms */
template<typename Range, typename Body>
class sum_node: public task {
typedef final_sum<Range,Body> final_sum_type;
public:
final_sum_type *my_incoming;
final_sum_type *my_body;
Body *my_stuff_last;
private:
final_sum_type *my_left_sum;
sum_node *my_left;
sum_node *my_right;
bool my_left_is_final;
Range my_range;
sum_node( const Range range_, bool left_is_final_ ) :
my_stuff_last(NULL),
my_left_sum(NULL),
my_left(NULL),
my_right(NULL),
my_left_is_final(left_is_final_),
my_range(range_)
{
// Poison fields that will be set by second pass.
poison_pointer(my_body);
poison_pointer(my_incoming);
}
task* create_child( const Range& range_, final_sum_type& f, sum_node* n, final_sum_type* incoming_, Body* stuff_last_ ) {
if( !n ) {
f.recycle_as_child_of( *this );
f.finish_construction( range_, stuff_last_ );
return &f;
} else {
n->my_body = &f;
n->my_incoming = incoming_;
n->my_stuff_last = stuff_last_;
return n;
}
}
task* execute() __TBB_override {
if( my_body ) {
if( my_incoming )
my_left_sum->my_body.reverse_join( my_incoming->my_body );
recycle_as_continuation();
sum_node& c = *this;
task* b = c.create_child(Range(my_range,split()),*my_left_sum,my_right,my_left_sum,my_stuff_last);
task* a = my_left_is_final ? NULL : c.create_child(my_range,*my_body,my_left,my_incoming,NULL);
set_ref_count( (a!=NULL)+(b!=NULL) );
my_body = NULL;
if( a ) spawn(*b);
else a = b;
return a;
} else {
return NULL;
}
}
template<typename Range_,typename Body_,typename Partitioner_>
friend class start_scan;
template<typename Range_,typename Body_>
friend class finish_scan;
};
//! Combine partial results
/** @ingroup algorithms */
template<typename Range, typename Body>
class finish_scan: public task {
typedef sum_node<Range,Body> sum_node_type;
typedef final_sum<Range,Body> final_sum_type;
final_sum_type** const my_sum;
sum_node_type*& my_return_slot;
public:
final_sum_type* my_right_zombie;
sum_node_type& my_result;
task* execute() __TBB_override {
__TBB_ASSERT( my_result.ref_count()==(my_result.my_left!=NULL)+(my_result.my_right!=NULL), NULL );
if( my_result.my_left )
my_result.my_left_is_final = false;
if( my_right_zombie && my_sum )
((*my_sum)->my_body).reverse_join(my_result.my_left_sum->my_body);
__TBB_ASSERT( !my_return_slot, NULL );
if( my_right_zombie || my_result.my_right ) {
my_return_slot = &my_result;
} else {
destroy( my_result );
}
if( my_right_zombie && !my_sum && !my_result.my_right ) {
destroy(*my_right_zombie);
my_right_zombie = NULL;
}
return NULL;
}
finish_scan( sum_node_type*& return_slot_, final_sum_type** sum_, sum_node_type& result_ ) :
my_sum(sum_),
my_return_slot(return_slot_),
my_right_zombie(NULL),
my_result(result_)
{
__TBB_ASSERT( !my_return_slot, NULL );
}
};
//! Initial task to split the work
/** @ingroup algorithms */
template<typename Range, typename Body, typename Partitioner=simple_partitioner>
class start_scan: public task {
typedef sum_node<Range,Body> sum_node_type;
typedef final_sum<Range,Body> final_sum_type;
final_sum_type* my_body;
/** Non-null if caller is requesting total. */
final_sum_type** my_sum;
sum_node_type** my_return_slot;
/** Null if computing root. */
sum_node_type* my_parent_sum;
bool my_is_final;
bool my_is_right_child;
Range my_range;
typename Partitioner::partition_type my_partition;
task* execute() __TBB_override ;
public:
start_scan( sum_node_type*& return_slot_, start_scan& parent_, sum_node_type* parent_sum_ ) :
my_body(parent_.my_body),
my_sum(parent_.my_sum),
my_return_slot(&return_slot_),
my_parent_sum(parent_sum_),
my_is_final(parent_.my_is_final),
my_is_right_child(false),
my_range(parent_.my_range,split()),
my_partition(parent_.my_partition,split())
{
__TBB_ASSERT( !*my_return_slot, NULL );
}
start_scan( sum_node_type*& return_slot_, const Range& range_, final_sum_type& body_, const Partitioner& partitioner_) :
my_body(&body_),
my_sum(NULL),
my_return_slot(&return_slot_),
my_parent_sum(NULL),
my_is_final(true),
my_is_right_child(false),
my_range(range_),
my_partition(partitioner_)
{
__TBB_ASSERT( !*my_return_slot, NULL );
}
static void run( const Range& range_, Body& body_, const Partitioner& partitioner_ ) {
if( !range_.empty() ) {
typedef internal::start_scan<Range,Body,Partitioner> start_pass1_type;
internal::sum_node<Range,Body>* root = NULL;
final_sum_type* temp_body = new(task::allocate_root()) final_sum_type( body_ );
start_pass1_type& pass1 = *new(task::allocate_root()) start_pass1_type(
/*my_return_slot=*/root,
range_,
*temp_body,
partitioner_ );
temp_body->my_body.reverse_join(body_);
task::spawn_root_and_wait( pass1 );
if( root ) {
root->my_body = temp_body;
root->my_incoming = NULL;
root->my_stuff_last = &body_;
task::spawn_root_and_wait( *root );
} else {
body_.assign(temp_body->my_body);
temp_body->finish_construction( range_, NULL );
temp_body->destroy(*temp_body);
}
}
}
};
template<typename Range, typename Body, typename Partitioner>
task* start_scan<Range,Body,Partitioner>::execute() {
typedef internal::finish_scan<Range,Body> finish_pass1_type;
finish_pass1_type* p = my_parent_sum ? static_cast<finish_pass1_type*>( parent() ) : NULL;
// Inspecting p->my_result.my_left_sum would ordinarily be a race condition.
// But we inspect it only if we are not a stolen task, in which case we
// know that the task assigning to p->my_result.my_left_sum has completed.
bool treat_as_stolen = my_is_right_child && (is_stolen_task() || my_body!=p->my_result.my_left_sum);
if( treat_as_stolen ) {
// Invocation is for right child that has been really stolen or needs to be virtually stolen
p->my_right_zombie = my_body = new( allocate_root() ) final_sum_type(my_body->my_body);
my_is_final = false;
}
task* next_task = NULL;
if( (my_is_right_child && !treat_as_stolen) || !my_range.is_divisible() || my_partition.should_execute_range(*this) ) {
if( my_is_final )
(my_body->my_body)( my_range, final_scan_tag() );
else if( my_sum )
(my_body->my_body)( my_range, pre_scan_tag() );
if( my_sum )
*my_sum = my_body;
__TBB_ASSERT( !*my_return_slot, NULL );
} else {
sum_node_type* result;
if( my_parent_sum )
result = new(allocate_additional_child_of(*my_parent_sum)) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
else
result = new(task::allocate_root()) sum_node_type(my_range,/*my_left_is_final=*/my_is_final);
finish_pass1_type& c = *new( allocate_continuation()) finish_pass1_type(*my_return_slot,my_sum,*result);
// Split off right child
start_scan& b = *new( c.allocate_child() ) start_scan( /*my_return_slot=*/result->my_right, *this, result );
b.my_is_right_child = true;
// Left child is recycling of *this. Must recycle this before spawning b,
// otherwise b might complete and decrement c.ref_count() to zero, which
// would cause c.execute() to run prematurely.
recycle_as_child_of(c);
c.set_ref_count(2);
c.spawn(b);
my_sum = &result->my_left_sum;
my_return_slot = &result->my_left;
my_is_right_child = false;
next_task = this;
my_parent_sum = result;
__TBB_ASSERT( !*my_return_slot, NULL );
}
return next_task;
}
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
class lambda_scan_body : no_assign {
Value my_sum;
const Value& identity_element;
const Scan& my_scan;
const ReverseJoin& my_reverse_join;
public:
lambda_scan_body( const Value& identity, const Scan& scan, const ReverseJoin& rev_join)
: my_sum(identity)
, identity_element(identity)
, my_scan(scan)
, my_reverse_join(rev_join) {}
lambda_scan_body( lambda_scan_body& b, split )
: my_sum(b.identity_element)
, identity_element(b.identity_element)
, my_scan(b.my_scan)
, my_reverse_join(b.my_reverse_join) {}
template<typename Tag>
void operator()( const Range& r, Tag tag ) {
my_sum = my_scan(r, my_sum, tag);
}
void reverse_join( lambda_scan_body& a ) {
my_sum = my_reverse_join(a.my_sum, my_sum);
}
void assign( lambda_scan_body& b ) {
my_sum = b.my_sum;
}
Value result() const {
return my_sum;
}
};
} // namespace internal
//! @endcond
// Requirements on Range concept are documented in blocked_range.h
/** \page parallel_scan_body_req Requirements on parallel_scan body
Class \c Body implementing the concept of parallel_scan body must define:
- \code Body::Body( Body&, split ); \endcode Splitting constructor.
Split \c b so that \c this and \c b can accumulate separately
- \code Body::~Body(); \endcode Destructor
- \code void Body::operator()( const Range& r, pre_scan_tag ); \endcode
Preprocess iterations for range \c r
- \code void Body::operator()( const Range& r, final_scan_tag ); \endcode
Do final processing for iterations of range \c r
- \code void Body::reverse_join( Body& a ); \endcode
Merge preprocessing state of \c a into \c this, where \c a was
created earlier from \c b by b's splitting constructor
- \code void Body::assign( Body& b ); \endcode
Assign state of \c b to \c this (used to report the final summary value)
**/
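/** \par Example (illustrative sketch, not part of the original header)
    A minimal Body satisfying the requirements above, computing a running (prefix) sum of
    an input array \c x into an output array \c y; a single template operator() covers both
    tag overloads listed above. \c PrefixSumBody and both pointers are assumptions:
    \code
    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"

    struct PrefixSumBody {
        const float* x;   // input
        float* y;         // output: y[i] = x[0] + ... + x[i]
        float sum;        // running total carried across subranges
        PrefixSumBody( const float* x_, float* y_ ) : x(x_), y(y_), sum(0) {}
        PrefixSumBody( PrefixSumBody& b, tbb::split ) : x(b.x), y(b.y), sum(0) {}
        template<typename Tag>
        void operator()( const tbb::blocked_range<size_t>& r, Tag ) {
            float temp = sum;
            for( size_t i=r.begin(); i!=r.end(); ++i ) {
                temp += x[i];
                if( Tag::is_final_scan() )   // write output only during the final pass
                    y[i] = temp;
            }
            sum = temp;
        }
        void reverse_join( PrefixSumBody& a ) { sum = a.sum + sum; }   // prepend the left sum
        void assign( PrefixSumBody& b ) { sum = b.sum; }
    };

    // Usage: PrefixSumBody body(x, y);
    //        tbb::parallel_scan( tbb::blocked_range<size_t>(0,n), body );
    \endcode
**/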
/** \name parallel_scan
See also requirements on \ref range_req "Range" and \ref parallel_scan_body_req "parallel_scan Body". **/
//@{
//! Parallel prefix with default partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body ) {
internal::start_scan<Range,Body,__TBB_DEFAULT_PARTITIONER>::run(range,body,__TBB_DEFAULT_PARTITIONER());
}
//! Parallel prefix with simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body, const simple_partitioner& partitioner ) {
internal::start_scan<Range,Body,simple_partitioner>::run(range,body,partitioner);
}
//! Parallel prefix with auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Body>
void parallel_scan( const Range& range, Body& body, const auto_partitioner& partitioner ) {
internal::start_scan<Range,Body,auto_partitioner>::run(range,body,partitioner);
}
//! Parallel prefix with default partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join ) {
internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
tbb::parallel_scan(range,body,__TBB_DEFAULT_PARTITIONER());
return body.result();
}
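/** \par Example (illustrative sketch, not part of the original header)
    A minimal use of the functional overload above, writing the running sum of \c in into
    \c out (assumed to already have the same size) and returning the total; \c running_sum
    is an assumption of the example:
    \code
    #include <vector>
    #include "tbb/parallel_scan.h"
    #include "tbb/blocked_range.h"

    int running_sum( const std::vector<int>& in, std::vector<int>& out ) {
        return tbb::parallel_scan(
            tbb::blocked_range<size_t>(0, in.size()),
            0,                                                       // identity
            [&]( const tbb::blocked_range<size_t>& r, int sum, bool is_final_scan ) {
                for( size_t i=r.begin(); i!=r.end(); ++i ) {
                    sum += in[i];
                    if( is_final_scan )                              // the scan tags convert to bool
                        out[i] = sum;
                }
                return sum;
            },
            []( int left, int right ) { return left + right; } );   // reverse join
    }
    \endcode
**/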
//! Parallel prefix with simple_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const simple_partitioner& partitioner ) {
internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
tbb::parallel_scan(range,body,partitioner);
return body.result();
}
//! Parallel prefix with auto_partitioner
/** @ingroup algorithms **/
template<typename Range, typename Value, typename Scan, typename ReverseJoin>
Value parallel_scan( const Range& range, const Value& identity, const Scan& scan, const ReverseJoin& reverse_join, const auto_partitioner& partitioner ) {
internal::lambda_scan_body<Range, Value, Scan, ReverseJoin> body(identity, scan, reverse_join);
tbb::parallel_scan(range,body,partitioner);
return body.result();
}
//@}
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_scan_H_include_area
#endif /* __TBB_parallel_scan_H */

View File

@@ -0,0 +1,257 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_sort_H
#define __TBB_parallel_sort_H
#define __TBB_parallel_sort_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "parallel_for.h"
#include "blocked_range.h"
#include "internal/_range_iterator.h"
#include <algorithm>
#include <iterator>
#include <functional>
#if __TBB_TASK_GROUP_CONTEXT
#include "tbb_profiling.h"
#endif
namespace tbb {
namespace interface9 {
//! @cond INTERNAL
namespace internal {
using tbb::internal::no_assign;
//! Range used in quicksort to split elements into subranges based on a value.
/** The split operation selects a splitter and places all elements less than or equal
to the value in the first range and the remaining elements in the second range.
@ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_range: private no_assign {
inline size_t median_of_three(const RandomAccessIterator &array, size_t l, size_t m, size_t r) const {
return comp(array[l], array[m]) ? ( comp(array[m], array[r]) ? m : ( comp( array[l], array[r]) ? r : l ) )
: ( comp(array[r], array[m]) ? m : ( comp( array[r], array[l] ) ? r : l ) );
}
inline size_t pseudo_median_of_nine( const RandomAccessIterator &array, const quick_sort_range &range ) const {
size_t offset = range.size/8u;
return median_of_three(array,
median_of_three(array, 0, offset, offset*2),
median_of_three(array, offset*3, offset*4, offset*5),
median_of_three(array, offset*6, offset*7, range.size - 1) );
}
size_t split_range( quick_sort_range& range ) {
using std::iter_swap;
RandomAccessIterator array = range.begin;
RandomAccessIterator key0 = range.begin;
size_t m = pseudo_median_of_nine(array, range);
if (m) iter_swap ( array, array+m );
size_t i=0;
size_t j=range.size;
// Partition interval [i+1,j-1] with key *key0.
for(;;) {
__TBB_ASSERT( i<j, NULL );
// Loop must terminate since array[l]==*key0.
do {
--j;
__TBB_ASSERT( i<=j, "bad ordering relation?" );
} while( comp( *key0, array[j] ));
do {
__TBB_ASSERT( i<=j, NULL );
if( i==j ) goto partition;
++i;
} while( comp( array[i],*key0 ));
if( i==j ) goto partition;
iter_swap( array+i, array+j );
}
partition:
// Put the partition key where it belongs
iter_swap( array+j, key0 );
// array[l..j) is less or equal to key.
// array(j..r) is greater or equal to key.
// array[j] is equal to key
i=j+1;
size_t new_range_size = range.size-i;
range.size = j;
return new_range_size;
}
public:
static const size_t grainsize = 500;
const Compare &comp;
size_t size;
RandomAccessIterator begin;
quick_sort_range( RandomAccessIterator begin_, size_t size_, const Compare &comp_ ) :
comp(comp_), size(size_), begin(begin_) {}
bool empty() const {return size==0;}
bool is_divisible() const {return size>=grainsize;}
quick_sort_range( quick_sort_range& range, split )
: comp(range.comp)
, size(split_range(range))
// +1 accounts for the pivot element, which is already at its correct place
// and, therefore, is not included in either subrange.
, begin(range.begin+range.size+1) {}
};
#if __TBB_TASK_GROUP_CONTEXT
//! Body class used to test if elements in a range are presorted
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
class quick_sort_pretest_body : no_assign {
const Compare &comp;
public:
quick_sort_pretest_body(const Compare &_comp) : comp(_comp) {}
void operator()( const blocked_range<RandomAccessIterator>& range ) const {
task &my_task = task::self();
RandomAccessIterator my_end = range.end();
int i = 0;
for (RandomAccessIterator k = range.begin(); k != my_end; ++k, ++i) {
if ( i%64 == 0 && my_task.is_cancelled() ) break;
// The k-1 is never out-of-range because the first chunk starts at begin+serial_cutoff+1
if ( comp( *(k), *(k-1) ) ) {
my_task.cancel_group_execution();
break;
}
}
}
};
#endif /* __TBB_TASK_GROUP_CONTEXT */
//! Body class used to sort elements in a range that is smaller than the grainsize.
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
struct quick_sort_body {
void operator()( const quick_sort_range<RandomAccessIterator,Compare>& range ) const {
//SerialQuickSort( range.begin, range.size, range.comp );
std::sort( range.begin, range.begin + range.size, range.comp );
}
};
//! Wrapper method to initiate the sort by calling parallel_for.
/** @ingroup algorithms */
template<typename RandomAccessIterator, typename Compare>
void parallel_quick_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp ) {
#if __TBB_TASK_GROUP_CONTEXT
task_group_context my_context(PARALLEL_SORT);
const int serial_cutoff = 9;
__TBB_ASSERT( begin + serial_cutoff < end, "min_parallel_size is smaller than serial cutoff?" );
RandomAccessIterator k = begin;
for ( ; k != begin + serial_cutoff; ++k ) {
if ( comp( *(k+1), *k ) ) {
goto do_parallel_quick_sort;
}
}
parallel_for( blocked_range<RandomAccessIterator>(k+1, end),
quick_sort_pretest_body<RandomAccessIterator,Compare>(comp),
auto_partitioner(),
my_context);
if (my_context.is_group_execution_cancelled())
do_parallel_quick_sort:
#endif /* __TBB_TASK_GROUP_CONTEXT */
parallel_for( quick_sort_range<RandomAccessIterator,Compare>(begin, end-begin, comp ),
quick_sort_body<RandomAccessIterator,Compare>(),
auto_partitioner() );
}
} // namespace internal
//! @endcond
} // namespace interfaceX
/** \page parallel_sort_iter_req Requirements on iterators for parallel_sort
Requirements on the iterator type \c It and its value type \c T for \c parallel_sort:
- \code void iter_swap( It a, It b ) \endcode Swaps the values of the elements the given
iterators \c a and \c b are pointing to. \c It should be a random access iterator.
- \code bool Compare::operator()( const T& x, const T& y ) \endcode True if x comes before y;
**/
/** \name parallel_sort
See also requirements on \ref parallel_sort_iter_req "iterators for parallel_sort". **/
//@{
//! Sorts the data in [begin,end) using the given comparator
/** The compare function object is used for all comparisons between elements during sorting.
The compare object must define a bool operator() function.
@ingroup algorithms **/
template<typename RandomAccessIterator, typename Compare>
void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end, const Compare& comp) {
const int min_parallel_size = 500;
if( end > begin ) {
if (end - begin < min_parallel_size) {
std::sort(begin, end, comp);
} else {
interface9::internal::parallel_quick_sort(begin, end, comp);
}
}
}
//! Sorts the data in [begin,end) with the default comparator \c std::less<T>, where \c T is the iterator's value type
/** @ingroup algorithms **/
template<typename RandomAccessIterator>
inline void parallel_sort( RandomAccessIterator begin, RandomAccessIterator end ) {
parallel_sort( begin, end, std::less< typename std::iterator_traits<RandomAccessIterator>::value_type >() );
}
//! Sorts the data in rng using the given comparator
/** @ingroup algorithms **/
template<typename Range, typename Compare>
void parallel_sort(Range& rng, const Compare& comp) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng), comp);
}
//! Sorts the data in rng with the default comparator \c std::less<T>, where \c T is the range's value type
/** @ingroup algorithms **/
template<typename Range>
void parallel_sort(Range& rng) {
parallel_sort(tbb::internal::first(rng), tbb::internal::last(rng));
}
//! Sorts the data in the range \c [begin,end) with a default comparator \c std::less<T>
/** @ingroup algorithms **/
template<typename T>
inline void parallel_sort( T * begin, T * end ) {
parallel_sort( begin, end, std::less< T >() );
}
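/** \par Example (illustrative sketch, not part of the original header)
    A minimal use of the overloads above; \c sort_examples is an assumption of the example:
    \code
    #include <functional>
    #include <vector>
    #include "tbb/parallel_sort.h"

    void sort_examples( std::vector<int>& v ) {
        tbb::parallel_sort( v.begin(), v.end() );                       // ascending, std::less
        tbb::parallel_sort( v.begin(), v.end(), std::greater<int>() );  // descending
        tbb::parallel_sort( v );                                        // whole-container overload
    }
    \endcode
**/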
//@}
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_sort_H_include_area
#endif

View File

@@ -0,0 +1,188 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_parallel_while
#define __TBB_parallel_while
#define __TBB_parallel_while_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "task.h"
#include <new>
namespace tbb {
template<typename Body>
class parallel_while;
//! @cond INTERNAL
namespace internal {
template<typename Stream, typename Body> class while_task;
//! For internal use only.
/** Executes one iteration of a parallel_while.
@ingroup algorithms */
template<typename Body>
class while_iteration_task: public task {
const Body& my_body;
typename Body::argument_type my_value;
task* execute() __TBB_override {
my_body(my_value);
return NULL;
}
while_iteration_task( const typename Body::argument_type& value, const Body& body ) :
my_body(body), my_value(value)
{}
template<typename Body_> friend class while_group_task;
friend class tbb::parallel_while<Body>;
};
//! For internal use only
/** Unpacks a block of iterations.
@ingroup algorithms */
template<typename Body>
class while_group_task: public task {
static const size_t max_arg_size = 4;
const Body& my_body;
size_t size;
typename Body::argument_type my_arg[max_arg_size];
while_group_task( const Body& body ) : my_body(body), size(0) {}
task* execute() __TBB_override {
typedef while_iteration_task<Body> iteration_type;
__TBB_ASSERT( size>0, NULL );
task_list list;
task* t;
size_t k=0;
for(;;) {
t = new( allocate_child() ) iteration_type(my_arg[k],my_body);
if( ++k==size ) break;
list.push_back(*t);
}
set_ref_count(int(k+1));
spawn(list);
spawn_and_wait_for_all(*t);
return NULL;
}
template<typename Stream, typename Body_> friend class while_task;
};
//! For internal use only.
/** Gets block of iterations from a stream and packages them into a while_group_task.
@ingroup algorithms */
template<typename Stream, typename Body>
class while_task: public task {
Stream& my_stream;
const Body& my_body;
empty_task& my_barrier;
task* execute() __TBB_override {
typedef while_group_task<Body> block_type;
block_type& t = *new( allocate_additional_child_of(my_barrier) ) block_type(my_body);
size_t k=0;
while( my_stream.pop_if_present(t.my_arg[k]) ) {
if( ++k==block_type::max_arg_size ) {
// There might be more iterations.
recycle_to_reexecute();
break;
}
}
if( k==0 ) {
destroy(t);
return NULL;
} else {
t.size = k;
return &t;
}
}
while_task( Stream& stream, const Body& body, empty_task& barrier ) :
my_stream(stream),
my_body(body),
my_barrier(barrier)
{}
friend class tbb::parallel_while<Body>;
};
} // namespace internal
//! @endcond
//! Parallel iteration over a stream, with optional addition of more work.
/** The Body \c b must define: \n
- \code Body::argument_type \endcode Type of the items processed \n
- \code void Body::operator()( Body::argument_type& v ) const \endcode Process item \c v;
further work may be added while running via parallel_while::add
@ingroup algorithms */
template<typename Body>
class parallel_while: internal::no_copy {
public:
//! Construct empty non-running parallel while.
parallel_while() : my_body(NULL), my_barrier(NULL) {}
//! Destructor cleans up data members before returning.
~parallel_while() {
if( my_barrier ) {
my_barrier->destroy(*my_barrier);
my_barrier = NULL;
}
}
//! Type of items
typedef typename Body::argument_type value_type;
//! Apply the body to each item in the stream.
/** A Stream \c s must define: \n
- \code S::value_type \endcode Type of the items in the stream \n
- \code bool S::pop_if_present( S::value_type& item ) \endcode Get the next item if one is available */
template<typename Stream>
void run( Stream& stream, const Body& body );
//! Add a work item while running.
/** Should be invoked only from within the body's operator() or a task spawned by it. */
void add( const value_type& item );
private:
const Body* my_body;
empty_task* my_barrier;
};
template<typename Body>
template<typename Stream>
void parallel_while<Body>::run( Stream& stream, const Body& body ) {
using namespace internal;
empty_task& barrier = *new( task::allocate_root() ) empty_task();
my_body = &body;
my_barrier = &barrier;
my_barrier->set_ref_count(2);
while_task<Stream,Body>& w = *new( my_barrier->allocate_child() ) while_task<Stream,Body>( stream, body, barrier );
my_barrier->spawn_and_wait_for_all(w);
my_barrier->destroy(*my_barrier);
my_barrier = NULL;
my_body = NULL;
}
template<typename Body>
void parallel_while<Body>::add( const value_type& item ) {
__TBB_ASSERT(my_barrier,"attempt to add to parallel_while that is not running");
typedef internal::while_iteration_task<Body> iteration_type;
iteration_type& i = *new( task::allocate_additional_child_of(*my_barrier) ) iteration_type(item,*my_body);
task::self().spawn( i );
}
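/** \par Example (illustrative sketch, not part of the original header)
    A minimal stream/body pair for parallel_while; \c ItemStream, \c ProcessItem and
    \c run_example are assumptions of the example, not part of the library:
    \code
    #include "tbb/parallel_while.h"
    #include "tbb/spin_mutex.h"

    // Stream that hands out the integers [0, limit) one at a time.
    class ItemStream {
        tbb::spin_mutex my_mutex;
        int my_next, my_limit;
    public:
        ItemStream( int limit ) : my_next(0), my_limit(limit) {}
        bool pop_if_present( int& item ) {
            tbb::spin_mutex::scoped_lock lock(my_mutex);
            if( my_next >= my_limit ) return false;
            item = my_next++;
            return true;
        }
    };

    // Body that processes one item; it could add follow-up items via parallel_while::add.
    struct ProcessItem {
        typedef int argument_type;
        void operator()( int item ) const {
            // ... process item ...
        }
    };

    void run_example() {
        ItemStream stream(1000);
        ProcessItem body;
        tbb::parallel_while<ProcessItem> w;
        w.run( stream, body );
    }
    \endcode
**/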
} // namespace
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_parallel_while_H_include_area
#endif /* __TBB_parallel_while */

View File

@@ -0,0 +1,681 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H
#define __TBB_partitioner_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when an imbalance is detected, the range is split this many additional times
#define __TBB_DEMAND_DEPTH_ADD 1
#endif
#ifndef __TBB_STATIC_THRESHOLD
// necessary number of clocks for the work to be distributed among all tasks
#define __TBB_STATIC_THRESHOLD 40000
#endif
#if __TBB_DEFINE_MIC
#define __TBB_NONUNIFORM_TASK_CREATION 1
#ifdef __TBB_time_stamp
#define __TBB_USE_MACHINE_TIME_STAMPS 1
#define __TBB_task_duration() __TBB_STATIC_THRESHOLD
#endif // __TBB_time_stamp
#endif // __TBB_DEFINE_MIC
#include "task.h"
#include "task_arena.h"
#include "aligned_space.h"
#include "atomic.h"
#include "internal/_template_helpers.h"
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
// Workaround for overzealous compiler warnings
#pragma warning (push)
#pragma warning (disable: 4244)
#endif
namespace tbb {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
namespace interface9 {
namespace internal {
class affinity_partition_type;
}
}
namespace internal { //< @cond INTERNAL
size_t __TBB_EXPORTED_FUNC get_initial_auto_partitioner_divisor();
//! Defines entry point for affinity partitioner into tbb run-time library.
class affinity_partitioner_base_v3: no_copy {
friend class tbb::affinity_partitioner;
friend class tbb::interface9::internal::affinity_partition_type;
//! Array that remembers affinities of tree positions to affinity_id.
/** NULL if my_size==0. */
affinity_id* my_array;
//! Number of elements in my_array.
size_t my_size;
//! Zeros the fields.
affinity_partitioner_base_v3() : my_array(NULL), my_size(0) {}
//! Deallocates my_array.
~affinity_partitioner_base_v3() {resize(0);}
//! Resize my_array.
/** Retains values if resulting size is the same. */
void __TBB_EXPORTED_METHOD resize( unsigned factor );
};
//! Provides backward-compatible methods for partition objects without affinity.
class partition_type_base {
public:
void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {}
task* continue_after_execute_range() {return NULL;}
bool decide_whether_to_delay() {return false;}
void spawn_or_delay( bool, task& b ) {
task::spawn(b);
}
};
template<typename Range, typename Body, typename Partitioner> class start_scan;
} //< namespace internal @endcond
namespace serial {
namespace interface9 {
template<typename Range, typename Body, typename Partitioner> class start_for;
}
}
namespace interface9 {
//! @cond INTERNAL
namespace internal {
using namespace tbb::internal;
template<typename Range, typename Body, typename Partitioner> class start_for;
template<typename Range, typename Body, typename Partitioner> class start_reduce;
template<typename Range, typename Body, typename Partitioner> class start_deterministic_reduce;
//! Join task node that contains shared flag for stealing feedback
class flag_task: public task {
public:
tbb::atomic<bool> my_child_stolen;
flag_task() { my_child_stolen = false; }
task* execute() __TBB_override { return NULL; }
static void mark_task_stolen(task &t) {
tbb::atomic<bool> &flag = static_cast<flag_task*>(t.parent())->my_child_stolen;
#if TBB_USE_THREADING_TOOLS
// Threading tools respect lock prefix but report false-positive data-race via plain store
flag.fetch_and_store<release>(true);
#else
flag = true;
#endif //TBB_USE_THREADING_TOOLS
}
static bool is_peer_stolen(task &t) {
return static_cast<flag_task*>(t.parent())->my_child_stolen;
}
};
//! Depth is the relative depth of recursive division inside a range pool. Using relative depth allows
//! unbounded absolute recursion depth for heavily unbalanced workloads whose range is represented
//! by a number that cannot fit into a machine word.
typedef unsigned char depth_t;
//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
depth_t my_head;
depth_t my_tail;
depth_t my_size;
depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
tbb::aligned_space<T, MaxCapacity> my_pool;
public:
//! initialize via first range in pool
range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
my_depth[0] = 0;
new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move?
}
~range_vector() {
while( !empty() ) pop_back();
}
bool empty() const { return my_size == 0; }
depth_t size() const { return my_size; }
//! Populates the range pool by splitting ranges up to max_depth or while they remain divisible.
//! max_depth starts from 0; e.g. a value of 2 yields up to 3 ranges in the pool, including up to two 1/4 pieces.
void split_to_fill(depth_t max_depth) {
while( my_size < MaxCapacity && is_divisible(max_depth) ) {
depth_t prev = my_head;
my_head = (my_head + 1) % MaxCapacity;
new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
my_pool.begin()[prev].~T(); // instead of assignment
new(my_pool.begin()+prev) T(my_pool.begin()[my_head], split()); // do 'inverse' split
my_depth[my_head] = ++my_depth[prev];
my_size++;
}
}
void pop_back() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
my_pool.begin()[my_head].~T();
my_size--;
my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
}
void pop_front() {
__TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
my_pool.begin()[my_tail].~T();
my_size--;
my_tail = (my_tail + 1) % MaxCapacity;
}
T& back() {
__TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
return my_pool.begin()[my_head];
}
T& front() {
__TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
return my_pool.begin()[my_tail];
}
//! similarly to front(), returns depth of the first range in the pool
depth_t front_depth() {
__TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
return my_depth[my_tail];
}
depth_t back_depth() {
__TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
return my_depth[my_head];
}
bool is_divisible(depth_t max_depth) {
return back_depth() < max_depth && back().is_divisible();
}
};
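/** \par Example (illustrative sketch, not part of the original header)
    range_vector is internal machinery; this only shows the splitting mechanics under the
    assumption of a divisible range type such as tbb::blocked_range<int>:
    \code
    typedef tbb::blocked_range<int> range_t;
    range_t whole(0, 1000, /*grainsize=*/10);
    range_vector<range_t, __TBB_RANGE_POOL_CAPACITY> pool(whole);
    pool.split_to_fill(3);           // split until relative depth 3 or the pool is full
    while( !pool.empty() ) {
        // ... run the body on pool.back(), the most recently produced range ...
        pool.pop_back();
    }
    \endcode
**/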
//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
typedef split split_type;
// decision makers
void set_affinity( task & ) {}
void note_affinity( task::affinity_id ) {}
bool check_being_stolen(task &) { return false; } // part of old should_execute_range()
bool check_for_demand(task &) { return false; }
bool is_divisible() { return true; } // part of old should_execute_range()
depth_t max_depth() { return 0; }
void align_depth(depth_t) { }
template <typename Range> split_type get_split() { return split(); }
Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper
template<typename StartType, typename Range>
void work_balance(StartType &start, Range &range) {
start.run_body( range ); // simple partitioner goes always here
}
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
// The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
// [If this task is stolen, adjust depth and divisions if necessary, set flag].
// If range is divisible {
// Spread the work while [initial divisions left];
// Create trap task [if necessary];
// }
// If not divisible or [max depth is reached], execute, else do the range pool part
if ( range.is_divisible() ) {
if ( self().is_divisible() ) {
do { // split until is divisible
typename Partition::split_type split_obj = self().template get_split<Range>();
start.offer_work( split_obj );
} while ( range.is_divisible() && self().is_divisible() );
}
}
self().work_balance(start, range);
}
};
//! Provides default splitting strategy for partition objects.
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
typedef Partition my_partition;
size_t my_divisor;
// For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
// A task that has only one index must produce the right split without a reserved index, so that the index
// is not overwritten in note_affinity() of the created (right) task.
// I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order).
static const unsigned factor = 1;
adaptive_mode() : my_divisor(tbb::internal::get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
/*! Override do_split methods in order to specify splitting strategy */
size_t do_split(adaptive_mode &src, split) {
return src.my_divisor /= 2u;
}
};
//! A helper class to create a proportional_split object for a given type of Range.
/** If the Range has the static boolean constant 'is_splittable_in_proportion' set to 'true',
the created object splits a provided value in an implementation-defined proportion;
otherwise it represents an equal-size split. */
// TODO: check if this helper can be a nested class of proportional_mode.
template <typename Range, typename = void>
struct proportion_helper {
static proportional_split get_split(size_t) { return proportional_split(1,1); }
};
template <typename Range>
struct proportion_helper<Range, typename enable_if<Range::is_splittable_in_proportion, void>::type> {
static proportional_split get_split(size_t n) {
#if __TBB_NONUNIFORM_TASK_CREATION
size_t right = (n + 2) / 3;
#else
size_t right = n / 2;
#endif
size_t left = n - right;
return proportional_split(left, right);
}
};
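// A worked example (illustrative): for n == 8 reserved slots, the non-uniform branch yields
// right = (8 + 2) / 3 == 3 and left == 5, i.e. proportional_split(5, 3); the uniform branch
// yields the even proportional_split(4, 4).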
//! Provides proportional splitting strategy for partition objects
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
typedef Partition my_partition;
using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes
proportional_mode() : adaptive_mode<Partition>() {}
proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
proportional_mode(proportional_mode &src, const proportional_split& split_obj) { self().my_divisor = do_split(src, split_obj); }
size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
#if __TBB_ENABLE_RANGE_FEEDBACK
size_t portion = size_t(float(src.my_divisor) * float(split_obj.right())
/ float(split_obj.left() + split_obj.right()) + 0.5f);
#else
size_t portion = split_obj.right() * my_partition::factor;
#endif
portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
#if __TBB_ENABLE_RANGE_FEEDBACK
/** Corner case handling */
if (!portion)
portion = my_partition::factor;
else if (portion == src.my_divisor)
portion = src.my_divisor - my_partition::factor;
#endif
src.my_divisor -= portion;
return portion;
}
bool is_divisible() { // part of old should_execute_range()
return self().my_divisor > my_partition::factor;
}
template <typename Range>
proportional_split get_split() {
// Create a proportion for the number of threads expected to handle "this" subrange
return proportion_helper<Range>::get_split( self().my_divisor / my_partition::factor );
}
};
static size_t get_initial_partition_head() {
int current_index = tbb::this_task_arena::current_thread_index();
if (current_index == tbb::task_arena::not_initialized)
current_index = 0;
return size_t(current_index);
}
//! Provides default linear indexing of partitioner's sequence
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
size_t my_head;
size_t my_max_affinity;
using proportional_mode<Partition>::self;
linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
my_max_affinity(self().my_divisor) {}
linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
, my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
, my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
void set_affinity( task &t ) {
if( self().my_divisor )
t.set_affinity( affinity_id(my_head) + 1 );
}
};
/*! Determines the work-balance phase, implementing the splitting & stealing actions */
template<class Mode>
struct dynamic_grainsize_mode : Mode {
using Mode::self;
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
tbb::internal::machine_tsc_t my_dst_tsc;
#endif
enum {
begin = 0,
run,
pass
} my_delay;
depth_t my_max_depth;
static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
dynamic_grainsize_mode(): Mode()
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
, my_dst_tsc(0)
#endif
, my_delay(begin)
, my_max_depth(__TBB_INIT_DEPTH) {}
dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
: Mode(p, split())
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
, my_dst_tsc(0)
#endif
, my_delay(pass)
, my_max_depth(p.my_max_depth) {}
dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
: Mode(p, split_obj)
#ifdef __TBB_USE_MACHINE_TIME_STAMPS
, my_dst_tsc(0)
#endif
, my_delay(begin)
, my_max_depth(p.my_max_depth) {}
bool check_being_stolen(task &t) { // part of old should_execute_range()
if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
if( t.is_stolen_task() && t.parent()->ref_count() >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
// RTTI is available, check whether the cast is valid
__TBB_ASSERT(dynamic_cast<flag_task*>(t.parent()), 0);
// correctness of the cast relies on avoiding the root task for which:
// - initial value of my_divisor != 0 (protected by separate assertion)
// - is_stolen_task() always returns false for the root task.
#endif
flag_task::mark_task_stolen(t);
if( !my_max_depth ) my_max_depth++;
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
}
}
return false;
}
depth_t max_depth() { return my_max_depth; }
void align_depth(depth_t base) {
__TBB_ASSERT(base <= my_max_depth, 0);
my_max_depth -= base;
}
template<typename StartType, typename Range>
void work_balance(StartType &start, Range &range) {
if( !range.is_divisible() || !self().max_depth() ) {
start.run_body( range ); // the simple partitioner always takes this path
}
else { // do range pool
internal::range_vector<Range, range_pool_size> range_pool(range);
do {
range_pool.split_to_fill(self().max_depth()); // fill range pool
if( self().check_for_demand( start ) ) {
if( range_pool.size() > 1 ) {
start.offer_work( range_pool.front(), range_pool.front_depth() );
range_pool.pop_front();
continue;
}
if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
continue; // note: next split_to_fill() should split range at least once
}
start.run_body( range_pool.back() );
range_pool.pop_back();
} while( !range_pool.empty() && !start.is_cancelled() );
}
}
bool check_for_demand( task &t ) {
if( pass == my_delay ) {
if( self().my_divisor > 1 ) // produce affinitized tasks while they have a slot in the array
return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
else if( self().my_divisor && my_max_depth ) { // make balancing task
self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
return true;
}
else if( flag_task::is_peer_stolen(t) ) {
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
}
} else if( begin == my_delay ) {
#ifndef __TBB_USE_MACHINE_TIME_STAMPS
my_delay = pass;
#else
my_dst_tsc = __TBB_time_stamp() + __TBB_task_duration();
my_delay = run;
} else if( run == my_delay ) {
if( __TBB_time_stamp() < my_dst_tsc ) {
__TBB_ASSERT(my_max_depth > 0, NULL);
my_max_depth--; // increase granularity since tasks seem to have too little work
return false;
}
my_delay = pass;
return true;
#endif // __TBB_USE_MACHINE_TIME_STAMPS
}
return false;
}
};
class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
auto_partition_type( const auto_partitioner& )
: dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >() {
my_divisor *= __TBB_INITIAL_CHUNKS;
}
auto_partition_type( auto_partition_type& src, split)
: dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
bool is_divisible() { // part of old should_execute_range()
if( my_divisor > 1 ) return true;
if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead
// keep same fragmentation while splitting for the local task pool
my_max_depth--;
my_divisor = 0; // decrease max_depth once per task
return true;
} else return false;
}
bool check_for_demand(task &t) {
if( flag_task::is_peer_stolen(t) ) {
my_max_depth += __TBB_DEMAND_DEPTH_ADD;
return true;
} else return false;
}
};
class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
simple_partition_type( const simple_partitioner& ) {}
simple_partition_type( const simple_partition_type&, split ) {}
//! simplified algorithm
template<typename StartType, typename Range>
void execute(StartType &start, Range &range) {
split_type split_obj = split(); // start.offer_work accepts split_type as reference
while( range.is_divisible() )
start.offer_work( split_obj );
start.run_body( range );
}
};
class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
typedef proportional_split split_type;
static_partition_type( const static_partitioner& )
: linear_affinity_mode<static_partition_type>() {}
static_partition_type( static_partition_type& p, split )
: linear_affinity_mode<static_partition_type>(p, split()) {}
static_partition_type( static_partition_type& p, const proportional_split& split_obj )
: linear_affinity_mode<static_partition_type>(p, split_obj) {}
};
class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
tbb::internal::affinity_id* my_array;
public:
static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
typedef proportional_split split_type;
affinity_partition_type( tbb::internal::affinity_partitioner_base_v3& ap )
: dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >() {
__TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
ap.resize(factor);
my_array = ap.my_array;
my_max_depth = factor_power + 1;
__TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, 0 );
}
affinity_partition_type(affinity_partition_type& p, split)
: dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
, my_array(p.my_array) {}
affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
: dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
, my_array(p.my_array) {}
void set_affinity( task &t ) {
if( my_divisor ) {
if( !my_array[my_head] )
// TODO: consider new ideas with my_array for both affinity and static partitioners, then reuse the code
t.set_affinity( affinity_id(my_head / factor + 1) );
else
t.set_affinity( my_array[my_head] );
}
}
void note_affinity( task::affinity_id id ) {
if( my_divisor )
my_array[my_head] = id;
}
};
//! Backward-compatible partition for auto and affinity partition objects.
class old_auto_partition_type: public tbb::internal::partition_type_base {
size_t num_chunks;
static const size_t VICTIM_CHUNKS = 4;
public:
bool should_execute_range(const task &t) {
if( num_chunks<VICTIM_CHUNKS && t.is_stolen_task() )
num_chunks = VICTIM_CHUNKS;
return num_chunks==1;
}
old_auto_partition_type( const auto_partitioner& )
: num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( const affinity_partitioner& )
: num_chunks(internal::get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
old_auto_partition_type( old_auto_partition_type& pt, split ) {
num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
}
};
} // namespace interfaceX::internal
//! @endcond
} // namespace interfaceX
//! A simple partitioner
/** Divides the range until the range is not divisible.
@ingroup algorithms */
class simple_partitioner {
public:
simple_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_deterministic_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
class partition_type: public internal::partition_type_base {
public:
bool should_execute_range(const task& ) {return false;}
partition_type( const simple_partitioner& ) {}
partition_type( const partition_type&, split ) {}
};
// new implementation just extends existing interface
typedef interface9::internal::simple_partition_type task_partition_type;
// TODO: consider making split_type public
typedef interface9::internal::simple_partition_type::split_type split_type;
};
//! An auto partitioner
/** The range is initially divided into several large chunks.
Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
@ingroup algorithms */
class auto_partitioner {
public:
auto_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
typedef interface9::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface9::internal::auto_partition_type task_partition_type;
// TODO: consider making split_type public
typedef interface9::internal::auto_partition_type::split_type split_type;
};
//! A static partitioner
class static_partitioner {
public:
static_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_deterministic_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility
typedef interface9::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface9::internal::static_partition_type task_partition_type;
// TODO: consider making split_type public
typedef interface9::internal::static_partition_type::split_type split_type;
};
//! An affinity partitioner
class affinity_partitioner: internal::affinity_partitioner_base_v3 {
public:
affinity_partitioner() {}
private:
template<typename Range, typename Body, typename Partitioner> friend class serial::interface9::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_for;
template<typename Range, typename Body, typename Partitioner> friend class interface9::internal::start_reduce;
template<typename Range, typename Body, typename Partitioner> friend class internal::start_scan;
// backward compatibility - for parallel_scan only
typedef interface9::internal::old_auto_partition_type partition_type;
// new implementation just extends existing interface
typedef interface9::internal::affinity_partition_type task_partition_type;
// TODO: consider making split_type public
typedef interface9::internal::affinity_partition_type::split_type split_type;
};
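// Illustrative usage sketch of the partitioners above (assumes the client code also includes
// tbb/parallel_for.h and tbb/blocked_range.h; 'data' and 'n' are hypothetical):
//
//     tbb::affinity_partitioner ap;        // reuse across calls so the affinity hints accumulate
//     for( int iter = 0; iter < 10; ++iter )
//         tbb::parallel_for(
//             tbb::blocked_range<size_t>(0, n),
//             [&]( const tbb::blocked_range<size_t>& r ) {
//                 for( size_t i = r.begin(); i != r.end(); ++i )
//                     data[i] *= 2;
//             },
//             ap );                        // simple_partitioner(), auto_partitioner() or
//                                          // static_partitioner() may be passed here instead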
} // namespace tbb
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma warning (pop)
#endif // warning 4244 is back
#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_partitioner_H_include_area
#endif /* __TBB_partitioner_H */


@@ -0,0 +1,682 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_pipeline_H
#define __TBB_pipeline_H
#define __TBB_pipeline_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include "atomic.h"
#include "task.h"
#include "tbb_allocator.h"
#include <cstddef>
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT
#include <type_traits>
#endif
namespace tbb {
class pipeline;
class filter;
//! @cond INTERNAL
namespace internal {
// The argument for PIPELINE_VERSION should be an integer between 2 and 9
#define __TBB_PIPELINE_VERSION(x) ((unsigned char)(x-2)<<1)
typedef unsigned long Token;
typedef long tokendiff_t;
class stage_task;
class input_buffer;
class pipeline_root_task;
class pipeline_cleaner;
} // namespace internal
namespace interface6 {
template<typename T, typename U> class filter_t;
namespace internal {
class pipeline_proxy;
}
}
//! @endcond
//! A stage in a pipeline.
/** @ingroup algorithms */
class filter: internal::no_copy {
private:
//! Value used to mark "not in pipeline"
static filter* not_in_pipeline() { return reinterpret_cast<filter*>(intptr_t(-1)); }
protected:
//! The lowest bit 0 is for parallel vs. serial
static const unsigned char filter_is_serial = 0x1;
//! 4th bit distinguishes ordered vs unordered filters.
/** The bit was not set for parallel filters in TBB 2.1 and earlier,
but the is_ordered() function always treats parallel filters as out of order. */
static const unsigned char filter_is_out_of_order = 0x1<<4;
//! 5th bit distinguishes thread-bound and regular filters.
static const unsigned char filter_is_bound = 0x1<<5;
//! 6th bit marks input filters emitting small objects
static const unsigned char filter_may_emit_null = 0x1<<6;
//! 7th bit defines exception propagation mode expected by the application.
static const unsigned char exact_exception_propagation =
#if TBB_USE_CAPTURED_EXCEPTION
0x0;
#else
0x1<<7;
#endif /* TBB_USE_CAPTURED_EXCEPTION */
static const unsigned char current_version = __TBB_PIPELINE_VERSION(5);
static const unsigned char version_mask = 0x7<<1; // bits 1-3 are for version
public:
enum mode {
//! processes multiple items in parallel and in no particular order
parallel = current_version | filter_is_out_of_order,
//! processes items one at a time; all such filters process items in the same order
serial_in_order = current_version | filter_is_serial,
//! processes items one at a time and in no particular order
serial_out_of_order = current_version | filter_is_serial | filter_is_out_of_order,
//! @deprecated use serial_in_order instead
serial = serial_in_order
};
protected:
explicit filter( bool is_serial_ ) :
next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(NULL),
my_filter_mode(static_cast<unsigned char>((is_serial_ ? serial : parallel) | exact_exception_propagation)),
prev_filter_in_pipeline(not_in_pipeline()),
my_pipeline(NULL),
next_segment(NULL)
{}
explicit filter( mode filter_mode ) :
next_filter_in_pipeline(not_in_pipeline()),
my_input_buffer(NULL),
my_filter_mode(static_cast<unsigned char>(filter_mode | exact_exception_propagation)),
prev_filter_in_pipeline(not_in_pipeline()),
my_pipeline(NULL),
next_segment(NULL)
{}
// signal end-of-input for concrete_filters
void __TBB_EXPORTED_METHOD set_end_of_input();
public:
//! True if filter is serial.
bool is_serial() const {
return bool( my_filter_mode & filter_is_serial );
}
//! True if filter must receive stream in order.
bool is_ordered() const {
return (my_filter_mode & (filter_is_out_of_order|filter_is_serial))==filter_is_serial;
}
//! True if filter is thread-bound.
bool is_bound() const {
return ( my_filter_mode & filter_is_bound )==filter_is_bound;
}
//! true if an input filter can emit null
bool object_may_be_null() {
return ( my_filter_mode & filter_may_emit_null ) == filter_may_emit_null;
}
//! Operate on an item from the input stream, and return item for output stream.
/** Returns NULL if filter is a sink. */
virtual void* operator()( void* item ) = 0;
//! Destroy filter.
/** If the filter was added to a pipeline, the pipeline must be destroyed first. */
virtual __TBB_EXPORTED_METHOD ~filter();
#if __TBB_TASK_GROUP_CONTEXT
//! Destroys item if pipeline was cancelled.
/** Required to prevent memory leaks.
Note it can be called concurrently even for serial filters.*/
virtual void finalize( void* /*item*/ ) {}
#endif
private:
//! Pointer to next filter in the pipeline.
filter* next_filter_in_pipeline;
//! has the filter not yet processed all the tokens it will ever see?
// (pipeline has not yet reached end_of_input or this filter has not yet
// seen the last token produced by input_filter)
bool has_more_work();
//! Buffer for incoming tokens, or NULL if not required.
/** The buffer is required if the filter is serial or follows a thread-bound one. */
internal::input_buffer* my_input_buffer;
friend class internal::stage_task;
friend class internal::pipeline_root_task;
friend class pipeline;
friend class thread_bound_filter;
//! Storage for filter mode and dynamically checked implementation version.
const unsigned char my_filter_mode;
//! Pointer to previous filter in the pipeline.
filter* prev_filter_in_pipeline;
//! Pointer to the pipeline.
pipeline* my_pipeline;
//! Pointer to the next "segment" of filters, or NULL if not required.
/** In each segment, the first filter is not thread-bound but follows a thread-bound one. */
filter* next_segment;
};
//! A stage in a pipeline served by a user thread.
/** @ingroup algorithms */
class thread_bound_filter: public filter {
public:
enum result_type {
// item was processed
success,
// item is currently not available
item_not_available,
// there are no more items to process
end_of_stream
};
protected:
explicit thread_bound_filter(mode filter_mode):
filter(static_cast<mode>(filter_mode | filter::filter_is_bound))
{
__TBB_ASSERT(filter_mode & filter::filter_is_serial, "thread-bound filters must be serial");
}
public:
//! If a data item is available, invoke operator() on that item.
/** This interface is non-blocking.
Returns 'success' if an item was processed.
Returns 'item_not_available' if no item can be processed now
but more may arrive in the future, or if token limit is reached.
Returns 'end_of_stream' if there are no more items to process. */
result_type __TBB_EXPORTED_METHOD try_process_item();
//! Wait until a data item becomes available, and invoke operator() on that item.
/** This interface is blocking.
Returns 'success' if an item was processed.
Returns 'end_of_stream' if there are no more items to process.
Never returns 'item_not_available', as it blocks until another return condition applies. */
result_type __TBB_EXPORTED_METHOD process_item();
private:
//! Internal routine for item processing
result_type internal_process_item(bool is_blocking);
};
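// Illustrative sketch: a thread_bound_filter is serviced explicitly by the thread that owns it,
// while the pipeline runs on another thread (MyOutputFilter is a hypothetical subclass):
//
//     MyOutputFilter output( tbb::filter::serial_in_order );
//     // ... add filters to a pipeline and start pipeline.run() on a separate thread ...
//     while( output.process_item() != tbb::thread_bound_filter::end_of_stream )
//         continue;                         // process_item() blocks until an item arrives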
//! A processing pipeline that applies filters to items.
/** @ingroup algorithms */
class __TBB_DEPRECATED_MSG("tbb::pipeline is deprecated, use tbb::parallel_pipeline") pipeline {
public:
//! Construct empty pipeline.
__TBB_EXPORTED_METHOD pipeline();
/** Though the current implementation declares the destructor virtual, do not rely on this
detail. The virtualness is deprecated and may disappear in future versions of TBB. */
virtual __TBB_EXPORTED_METHOD ~pipeline();
//! Add filter to end of pipeline.
void __TBB_EXPORTED_METHOD add_filter( filter& filter_ );
//! Run the pipeline to completion.
void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens );
#if __TBB_TASK_GROUP_CONTEXT
//! Run the pipeline to completion with user-supplied context.
void __TBB_EXPORTED_METHOD run( size_t max_number_of_live_tokens, tbb::task_group_context& context );
#endif
//! Remove all filters from the pipeline.
void __TBB_EXPORTED_METHOD clear();
private:
friend class internal::stage_task;
friend class internal::pipeline_root_task;
friend class filter;
friend class thread_bound_filter;
friend class internal::pipeline_cleaner;
friend class tbb::interface6::internal::pipeline_proxy;
//! Pointer to first filter in the pipeline.
filter* filter_list;
//! Pointer to location where address of next filter to be added should be stored.
filter* filter_end;
//! Task whose reference count is used to determine when all stages are done.
task* end_counter;
//! Number of idle tokens waiting for input stage.
atomic<internal::Token> input_tokens;
//! Global counter of tokens
atomic<internal::Token> token_counter;
//! False until fetch_input returns NULL.
bool end_of_input;
//! True if the pipeline contains a thread-bound filter; false otherwise.
bool has_thread_bound_filters;
//! Remove filter from pipeline.
void remove_filter( filter& filter_ );
//! Not used, but retained to satisfy old export files.
void __TBB_EXPORTED_METHOD inject_token( task& self );
#if __TBB_TASK_GROUP_CONTEXT
//! Performs cleanup if the pipeline is cancelled or an exception occurs
void clear_filters();
#endif
};
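// Illustrative sketch of the classic (now deprecated) interface: a user-defined stage derives
// from tbb::filter and overrides operator() (MySquareStage and the input stage are hypothetical):
//
//     class MySquareStage : public tbb::filter {
//     public:
//         MySquareStage() : tbb::filter( tbb::filter::parallel ) {}
//         void* operator()( void* item ) __TBB_override {
//             float* v = static_cast<float*>(item);
//             *v = *v * *v;
//             return v;                     // hand the same buffer to the next stage
//         }
//     };
//
//     MySquareStage square_stage;
//     tbb::pipeline p;
//     p.add_filter( input_stage );          // a serial_in_order input filter (not shown)
//     p.add_filter( square_stage );
//     p.run( /*max_number_of_live_tokens=*/ 8 );
//     p.clear();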
//------------------------------------------------------------------------
// Support for lambda-friendly parallel_pipeline interface
//------------------------------------------------------------------------
namespace interface6 {
namespace internal {
template<typename T, typename U, typename Body> class concrete_filter;
}
//! input_filter control to signal end-of-input for parallel_pipeline
class flow_control {
bool is_pipeline_stopped;
flow_control() { is_pipeline_stopped = false; }
template<typename T, typename U, typename Body> friend class internal::concrete_filter;
public:
void stop() { is_pipeline_stopped = true; }
};
//! @cond INTERNAL
namespace internal {
// Emulate std::is_trivially_copyable (false positives not allowed, false negatives suboptimal but safe).
#if __TBB_CPP11_TYPE_PROPERTIES_PRESENT
template<typename T> struct tbb_trivially_copyable { enum { value = std::is_trivially_copyable<T>::value }; };
#else
template<typename T> struct tbb_trivially_copyable { enum { value = false }; };
template<typename T> struct tbb_trivially_copyable < T* > { enum { value = true }; };
template<> struct tbb_trivially_copyable < bool > { enum { value = true }; };
template<> struct tbb_trivially_copyable < char > { enum { value = true }; };
template<> struct tbb_trivially_copyable < signed char > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned char > { enum { value = true }; };
template<> struct tbb_trivially_copyable < short > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned short > { enum { value = true }; };
template<> struct tbb_trivially_copyable < int > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned int > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long > { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned long > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long long> { enum { value = true }; };
template<> struct tbb_trivially_copyable <unsigned long long> { enum { value = true }; };
template<> struct tbb_trivially_copyable < float > { enum { value = true }; };
template<> struct tbb_trivially_copyable < double > { enum { value = true }; };
template<> struct tbb_trivially_copyable < long double > { enum { value = true }; };
#if !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED)
template<> struct tbb_trivially_copyable < wchar_t > { enum { value = true }; };
#endif /* !_MSC_VER || defined(_NATIVE_WCHAR_T_DEFINED) */
#endif // tbb_trivially_copyable
template<typename T>
struct use_allocator {
enum { value = sizeof(T) > sizeof(void *) || !tbb_trivially_copyable<T>::value };
};
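// For instance (illustrative, on a typical 64-bit target): use_allocator<int>::value is 0,
// so an int travels inside the void* token itself, whereas use_allocator<std::string>::value
// is 1 and strings are heap-allocated through tbb_allocator.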
// A helper class to customize how a type is passed between filters.
// Usage: token_helper<T, use_allocator<T>::value>
template<typename T, bool Allocate> class token_helper;
// using tbb_allocator
template<typename T>
class token_helper<T, true> {
public:
typedef typename tbb::tbb_allocator<T> allocator;
typedef T* pointer;
typedef T value_type;
#if __TBB_CPP11_RVALUE_REF_PRESENT
static pointer create_token(value_type && source)
#else
static pointer create_token(const value_type & source)
#endif
{
pointer output_t = allocator().allocate(1);
return new (output_t) T(tbb::internal::move(source));
}
static value_type & token(pointer & t) { return *t; }
static void * cast_to_void_ptr(pointer ref) { return (void *) ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token(pointer token) {
allocator().destroy(token);
allocator().deallocate(token,1);
}
};
// pointer specialization
template<typename T>
class token_helper<T*, false> {
public:
typedef T* pointer;
typedef T* value_type;
static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t; }
static void * cast_to_void_ptr(pointer ref) { return (void *)ref; }
static pointer cast_from_void_ptr(void * ref) { return (pointer)ref; }
static void destroy_token( pointer /*token*/) {}
};
// converting type to and from void*, passing objects directly
template<typename T>
class token_helper<T, false> {
typedef union {
T actual_value;
void * void_overlay;
} type_to_void_ptr_map;
public:
typedef T pointer; // not really a pointer in this case.
typedef T value_type;
static pointer create_token(const value_type & source) { return source; }
static value_type & token(pointer & t) { return t; }
static void * cast_to_void_ptr(pointer ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = NULL;
mymap.actual_value = ref;
return mymap.void_overlay;
}
static pointer cast_from_void_ptr(void * ref) {
type_to_void_ptr_map mymap;
mymap.void_overlay = ref;
return mymap.actual_value;
}
static void destroy_token( pointer /*token*/) {}
};
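// E.g. token_helper<int,false>: cast_to_void_ptr() writes the int's bits into a zero-initialized
// void*-sized union and cast_from_void_ptr() reads them back, so small trivially copyable tokens
// cross filter boundaries without touching the heap (illustrative note).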
// intermediate
template<typename T, typename U, typename Body>
class concrete_filter: public tbb::filter {
const Body& my_body;
typedef token_helper<T,use_allocator<T>::value> t_helper;
typedef typename t_helper::pointer t_pointer;
typedef token_helper<U,use_allocator<U>::value> u_helper;
typedef typename u_helper::pointer u_pointer;
void* operator()(void* input) __TBB_override {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
u_pointer output_u = u_helper::create_token(my_body(tbb::internal::move(t_helper::token(temp_input))));
t_helper::destroy_token(temp_input);
return u_helper::cast_to_void_ptr(output_u);
}
void finalize(void * input) __TBB_override {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
t_helper::destroy_token(temp_input);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
// input
template<typename U, typename Body>
class concrete_filter<void,U,Body>: public filter {
const Body& my_body;
typedef token_helper<U, use_allocator<U>::value> u_helper;
typedef typename u_helper::pointer u_pointer;
void* operator()(void*) __TBB_override {
flow_control control;
u_pointer output_u = u_helper::create_token(my_body(control));
if(control.is_pipeline_stopped) {
u_helper::destroy_token(output_u);
set_end_of_input();
return NULL;
}
return u_helper::cast_to_void_ptr(output_u);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) :
filter(static_cast<tbb::filter::mode>(filter_mode | filter_may_emit_null)),
my_body(body)
{}
};
// output
template<typename T, typename Body>
class concrete_filter<T,void,Body>: public filter {
const Body& my_body;
typedef token_helper<T, use_allocator<T>::value> t_helper;
typedef typename t_helper::pointer t_pointer;
void* operator()(void* input) __TBB_override {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
my_body(tbb::internal::move(t_helper::token(temp_input)));
t_helper::destroy_token(temp_input);
return NULL;
}
void finalize(void* input) __TBB_override {
t_pointer temp_input = t_helper::cast_from_void_ptr(input);
t_helper::destroy_token(temp_input);
}
public:
concrete_filter(tbb::filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
template<typename Body>
class concrete_filter<void,void,Body>: public filter {
const Body& my_body;
void* operator()(void*) __TBB_override {
flow_control control;
my_body(control);
void* output = control.is_pipeline_stopped ? NULL : (void*)(intptr_t)-1;
return output;
}
public:
concrete_filter(filter::mode filter_mode, const Body& body) : filter(filter_mode), my_body(body) {}
};
//! The class that represents an object of the pipeline for parallel_pipeline().
/** It primarily serves as RAII class that deletes heap-allocated filter instances. */
class pipeline_proxy {
tbb::pipeline my_pipe;
public:
pipeline_proxy( const filter_t<void,void>& filter_chain );
~pipeline_proxy() {
while( filter* f = my_pipe.filter_list )
delete f; // filter destructor removes it from the pipeline
}
tbb::pipeline* operator->() { return &my_pipe; }
};
//! Abstract base class that represents a node in a parse tree underlying a filter_t.
/** These nodes are always heap-allocated and can be shared by filter_t objects. */
class filter_node: tbb::internal::no_copy {
/** Count must be atomic because it is hidden state for user, but might be shared by threads. */
tbb::atomic<intptr_t> ref_count;
protected:
filter_node() {
ref_count = 0;
#ifdef __TBB_TEST_FILTER_NODE_COUNT
++(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
public:
//! Add concrete_filter to pipeline
virtual void add_to( pipeline& ) = 0;
//! Increment reference count
void add_ref() { ++ref_count; }
//! Decrement reference count and delete if it becomes zero.
void remove_ref() {
__TBB_ASSERT(ref_count>0,"ref_count underflow");
if( --ref_count==0 )
delete this;
}
virtual ~filter_node() {
#ifdef __TBB_TEST_FILTER_NODE_COUNT
--(__TBB_TEST_FILTER_NODE_COUNT);
#endif
}
};
//! Node in parse tree representing result of make_filter.
template<typename T, typename U, typename Body>
class filter_node_leaf: public filter_node {
const tbb::filter::mode mode;
const Body body;
void add_to( pipeline& p ) __TBB_override {
concrete_filter<T,U,Body>* f = new concrete_filter<T,U,Body>(mode,body);
p.add_filter( *f );
}
public:
filter_node_leaf( tbb::filter::mode m, const Body& b ) : mode(m), body(b) {}
};
//! Node in parse tree representing join of two filters.
class filter_node_join: public filter_node {
friend class filter_node; // to suppress GCC 3.2 warnings
filter_node& left;
filter_node& right;
~filter_node_join() {
left.remove_ref();
right.remove_ref();
}
void add_to( pipeline& p ) __TBB_override {
left.add_to(p);
right.add_to(p);
}
public:
filter_node_join( filter_node& x, filter_node& y ) : left(x), right(y) {
left.add_ref();
right.add_ref();
}
};
} // namespace internal
//! @endcond
//! Create a filter to participate in parallel_pipeline
template<typename T, typename U, typename Body>
filter_t<T,U> make_filter(tbb::filter::mode mode, const Body& body) {
return new internal::filter_node_leaf<T,U,Body>(mode, body);
}
template<typename T, typename V, typename U>
filter_t<T,U> operator& (const filter_t<T,V>& left, const filter_t<V,U>& right) {
__TBB_ASSERT(left.root,"cannot use default-constructed filter_t as left argument of '&'");
__TBB_ASSERT(right.root,"cannot use default-constructed filter_t as right argument of '&'");
return new internal::filter_node_join(*left.root,*right.root);
}
//! Class representing a chain of type-safe pipeline filters
template<typename T, typename U>
class filter_t {
typedef internal::filter_node filter_node;
filter_node* root;
filter_t( filter_node* root_ ) : root(root_) {
root->add_ref();
}
friend class internal::pipeline_proxy;
template<typename T_, typename U_, typename Body>
friend filter_t<T_,U_> make_filter(tbb::filter::mode, const Body& );
template<typename T_, typename V_, typename U_>
friend filter_t<T_,U_> operator& (const filter_t<T_,V_>& , const filter_t<V_,U_>& );
public:
// TODO: add move-constructors, move-assignment, etc. where C++11 is available.
filter_t() : root(NULL) {}
filter_t( const filter_t<T,U>& rhs ) : root(rhs.root) {
if( root ) root->add_ref();
}
template<typename Body>
filter_t( tbb::filter::mode mode, const Body& body ) :
root( new internal::filter_node_leaf<T,U,Body>(mode, body) ) {
root->add_ref();
}
void operator=( const filter_t<T,U>& rhs ) {
// The order of operations below is carefully chosen so that reference counts remain correct
// in the unlikely event that remove_ref() throws an exception.
filter_node* old = root;
root = rhs.root;
if( root ) root->add_ref();
if( old ) old->remove_ref();
}
~filter_t() {
if( root ) root->remove_ref();
}
void clear() {
// Like operator= with filter_t() on right side.
if( root ) {
filter_node* old = root;
root = NULL;
old->remove_ref();
}
}
};
inline internal::pipeline_proxy::pipeline_proxy( const filter_t<void,void>& filter_chain ) : my_pipe() {
__TBB_ASSERT( filter_chain.root, "cannot apply parallel_pipeline to default-constructed filter_t" );
filter_chain.root->add_to(my_pipe);
}
inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain
#if __TBB_TASK_GROUP_CONTEXT
, tbb::task_group_context& context
#endif
) {
internal::pipeline_proxy pipe(filter_chain);
// tbb::pipeline::run() is called via the proxy
pipe->run(max_number_of_live_tokens
#if __TBB_TASK_GROUP_CONTEXT
, context
#endif
);
}
#if __TBB_TASK_GROUP_CONTEXT
inline void parallel_pipeline(size_t max_number_of_live_tokens, const filter_t<void,void>& filter_chain) {
tbb::task_group_context context;
parallel_pipeline(max_number_of_live_tokens, filter_chain, context);
}
#endif // __TBB_TASK_GROUP_CONTEXT
} // interface6
using interface6::flow_control;
using interface6::filter_t;
using interface6::make_filter;
using interface6::parallel_pipeline;
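// Illustrative usage sketch of the lambda-friendly interface (the input stream 'in' and the
// counter 'line_count' are hypothetical; <string> and <istream> are assumed to be available):
//
//     size_t line_count = 0;
//     tbb::parallel_pipeline( /*max_number_of_live_tokens=*/ 16,
//         tbb::make_filter<void, std::string>( tbb::filter::serial_in_order,
//             [&]( tbb::flow_control& fc ) -> std::string {
//                 std::string line;
//                 if( !std::getline( in, line ) ) { fc.stop(); return std::string(); }
//                 return line;
//             } )
//         & tbb::make_filter<std::string, void>( tbb::filter::serial_in_order,
//             [&]( const std::string& ) { ++line_count; } ) );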
} // tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_pipeline_H_include_area
#endif /* __TBB_pipeline_H */


@@ -0,0 +1,113 @@
/*
Copyright (c) 2005-2020 Intel Corporation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#ifndef __TBB_queuing_mutex_H
#define __TBB_queuing_mutex_H
#define __TBB_queuing_mutex_H_include_area
#include "internal/_warning_suppress_enable_notice.h"
#include <cstring>
#include "atomic.h"
#include "tbb_profiling.h"
namespace tbb {
//! Queuing mutex with local-only spinning.
/** @ingroup synchronization */
class queuing_mutex : internal::mutex_copy_deprecated_and_disabled {
public:
//! Construct unacquired mutex.
queuing_mutex() {
q_tail = NULL;
#if TBB_USE_THREADING_TOOLS
internal_construct();
#endif
}
//! The scoped locking pattern
/** It helps to avoid the common problem of forgetting to release the lock.
It also nicely provides the "node" for queuing locks. */
class scoped_lock: internal::no_copy {
//! Initialize fields to mean "no lock held".
void initialize() {
mutex = NULL;
going = 0;
#if TBB_USE_ASSERT
internal::poison_pointer(next);
#endif /* TBB_USE_ASSERT */
}
public:
//! Construct lock that has not acquired a mutex.
/** Equivalent to zero-initialization of *this. */
scoped_lock() {initialize();}
//! Acquire lock on given mutex.
scoped_lock( queuing_mutex& m ) {
initialize();
acquire(m);
}
//! Release lock (if lock is held).
~scoped_lock() {
if( mutex ) release();
}
//! Acquire lock on given mutex.
void __TBB_EXPORTED_METHOD acquire( queuing_mutex& m );
//! Acquire lock on given mutex if free (i.e. non-blocking)
bool __TBB_EXPORTED_METHOD try_acquire( queuing_mutex& m );
//! Release lock.
void __TBB_EXPORTED_METHOD release();
private:
//! The pointer to the mutex owned, or NULL if not holding a mutex.
queuing_mutex* mutex;
//! The pointer to the next competitor for a mutex
scoped_lock *next;
//! The local spin-wait variable
/** Inverted (0 - blocked, 1 - acquired the mutex) for the sake of
zero-initialization. Defining it as an entire word instead of
a byte seems to help performance slightly. */
uintptr_t going;
};
void __TBB_EXPORTED_METHOD internal_construct();
// Mutex traits
static const bool is_rw_mutex = false;
static const bool is_recursive_mutex = false;
static const bool is_fair_mutex = true;
private:
//! The last competitor requesting the lock
atomic<scoped_lock*> q_tail;
};
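// Illustrative usage sketch (my_mutex and shared_counter are hypothetical):
//
//     tbb::queuing_mutex my_mutex;
//     ...
//     {
//         tbb::queuing_mutex::scoped_lock lock( my_mutex );   // blocks until the mutex is acquired
//         ++shared_counter;                                   // protected region
//     }                                                       // released by the destructor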
__TBB_DEFINE_PROFILING_SET_NAME(queuing_mutex)
} // namespace tbb
#include "internal/_warning_suppress_disable_notice.h"
#undef __TBB_queuing_mutex_H_include_area
#endif /* __TBB_queuing_mutex_H */
