Skip to content
Merged
2 changes: 1 addition & 1 deletion include/oneapi/dpl/pstl/glue_algorithm_ranges_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1578,7 +1578,7 @@ copy_if(_ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result, _Predica

return oneapi::dpl::__internal::__ranges::__pattern_copy_if(
__dispatch_tag, ::std::forward<_ExecutionPolicy>(__exec), views::all_read(::std::forward<_Range1>(__rng)),
views::all_write(::std::forward<_Range2>(__result)), __pred, oneapi::dpl::__internal::__pstl_assign());
views::all_write(::std::forward<_Range2>(__result)), __pred);
}

// [alg.swap]
Expand Down
16 changes: 6 additions & 10 deletions include/oneapi/dpl/pstl/hetero/algorithm_ranges_impl_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -649,11 +649,10 @@ __pattern_count(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec, _R&&
// copy_if
//------------------------------------------------------------------------

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate,
typename _Assign = oneapi::dpl::__internal::__pstl_assign>
template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _Predicate>
oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2,
_Predicate __pred, _Assign __assign)
_Predicate __pred)
{
using _Size = oneapi::dpl::__ranges::__common_size_t<_Range1, _Range2>;
_Size __n = oneapi::dpl::__ranges::__size(__rng1);
Expand All @@ -664,8 +663,7 @@ __pattern_copy_if(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&
return oneapi::dpl::__par_backend_hetero::__parallel_copy_if(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::__get_subscription_view(std::forward<_Range1>(__rng1)),
oneapi::dpl::__ranges::__get_subscription_view(std::forward<_Range2>(__rng2)),
__n, __n_out, __pred, __assign)[0];
oneapi::dpl::__ranges::__get_subscription_view(std::forward<_Range2>(__rng2)), __n, __n_out, __pred)[0];
}

#if _ONEDPL_CPP20_RANGES_PRESENT
Expand Down Expand Up @@ -693,8 +691,7 @@ __pattern_copy_if_ranges(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __e
std::array<_Size, 2> __stops = oneapi::dpl::__par_backend_hetero::__parallel_copy_if(
_BackendTag{}, std::forward<_ExecutionPolicy>(__exec),
oneapi::dpl::__ranges::views::all_read(std::forward<_InRange>(__in_r)),
oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)),
__n, __n_out, __pred_1, oneapi::dpl::__internal::__pstl_assign());
oneapi::dpl::__ranges::views::all_write(std::forward<_OutRange>(__out_r)), __n, __n_out, __pred_1);

return {std::ranges::begin(__in_r) + __stops[1], std::ranges::begin(__out_r) + __stops[0]};
}
Expand All @@ -717,8 +714,7 @@ __pattern_remove_if(__hetero_tag<_BackendTag> __tag, _ExecutionPolicy&& __exec,
oneapi::dpl::__par_backend_hetero::__buffer<_ValueType> __buf(__n);
auto __copy_rng = oneapi::dpl::__ranges::views::all(__buf.get_buffer());

auto __copy_last_id = __ranges::__pattern_copy_if(__tag, __exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred},
oneapi::dpl::__internal::__pstl_assign());
auto __copy_last_id = __ranges::__pattern_copy_if(__tag, __exec, __rng, __copy_rng, __not_pred<_Predicate>{__pred});
auto __copy_rng_truncated = __copy_rng | oneapi::dpl::experimental::ranges::views::take(__copy_last_id);

oneapi::dpl::__internal::__ranges::__pattern_walk_n(
Expand Down Expand Up @@ -808,7 +804,7 @@ template <typename _Name>
struct __copy_wrapper;

template <typename _BackendTag, typename _ExecutionPolicy, typename _Range1, typename _Range2,
typename _BinaryPredicate, typename _Assign = oneapi::dpl::__internal::__pstl_assign>
typename _BinaryPredicate>
oneapi::dpl::__internal::__difference_t<_Range2>
__pattern_unique_copy(__hetero_tag<_BackendTag>, _ExecutionPolicy&& __exec, _Range1&& __rng, _Range2&& __result,
_BinaryPredicate __pred)
Expand Down
143 changes: 74 additions & 69 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -234,10 +234,9 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative
{
using __result_and_scratch_storage_t = __result_and_scratch_storage<_Tp>;

using _NoOpFunctor = unseq_backend::walk_n<oneapi::dpl::identity>;
auto __transform_pattern =
unseq_backend::transform_reduce<_ReduceOp, _NoOpFunctor, _Tp, _Commutative, _VecSize>{__reduce_op,
_NoOpFunctor{}};
using unseq_backend::__unchanged;
auto __transform_pattern = unseq_backend::transform_reduce<_ReduceOp, __unchanged, _Tp, _Commutative, _VecSize>{
__reduce_op, __unchanged{}};
auto __reduce_pattern = unseq_backend::reduce_over_group<_ReduceOp, _Tp>{__reduce_op};

const bool __is_full = __n == __work_group_size * __iters_per_work_item;
Expand Down Expand Up @@ -303,16 +302,14 @@ struct __parallel_transform_reduce_impl
submit(sycl::queue& __q, _Size __n, _Size __work_group_size, const _Size __iters_per_work_item,
_ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs)
{
using _NoOpFunctor = unseq_backend::walk_n<oneapi::dpl::identity>;
using unseq_backend::__unchanged;
using _ReduceKernel = oneapi::dpl::__par_backend_hetero::__internal::__kernel_name_generator<
__reduce_kernel, _CustomName, _Tp, _ReduceOp, _TransformOp, _NoOpFunctor, _Ranges...>;
__reduce_kernel, _CustomName, _Tp, _ReduceOp, _TransformOp, __unchanged, _Ranges...>;

auto __transform_pattern1 =
unseq_backend::transform_reduce<_ReduceOp, _TransformOp, _Tp, _Commutative, _VecSize>{__reduce_op,
__transform_op};
auto __transform_pattern2 =
unseq_backend::transform_reduce<_ReduceOp, _NoOpFunctor, _Tp, _Commutative, _VecSize>{__reduce_op,
_NoOpFunctor{}};
auto __transform_pattern1 = unseq_backend::transform_reduce<_ReduceOp, _TransformOp, _Tp, _Commutative,
_VecSize>{__reduce_op, __transform_op};
auto __transform_pattern2 = unseq_backend::transform_reduce<_ReduceOp, __unchanged, _Tp, _Commutative,
_VecSize>{__reduce_op, __unchanged{}};
auto __reduce_pattern = unseq_backend::reduce_over_group<_ReduceOp, _Tp>{__reduce_op};

#if _ONEDPL_COMPILE_KERNEL
Expand Down
54 changes: 26 additions & 28 deletions include/oneapi/dpl/pstl/hetero/dpcpp/sycl_traits.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ class __not_pred;
template <typename _Pred>
class __reorder_pred;

template <typename _Pred>
class __pred_at_index;

template <typename _Pred, bool _Flag>
class __unique_at_index;

template <typename _Tp>
class __equal_value;

Expand Down Expand Up @@ -101,9 +107,6 @@ struct __search_n_unary_predicate;
template <class _Comp>
struct __is_heap_check;

template <typename _Predicate, typename _ValueType>
struct __create_mask_unique_copy;

template <class _Tag, typename _Tp, typename>
struct __brick_fill;

Expand Down Expand Up @@ -171,6 +174,18 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::
{
};

// SYCL device-copyability trait for oneapi::dpl::__internal::__pred_at_index<_Pred>.
// The trait's result is inherited from __are_all_device_copyable<_Pred>, so it depends only on the
// wrapped predicate type.
template <typename _Pred>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::__pred_at_index, _Pred)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Pred>
{
};

// SYCL device-copyability trait for oneapi::dpl::__internal::__unique_at_index<_Pred, _Flag>.
// Only _Pred is passed to __are_all_device_copyable; _Flag is a compile-time bool template argument
// and is not part of the check.
template <typename _Pred, bool _Flag>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::__unique_at_index, _Pred, _Flag)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Pred>
{
};

template <typename _Tp>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::__equal_value, _Tp)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Tp>
Expand Down Expand Up @@ -271,13 +286,6 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::
{
};

// SYCL device-copyability trait for oneapi::dpl::__internal::__create_mask_unique_copy<_Predicate, _ValueType>.
// Only _Predicate is passed to __are_all_device_copyable; _ValueType is not part of the check
// (presumably because it is not stored in the functor - confirm against the struct definition).
template <typename _Predicate, typename _ValueType>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::__create_mask_unique_copy, _Predicate,
_ValueType)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Predicate>
{
};

template <typename... _Types>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::__internal::tuple, _Types...)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Types...>
Expand Down Expand Up @@ -694,15 +702,12 @@ struct n_elem_match_pred;
template <typename _Pred>
struct first_match_pred;

template <typename _Pred, typename _Tp>
template <typename _Pred, typename _ValueType>
struct __create_mask;

template <typename _BinaryOp, typename _Assigner, std::size_t N>
template <typename _Assigner, std::size_t N>
struct __copy_by_mask;

template <typename _BinaryOp>
struct __partition_by_mask;

template <typename _Inclusive, typename _BinaryOp, typename _InitType>
struct __global_scan_functor;

Expand Down Expand Up @@ -787,22 +792,15 @@ struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backen
{
};

// SYCL device-copyability trait for oneapi::dpl::unseq_backend::__create_mask<_Pred, _Tp>.
// Both template arguments, _Pred and _Tp, are passed to __are_all_device_copyable.
template <typename _Pred, typename _Tp>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__create_mask, _Pred, _Tp)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Pred, _Tp>
{
};

template <typename _BinaryOp, typename _Assigner, std::size_t N>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__copy_by_mask, _BinaryOp,
_Assigner, N)>
: oneapi::dpl::__internal::__are_all_device_copyable<_BinaryOp, _Assigner>
// SYCL device-copyability trait for oneapi::dpl::unseq_backend::__create_mask<_Pred, _ValueType>.
// Only _Pred is passed to __are_all_device_copyable; _ValueType is not part of the check
// (presumably because it is only the functor's return type, not stored state - confirm against
// the __create_mask definition).
template <typename _Pred, typename _ValueType>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__create_mask, _Pred, _ValueType)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Pred>
{
};

template <typename _BinaryOp>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__partition_by_mask, _BinaryOp)>
: oneapi::dpl::__internal::__are_all_device_copyable<_BinaryOp>
// SYCL device-copyability trait for oneapi::dpl::unseq_backend::__copy_by_mask<_Assigner, N>.
// Only _Assigner is passed to __are_all_device_copyable; N is a compile-time std::size_t template
// argument and is not part of the check.
template <typename _Assigner, std::size_t N>
struct sycl::is_device_copyable<_ONEDPL_SPECIALIZE_FOR(oneapi::dpl::unseq_backend::__copy_by_mask, _Assigner, N)>
: oneapi::dpl::__internal::__are_all_device_copyable<_Assigner>
{
};

Expand Down
37 changes: 16 additions & 21 deletions include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,12 @@ struct walk_n
};

// If the read accessor returns a temporary value, then oneapi::dpl::identity returns an lvalue reference to it.
// After temporary value destroying it will be a reference on invalid object.
// So let's don't call functor in case of oneapi::dpl::identity
// After temporary value destroying it will be a reference to an invalid object.
// So let's not call the functor in case of oneapi::dpl::identity
template <>
struct walk_n<oneapi::dpl::identity>
{
oneapi::dpl::identity __f;
oneapi::dpl::identity __f; // only needed for uniform initialization

template <typename _ItemId, typename _Range>
auto
Expand All @@ -131,6 +131,8 @@ struct walk_n<oneapi::dpl::identity>
}
};

// Short alias for the walk_n specialization on oneapi::dpl::identity.
using __unchanged = walk_n<oneapi::dpl::identity>;

// walk_n_vectors_or_scalars
template <typename _F>
struct walk_n_vectors_or_scalars
Expand Down Expand Up @@ -680,29 +682,25 @@ struct __scan_ignore
};

// create mask
template <typename _Pred, typename _Tp>
template <typename _IndexPred, typename _ValueType>
struct __create_mask
{
_Pred __pred;
_IndexPred __pred;

template <typename _Idx, typename _Input>
_Tp
operator()(const _Idx __idx, const _Input& __input) const
template <typename _Idx, typename... _Ranges>
_ValueType
operator()(const _Idx __idx, const oneapi::dpl::__ranges::zip_view<_Ranges...>& __input) const
{
using ::std::get;
// 1. apply __pred
auto __temp = __pred(get<0>(__input[__idx]));
// 2. initialize mask
get<1>(__input[__idx]) = __temp;
return _Tp(__temp);
bool __mask_value = __pred(std::get<0>(__input.tuple()), __idx);
std::get<1>(__input[__idx]) = __mask_value;
return _ValueType(__mask_value);
}
};

// functors for scan
template <typename _BinaryOp, typename _Assigner, std::size_t N>
template <typename _Assigner, std::size_t N>
struct __copy_by_mask
{
_BinaryOp __binary_op;
_Assigner __assigner;

template <typename _Item, typename _OutAcc, typename _InAcc, typename _WgSumsPtr, typename _RetPtr, typename _Size,
Expand Down Expand Up @@ -739,7 +737,7 @@ struct __copy_by_mask
if (__item_idx >= __size_per_wg)
{
auto __wg_sums_idx = __item_idx / __size_per_wg - 1;
__out_idx = __binary_op(__out_idx, __wg_sums_ptr[__wg_sums_idx]);
__out_idx += __wg_sums_ptr[__wg_sums_idx];
}
if (__item_idx % __size_per_wg == 0 || (get<N>(__in_acc[__item_idx]) != get<N>(__in_acc[__item_idx - 1])))
{
Expand Down Expand Up @@ -772,11 +770,8 @@ struct __copy_by_mask_stops
}
};

template <typename _BinaryOp>
struct __partition_by_mask
{
_BinaryOp __binary_op;

template <typename _Item, typename _OutAcc, typename _InAcc, typename _WgSumsPtr, typename _RetPtr, typename _Size,
typename _SizePerWg>
void
Expand All @@ -798,7 +793,7 @@ struct __partition_by_mask
__in_type, ::std::decay_t<decltype(get<0>(__out_acc[__out_idx]))>>::__type;

if (__not_first_wg)
__out_idx = __binary_op(__out_idx, __wg_sums_ptr[__wg_sums_idx - 1]);
__out_idx += __wg_sums_ptr[__wg_sums_idx - 1];
get<0>(__out_acc[__out_idx]) = static_cast<__tuple_type>(get<0>(__in_acc[__item_idx]));
}
else
Expand Down
20 changes: 0 additions & 20 deletions include/oneapi/dpl/pstl/hetero/utils_hetero.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,26 +31,6 @@ namespace dpl
namespace __internal
{

// Functor that fills a mask for a unique-copy style operation.
// For a position __idx it computes a mask entry, writes it into the second component of
// __acc[__idx] and returns the same entry converted to _ValueType.
template <typename _Predicate, typename _ValueType>
struct __create_mask_unique_copy
{
    _Predicate __predicate;

    // __idx - position to process
    // __acc - indexable sequence whose elements expose get<0> (input value) and get<1> (mask slot)
    // Returns the mask entry stored for __idx: 1 for position 0, otherwise the result of
    // __predicate(value at __idx, value at the preceding position).
    template <typename _Idx, typename _Acc>
    _ValueType
    operator()(_Idx __idx, _Acc& __acc) const
    {
        using ::std::get;

        // Position 0 has no predecessor to compare with, so its entry stays 1.
        int __mask_entry = 1;
        if (__idx != 0)
        {
            // Both elements are read inside one full expression so no reference outlives it.
            __mask_entry = __predicate(get<0>(__acc[__idx]), get<0>(__acc[__idx + (-1)]));
        }

        get<1>(__acc[__idx]) = __mask_entry;
        return _ValueType{__mask_entry};
    }
};

template <typename _Compare, typename _ReduceValueType>
struct __pattern_minmax_element_reduce_fn
{
Expand Down
10 changes: 9 additions & 1 deletion include/oneapi/dpl/pstl/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class __pred_at_index
};

//! Apply a predicate to two consecutive elements of a random-access sequence to find non-equivalent (unique) ones
template <typename _Pred>
template <typename _Pred, bool _CheckZero = false>
class __unique_at_index
{
mutable _Pred _M_pred;
Expand All @@ -145,6 +145,14 @@ class __unique_at_index
bool
operator()(_RandomAccessTp&& __arr, _IndexTp __i) const
{
if constexpr (_CheckZero)
{
if (__i == 0)
return true;
}
else
static_assert(std::is_signed_v<_IndexTp>);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

May be nice to have a text message here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added to #2562


return !_M_pred(__arr[__i], __arr[__i - 1]);
}
};
Expand Down
Loading
Loading