Skip to content

Commit 6cdb4d8

Browse files
committed
thrust: CUDA specific zero init hack
Improve performance when pre-initialized memory is not required. This is not in main branch because it is not portablel to rocThrust, and may depend on CUDA/thrust version (but so far it works with CUDA 10 and 11).
1 parent 85e94c4 commit 6cdb4d8

4 files changed

Lines changed: 346 additions & 0 deletions

File tree

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* Copyright 2008-2013 NVIDIA Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <thrust/detail/config.h>
20+
21+
namespace thrust
22+
{
23+
namespace detail
24+
{
25+
26+
27+
template<typename Allocator, typename Pointer, typename Size>
28+
__host__ __device__
29+
inline void default_construct_range(Allocator &a, Pointer p, Size n);
30+
31+
32+
} // end detail
33+
} // end thrust
34+
35+
#include <thrust/detail/allocator/default_construct_range.inl>
36+
37+
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
/*
2+
* Copyright 2008-2013 NVIDIA Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <thrust/detail/config.h>
18+
#include <thrust/detail/allocator/allocator_traits.h>
19+
#include <thrust/detail/type_traits.h>
20+
#include <thrust/detail/type_traits/pointer_traits.h>
21+
#include <thrust/for_each.h>
22+
#include <thrust/uninitialized_fill.h>
23+
24+
namespace thrust
25+
{
26+
namespace detail
27+
{
28+
namespace allocator_traits_detail
29+
{
30+
31+
32+
template<typename Allocator>
33+
struct construct1_via_allocator
34+
{
35+
Allocator &a;
36+
37+
__host__ __device__
38+
construct1_via_allocator(Allocator &a)
39+
: a(a)
40+
{}
41+
42+
template<typename T>
43+
inline __host__ __device__
44+
void operator()(T &x)
45+
{
46+
allocator_traits<Allocator>::construct(a, &x);
47+
}
48+
};
49+
50+
51+
// we need to construct T via the allocator if...
52+
template<typename Allocator, typename T>
53+
struct needs_default_construct_via_allocator
54+
: thrust::detail::or_<
55+
has_member_construct1<Allocator,T>, // if the Allocator does something interesting
56+
thrust::detail::not_<has_trivial_constructor<T> > // or if T's default constructor does something interesting
57+
>
58+
{};
59+
60+
61+
// we know that std::allocator::construct's only effect is to call T's
62+
// default constructor, so we needn't use it for default construction
63+
// unless T's constructor does something interesting
64+
template<typename U, typename T>
65+
struct needs_default_construct_via_allocator<std::allocator<U>, T>
66+
: thrust::detail::not_<has_trivial_constructor<T> >
67+
{};
68+
69+
70+
template<typename Allocator, typename Pointer, typename Size>
71+
__host__ __device__
72+
typename enable_if<
73+
needs_default_construct_via_allocator<
74+
Allocator,
75+
typename pointer_element<Pointer>::type
76+
>::value
77+
>::type
78+
default_construct_range(Allocator &a, Pointer p, Size n)
79+
{
80+
// thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, construct1_via_allocator<Allocator>(a));
81+
}
82+
83+
84+
template<typename Allocator, typename Pointer, typename Size>
85+
__host__ __device__
86+
typename disable_if<
87+
needs_default_construct_via_allocator<
88+
Allocator,
89+
typename pointer_element<Pointer>::type
90+
>::value
91+
>::type
92+
default_construct_range(Allocator &a, Pointer p, Size n)
93+
{
94+
// thrust::uninitialized_fill_n(allocator_system<Allocator>::get(a), p, n, typename pointer_element<Pointer>::type());
95+
}
96+
97+
98+
} // end allocator_traits_detail
99+
100+
101+
template<typename Allocator, typename Pointer, typename Size>
102+
__host__ __device__
103+
void default_construct_range(Allocator &a, Pointer p, Size n)
104+
{
105+
return allocator_traits_detail::default_construct_range(a,p,n);
106+
}
107+
108+
109+
} // end detail
110+
} // end thrust
111+
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright 2008-2013 NVIDIA Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <thrust/detail/config.h>
20+
21+
namespace thrust
22+
{
23+
namespace detail
24+
{
25+
26+
template<typename Allocator, typename Pointer, typename Size>
27+
__host__ __device__
28+
inline void destroy_range(Allocator &a, Pointer p, Size n);
29+
30+
} // end detail
31+
} // end thrust
32+
33+
#include <thrust/detail/allocator/destroy_range.inl>
34+
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
/*
2+
* Copyright 2008-2013 NVIDIA Corporation
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#include <thrust/detail/allocator/destroy_range.h>
18+
#include <thrust/detail/allocator/allocator_traits.h>
19+
#include <thrust/detail/type_traits/pointer_traits.h>
20+
#include <thrust/for_each.h>
21+
#include <memory>
22+
23+
namespace thrust
24+
{
25+
namespace detail
26+
{
27+
namespace allocator_traits_detail
28+
{
29+
30+
31+
// destroy_range has three cases:
32+
// if Allocator has an effectful member function destroy:
33+
// 1. destroy via the allocator
34+
// else
35+
// 2. if T has a non-trivial destructor, destroy the range without using the allocator
36+
// 3. if T has a trivial destructor, do a no-op
37+
38+
template<typename Allocator, typename T>
39+
struct has_effectful_member_destroy
40+
: has_member_destroy<Allocator,T>
41+
{};
42+
43+
// std::allocator::destroy's only effect is to invoke its argument's destructor
44+
template<typename U, typename T>
45+
struct has_effectful_member_destroy<std::allocator<U>, T>
46+
: thrust::detail::false_type
47+
{};
48+
49+
// case 1: Allocator has an effectful 1-argument member function "destroy"
50+
template<typename Allocator, typename Pointer>
51+
struct enable_if_destroy_range_case1
52+
: thrust::detail::enable_if<
53+
has_effectful_member_destroy<
54+
Allocator,
55+
typename pointer_element<Pointer>::type
56+
>::value
57+
>
58+
{};
59+
60+
// case 2: Allocator has no member function "destroy", but T has a non-trivial destructor
61+
template<typename Allocator, typename Pointer>
62+
struct enable_if_destroy_range_case2
63+
: thrust::detail::enable_if<
64+
!has_effectful_member_destroy<
65+
Allocator,
66+
typename pointer_element<Pointer>::type
67+
>::value &&
68+
!has_trivial_destructor<
69+
typename pointer_element<Pointer>::type
70+
>::value
71+
>
72+
{};
73+
74+
// case 3: Allocator has no member function "destroy", and T has a trivial destructor
75+
template<typename Allocator, typename Pointer>
76+
struct enable_if_destroy_range_case3
77+
: thrust::detail::enable_if<
78+
!has_effectful_member_destroy<
79+
Allocator,
80+
typename pointer_element<Pointer>::type
81+
>::value &&
82+
has_trivial_destructor<
83+
typename pointer_element<Pointer>::type
84+
>::value
85+
>
86+
{};
87+
88+
89+
90+
template<typename Allocator>
91+
struct destroy_via_allocator
92+
{
93+
Allocator &a;
94+
95+
__host__ __device__
96+
destroy_via_allocator(Allocator &a)
97+
: a(a)
98+
{}
99+
100+
template<typename T>
101+
inline __host__ __device__
102+
void operator()(T &x)
103+
{
104+
allocator_traits<Allocator>::destroy(a, &x);
105+
}
106+
};
107+
108+
109+
// destroy_range case 1: destroy via allocator
110+
template<typename Allocator, typename Pointer, typename Size>
111+
__host__ __device__
112+
typename enable_if_destroy_range_case1<Allocator,Pointer>::type
113+
destroy_range(Allocator &a, Pointer p, Size n)
114+
{
115+
//thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, destroy_via_allocator<Allocator>(a));
116+
}
117+
118+
119+
// we must prepare for His coming
120+
struct gozer
121+
{
122+
__thrust_exec_check_disable__
123+
template<typename T>
124+
inline __host__ __device__
125+
void operator()(T &x)
126+
{
127+
x.~T();
128+
}
129+
};
130+
131+
// destroy_range case 2: destroy without the allocator
132+
template<typename Allocator, typename Pointer, typename Size>
133+
__host__ __device__
134+
typename enable_if_destroy_range_case2<Allocator,Pointer>::type
135+
destroy_range(Allocator &a, Pointer p, Size n)
136+
{
137+
//thrust::for_each_n(allocator_system<Allocator>::get(a), p, n, gozer());
138+
}
139+
140+
141+
// destroy_range case 3: no-op
142+
template<typename Allocator, typename Pointer, typename Size>
143+
__host__ __device__
144+
typename enable_if_destroy_range_case3<Allocator,Pointer>::type
145+
destroy_range(Allocator &, Pointer, Size)
146+
{
147+
// no op
148+
}
149+
150+
151+
} // end allocator_traits_detail
152+
153+
154+
template<typename Allocator, typename Pointer, typename Size>
155+
__host__ __device__
156+
void destroy_range(Allocator &a, Pointer p, Size n)
157+
{
158+
return allocator_traits_detail::destroy_range(a,p,n);
159+
}
160+
161+
162+
} // end detail
163+
} // end thrust
164+

0 commit comments

Comments
 (0)