Halide 18.0.0
Halide compiler and libraries
HalideBuffer.h
1/** \file
2 * Defines a Buffer type that wraps halide_buffer_t and adds
3 * functionality, and methods for more conveniently iterating over the
4 * samples in a halide_buffer_t outside of Halide code. */
5
6#ifndef HALIDE_RUNTIME_BUFFER_H
7#define HALIDE_RUNTIME_BUFFER_H
8
9#include <algorithm>
10#include <atomic>
11#include <cassert>
12#include <cstdint>
13#include <cstdlib>
14#include <cstring>
15#include <limits>
16#include <memory>
17#include <vector>
18
19#ifdef __APPLE__
20#include <AvailabilityVersions.h>
21#include <TargetConditionals.h>
22#endif
23
24#if defined(__has_feature)
25#if __has_feature(memory_sanitizer)
26#include <sanitizer/msan_interface.h>
27#endif
28#endif
29
30#include "HalideRuntime.h"
31
32#ifdef _MSC_VER
33#include <malloc.h>
34#define HALIDE_ALLOCA _alloca
35#else
36#define HALIDE_ALLOCA __builtin_alloca
37#endif
38
39// gcc 5.1 has a false positive warning on this code
40#if __GNUC__ == 5 && __GNUC_MINOR__ == 1
41#pragma GCC diagnostic ignored "-Warray-bounds"
42#endif
43
44#ifndef HALIDE_RUNTIME_BUFFER_CHECK_INDICES
45#define HALIDE_RUNTIME_BUFFER_CHECK_INDICES 0
46#endif
47
48#ifndef HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT
49// Conservatively align buffer allocations to 128 bytes by default.
50// This is enough alignment for all the platforms currently in use.
51// Redefine this in your compiler settings if you desire more/less alignment.
52#define HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT 128
53#endif
54
56 "HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT must be a power of 2.");
57
58// Unfortunately, not all C++17 runtimes support aligned_alloc
59 // (it may depend on OS/SDK version); this is provided as an opt-out
60// if you are compiling on a platform that doesn't provide a (good)
61// implementation. (Note that we actually use the C11 `::aligned_alloc()`
62// rather than the C++17 `std::aligned_alloc()` because at least one platform
63// we found supports the former but not the latter.)
64#ifndef HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
65
66// clang-format off
67#ifdef _MSC_VER
68
69 // MSVC doesn't implement aligned_alloc(), even in C++17 mode, and
70 // has stated they probably never will, so, always default it off here.
71 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
72
73#elif defined(__ANDROID_API__) && __ANDROID_API__ < 28
74
75 // Android doesn't provide aligned_alloc until API 28
76 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
77
78#elif defined(__APPLE__)
79
80 #if TARGET_OS_OSX && (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_10_15)
81
82 // macOS doesn't provide aligned_alloc until 10.15
83 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
84
85 #elif TARGET_OS_IPHONE && (__IPHONE_OS_VERSION_MIN_REQUIRED < __IPHONE_14_0)
86
87 // iOS doesn't provide aligned_alloc until 14.0
88 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
89
90 #else
91
92 // Assume it's ok on all other Apple targets
93 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
94
95 #endif
96
97#else
98
99 #if defined(__GLIBCXX__) && !defined(_GLIBCXX_HAVE_ALIGNED_ALLOC)
100
101 // ARM GNU-A baremetal compiler doesn't provide aligned_alloc as of 12.2
102 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 0
103
104 #else
105
106 // Not Windows, Android, or Apple: just assume it's ok
107 #define HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC 1
108
109 #endif
110
111#endif
112// clang-format on
113
114#endif // HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
115
116namespace Halide {
117namespace Runtime {
118
119// Forward-declare our Buffer class
120template<typename T, int Dims, int InClassDimStorage>
121class Buffer;
122
123// A helper to check if a parameter pack is entirely implicitly
124// int-convertible to use with std::enable_if
125template<typename... Args>
126struct AllInts : std::false_type {};
127
128template<>
129struct AllInts<> : std::true_type {};
130
131template<typename T, typename... Args>
132struct AllInts<T, Args...> {
133 static const bool value = std::is_convertible<T, int>::value && AllInts<Args...>::value;
134};
135
136// Floats and doubles are technically implicitly int-convertible, but
137// doing so produces a warning we treat as an error, so just disallow
138// it here.
139template<typename... Args>
140struct AllInts<float, Args...> : std::false_type {};
141
142template<typename... Args>
143struct AllInts<double, Args...> : std::false_type {};
144
145namespace Internal {
146// A helper to detect if there are any zeros in a container
147template<typename Container>
148bool any_zero(const Container &c) {
149 for (int i : c) {
150 if (i == 0) {
151 return true;
152 }
153 }
154 return false;
155}
156
157struct DefaultAllocatorFns {
158 static inline void *(*default_allocate_fn)(size_t) = nullptr;
159 static inline void (*default_deallocate_fn)(void *) = nullptr;
160};
161} // namespace Internal
162
163/** A struct acting as a header for allocations owned by the Buffer
164 * class itself. */
165struct AllocationHeader {
166 void (*deallocate_fn)(void *);
167 std::atomic<int> ref_count;
168
169 // Note that ref_count always starts at 1
170 explicit AllocationHeader(void (*deallocate_fn)(void *))
171 : deallocate_fn(deallocate_fn), ref_count(1) {
172 }
173};
174
175/** This indicates how to deallocate the device for a Halide::Runtime::Buffer. */
176enum struct BufferDeviceOwnership : int {
177 Allocated, ///< halide_device_free will be called when device ref count goes to zero
178 WrappedNative, ///< halide_device_detach_native will be called when device ref count goes to zero
179 Unmanaged, ///< No free routine will be called when device ref count goes to zero
180 AllocatedDeviceAndHost, ///< Call device_and_host_free when DevRefCount goes to zero.
181 Cropped, ///< Call halide_device_release_crop when DevRefCount goes to zero.
182};
183
184/** A similar struct for managing device allocations. */
185struct DeviceRefCount {
186 // This is only ever constructed when there's something to manage,
187 // so start at one.
188 std::atomic<int> count{1};
189 BufferDeviceOwnership ownership{BufferDeviceOwnership::Allocated};
190};
191
192constexpr int AnyDims = -1;
193
194/** A templated Buffer class that wraps halide_buffer_t and adds
195 * functionality. When using Halide from C++, this is the preferred
196 * way to create input and output buffers. The overhead of using this
197 * class relative to a naked halide_buffer_t is minimal - it uses another
198 * ~16 bytes on the stack, and does no dynamic allocations when using
199 * it to represent existing memory of a known maximum dimensionality.
200 *
201 * The template parameter T is the element type. For buffers where the
202 * element type is unknown, or may vary, use void or const void.
203 *
204 * The template parameter Dims is the number of dimensions. For buffers where
205 * the dimensionality is unknown at compile time, or may vary, use AnyDims.
206 *
207 * InClassDimStorage is the maximum number of dimensions that can be represented
208 * using space inside the class itself. Set it to the maximum dimensionality
209 * you expect this buffer to be. If the actual dimensionality exceeds
210 * this, heap storage is allocated to track the shape of the buffer.
211 * InClassDimStorage defaults to 4, which should cover nearly all usage.
212 *
213 * The class optionally allocates and owns memory for the image using
214 * a shared pointer allocated with the provided allocator. If no allocator
215 * is provided, malloc and free are used. Any device-side allocation is
216 * considered as owned if and only if the host-side allocation is
217 * owned. */
218template<typename T = void,
219 int Dims = AnyDims,
220 int InClassDimStorage = (Dims == AnyDims ? 4 : std::max(Dims, 1))>
221class Buffer {
222 /** The underlying halide_buffer_t */
223 halide_buffer_t buf = {};
224
225 /** Some in-class storage for shape of the dimensions. */
226 halide_dimension_t shape[InClassDimStorage];
227
228 /** The allocation owned by this Buffer. NULL if the Buffer does not
229 * own the memory. */
230 AllocationHeader *alloc = nullptr;
231
232 /** A reference count for the device allocation owned by this
233 * buffer. */
234 mutable DeviceRefCount *dev_ref_count = nullptr;
235
236 /** True if T is of type void or const void */
237 static const bool T_is_void = std::is_same<typename std::remove_const<T>::type, void>::value;
238
239 /** A type function that adds a const qualifier if T is a const type. */
240 template<typename T2>
241 using add_const_if_T_is_const = typename std::conditional<std::is_const<T>::value, const T2, T2>::type;
242
243 /** T unless T is (const) void, in which case (const)
244 * uint8_t. Useful for providing return types for operator() */
245 using not_void_T = typename std::conditional<T_is_void,
246 add_const_if_T_is_const<uint8_t>,
247 T>::type;
248
249 /** T with constness removed. Useful for return type of copy(). */
250 using not_const_T = typename std::remove_const<T>::type;
251
252 /** The type the elements are stored as. Equal to not_void_T
253 * unless T is a pointer, in which case uint64_t. Halide stores
254 * all pointer types as uint64s internally, even on 32-bit
255 * systems. */
256 using storage_T = typename std::conditional<std::is_pointer<T>::value, uint64_t, not_void_T>::type;
257
258public:
259 /** True if the Halide type is not void (or const void). */
260 static constexpr bool has_static_halide_type = !T_is_void;
261
262 /** Get the Halide type of T. Callers should not use the result if
263 * has_static_halide_type is false. */
264 static constexpr halide_type_t static_halide_type() {
265 return halide_type_of<typename std::remove_cv<not_void_T>::type>();
266 }
267
268 /** Does this Buffer own the host memory it refers to? */
269 bool owns_host_memory() const {
270 return alloc != nullptr;
271 }
272
273 static constexpr bool has_static_dimensions = (Dims != AnyDims);
274
275 /** Callers should not use the result if
276 * has_static_dimensions is false. */
277 static constexpr int static_dimensions() {
278 return Dims;
279 }
280
281 static_assert(!has_static_dimensions || static_dimensions() >= 0);
282
283private:
284 /** Increment the reference count of any owned allocation */
285 void incref() const {
286 if (owns_host_memory()) {
287 alloc->ref_count++;
288 }
289 if (buf.device) {
290 if (!dev_ref_count) {
291 // I seem to have a non-zero dev field but no
292 // reference count for it. I must have been given a
293 // device allocation by a Halide pipeline, and have
294 // never been copied from since. Take sole ownership
295 // of it.
296 dev_ref_count = new DeviceRefCount;
297 }
298 dev_ref_count->count++;
299 }
300 }
301
302 // Note that this is called "cropped" but it can encompass a slice/embed
303 // operation as well.
304 struct DevRefCountCropped : DeviceRefCount {
305 // We only store Buffers that have a dynamic number of dimensions here.
306 // Buffers that are cropped or sliced from must first be converted to
307 // one with a variable number of dimensions, because we cannot possibly
308 // know the actual dimensionality of the buffer this is a crop or
309 // slice from. Since cropping a sliced buffer is also possible, no
310 // optimizations can be made for cropped buffers either.
311 Buffer<T, AnyDims> cropped_from;
312 explicit DevRefCountCropped(const Buffer<T, AnyDims> &cropped_from)
313 : cropped_from(cropped_from) {
314 ownership = BufferDeviceOwnership::Cropped;
315 }
316 };
317
318 /** Set up the device ref count for a buffer to indicate it is a crop (or slice, embed, etc) of cropped_from */
319 void crop_from(const Buffer<T, AnyDims> &cropped_from) {
320 assert(dev_ref_count == nullptr);
321 dev_ref_count = new DevRefCountCropped(cropped_from);
322 }
323
324 /** Decrement the reference count of any owned allocation and free host
325 * and device memory if it hits zero. Sets alloc to nullptr. */
326 void decref(bool device_only = false) {
327 if (owns_host_memory() && !device_only) {
328 int new_count = --(alloc->ref_count);
329 if (new_count == 0) {
330 void (*fn)(void *) = alloc->deallocate_fn;
331 alloc->~AllocationHeader();
332 fn(alloc);
333 }
334 buf.host = nullptr;
335 alloc = nullptr;
336 set_host_dirty(false);
337 }
338 int new_count = 0;
339 if (dev_ref_count) {
340 new_count = --(dev_ref_count->count);
341 }
342 if (new_count == 0) {
343 if (buf.device) {
344 assert(!(alloc && device_dirty()) &&
345 "Implicitly freeing a dirty device allocation while a host allocation still lives. "
346 "Call device_free explicitly if you want to drop dirty device-side data. "
347 "Call copy_to_host explicitly if you want the data copied to the host allocation "
348 "before the device allocation is freed.");
349 int result = halide_error_code_success;
350 if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::WrappedNative) {
351 result = buf.device_interface->detach_native(nullptr, &buf);
352 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::AllocatedDeviceAndHost) {
353 result = buf.device_interface->device_and_host_free(nullptr, &buf);
354 } else if (dev_ref_count && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
355 result = buf.device_interface->device_release_crop(nullptr, &buf);
356 } else if (dev_ref_count == nullptr || dev_ref_count->ownership == BufferDeviceOwnership::Allocated) {
357 result = buf.device_interface->device_free(nullptr, &buf);
358 }
359 // No reasonable way to return the error, but we can at least assert-fail in debug builds.
360 assert((result == halide_error_code_success) && "device_interface call returned a nonzero result in Buffer::decref()");
361 (void)result;
362 }
363 if (dev_ref_count) {
364 if (dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
365 delete (DevRefCountCropped *)dev_ref_count;
366 } else {
367 delete dev_ref_count;
368 }
369 }
370 }
371 dev_ref_count = nullptr;
372 buf.device = 0;
373 buf.device_interface = nullptr;
374 }
375
376 void free_shape_storage() {
377 if (buf.dim != shape) {
378 delete[] buf.dim;
379 buf.dim = nullptr;
380 }
381 }
382
383 template<int DimsSpecified>
384 void make_static_shape_storage() {
385 static_assert(Dims == AnyDims || Dims == DimsSpecified,
386 "Number of arguments to Buffer() does not match static dimensionality");
387 buf.dimensions = DimsSpecified;
388 if constexpr (Dims == AnyDims) {
389 if constexpr (DimsSpecified <= InClassDimStorage) {
390 buf.dim = shape;
391 } else {
392 static_assert(DimsSpecified >= 1);
393 buf.dim = new halide_dimension_t[DimsSpecified];
394 }
395 } else {
396 static_assert(InClassDimStorage >= Dims);
397 buf.dim = shape;
398 }
399 }
400
401 void make_shape_storage(const int dimensions) {
402 if (Dims != AnyDims && Dims != dimensions) {
403 assert(false && "Number of arguments to Buffer() does not match static dimensionality");
404 }
405 // This should usually be inlined, so if dimensions is statically known,
406 // we can skip the call to new
407 buf.dimensions = dimensions;
408 buf.dim = (dimensions <= InClassDimStorage) ? shape : new halide_dimension_t[dimensions];
409 }
410
411 void copy_shape_from(const halide_buffer_t &other) {
412 // All callers of this ensure that buf.dimensions == other.dimensions.
413 make_shape_storage(other.dimensions);
414 std::copy(other.dim, other.dim + other.dimensions, buf.dim);
415 }
416
417 template<typename T2, int D2, int S2>
418 void move_shape_from(Buffer<T2, D2, S2> &&other) {
419 if (other.shape == other.buf.dim) {
420 copy_shape_from(other.buf);
421 } else {
422 buf.dim = other.buf.dim;
423 other.buf.dim = nullptr;
424 }
425 }
426
427 /** Initialize the shape from a halide_buffer_t. */
428 void initialize_from_buffer(const halide_buffer_t &b,
429 BufferDeviceOwnership ownership) {
430 memcpy(&buf, &b, sizeof(halide_buffer_t));
431 copy_shape_from(b);
432 if (b.device) {
433 dev_ref_count = new DeviceRefCount;
434 dev_ref_count->ownership = ownership;
435 }
436 }
437
438 /** Initialize the shape from an array of ints */
439 void initialize_shape(const int *sizes) {
440 for (int i = 0; i < buf.dimensions; i++) {
441 buf.dim[i].min = 0;
442 buf.dim[i].extent = sizes[i];
443 if (i == 0) {
444 buf.dim[i].stride = 1;
445 } else {
446 buf.dim[i].stride = buf.dim[i - 1].stride * buf.dim[i - 1].extent;
447 }
448 }
449 }
450
451 /** Initialize the shape from a vector of extents */
452 void initialize_shape(const std::vector<int> &sizes) {
453 assert(buf.dimensions == (int)sizes.size());
454 initialize_shape(sizes.data());
455 }
456
457 /** Initialize the shape from the static shape of an array */
458 template<typename Array, size_t N>
459 void initialize_shape_from_array_shape(int next, Array (&vals)[N]) {
460 buf.dim[next].min = 0;
461 buf.dim[next].extent = (int)N;
462 if (next == 0) {
463 buf.dim[next].stride = 1;
464 } else {
465 initialize_shape_from_array_shape(next - 1, vals[0]);
466 buf.dim[next].stride = buf.dim[next - 1].stride * buf.dim[next - 1].extent;
467 }
468 }
469
470 /** Base case for the template recursion above. */
471 template<typename T2>
472 void initialize_shape_from_array_shape(int, const T2 &) {
473 }
474
475 /** Get the dimensionality of a multi-dimensional C array */
476 template<typename Array, size_t N>
477 static int dimensionality_of_array(Array (&vals)[N]) {
478 return dimensionality_of_array(vals[0]) + 1;
479 }
480
481 template<typename T2>
482 static int dimensionality_of_array(const T2 &) {
483 return 0;
484 }
485
486 /** Get the underlying halide_type_t of an array's element type. */
487 template<typename Array, size_t N>
488 static halide_type_t scalar_type_of_array(Array (&vals)[N]) {
489 return scalar_type_of_array(vals[0]);
490 }
491
492 template<typename T2>
493 static halide_type_t scalar_type_of_array(const T2 &) {
494 return halide_type_of<typename std::remove_cv<T2>::type>();
495 }
496
497 /** Crop a single dimension without handling device allocation. */
498 void crop_host(int d, int min, int extent) {
499 assert(dim(d).min() <= min);
500 assert(dim(d).max() >= min + extent - 1);
501 ptrdiff_t shift = min - dim(d).min();
502 if (buf.host != nullptr) {
503 buf.host += (shift * dim(d).stride()) * type().bytes();
504 }
505 buf.dim[d].min = min;
506 buf.dim[d].extent = extent;
507 }
508
509 /** Crop as many dimensions as are in rect, without handling device allocation. */
510 void crop_host(const std::vector<std::pair<int, int>> &rect) {
511 assert(rect.size() <= static_cast<decltype(rect.size())>(std::numeric_limits<int>::max()));
512 int limit = (int)rect.size();
513 assert(limit <= dimensions());
514 for (int i = 0; i < limit; i++) {
515 crop_host(i, rect[i].first, rect[i].second);
516 }
517 }
518
519 void complete_device_crop(Buffer<T, Dims, InClassDimStorage> &result_host_cropped) const {
520 assert(buf.device_interface != nullptr);
521 if (buf.device_interface->device_crop(nullptr, &buf, &result_host_cropped.buf) == halide_error_code_success) {
522 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
523 // is it possible to get to this point without incref having run at least once since
524 // the device field was set? (I.e. in the internal logic of crop. incref might have been
525 // called.)
526 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
527 result_host_cropped.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
528 } else {
529 result_host_cropped.crop_from(*this);
530 }
531 }
532 }
533
534 /** Slice a single dimension without handling device allocation. */
535 void slice_host(int d, int pos) {
536 static_assert(Dims == AnyDims);
537 assert(dimensions() > 0);
538 assert(d >= 0 && d < dimensions());
539 assert(pos >= dim(d).min() && pos <= dim(d).max());
540 buf.dimensions--;
541 ptrdiff_t shift = pos - buf.dim[d].min;
542 if (buf.host != nullptr) {
543 buf.host += (shift * buf.dim[d].stride) * type().bytes();
544 }
545 for (int i = d; i < buf.dimensions; i++) {
546 buf.dim[i] = buf.dim[i + 1];
547 }
548 buf.dim[buf.dimensions] = {0, 0, 0};
549 }
550
551 void complete_device_slice(Buffer<T, AnyDims, InClassDimStorage> &result_host_sliced, int d, int pos) const {
552 assert(buf.device_interface != nullptr);
553 if (buf.device_interface->device_slice(nullptr, &buf, d, pos, &result_host_sliced.buf) == halide_error_code_success) {
554 // TODO: Figure out what to do if dev_ref_count is nullptr. Should incref logic run here?
555 // is it possible to get to this point without incref having run at least once since
556 // the device field was set? (I.e. in the internal logic of slice. incref might have been
557 // called.)
558 if (dev_ref_count != nullptr && dev_ref_count->ownership == BufferDeviceOwnership::Cropped) {
559 // crop_from() is correct here, despite the fact that we are slicing.
560 result_host_sliced.crop_from(((DevRefCountCropped *)dev_ref_count)->cropped_from);
561 } else {
562 // crop_from() is correct here, despite the fact that we are slicing.
563 result_host_sliced.crop_from(*this);
564 }
565 }
566 }
567
568public:
569 typedef T ElemType;
570
571 /** Read-only access to the shape */
572 class Dimension {
573 const halide_dimension_t &d;
574
575 public:
576 /** The lowest coordinate in this dimension */
577 HALIDE_ALWAYS_INLINE int min() const {
578 return d.min;
579 }
580
581 /** The number of elements in memory you have to step over to
582 * increment this coordinate by one. */
583 HALIDE_ALWAYS_INLINE int stride() const {
584 return d.stride;
585 }
586
587 /** The extent of the image along this dimension */
588 HALIDE_ALWAYS_INLINE int extent() const {
589 return d.extent;
590 }
591
592 /** The highest coordinate in this dimension */
593 HALIDE_ALWAYS_INLINE int max() const {
594 return min() + extent() - 1;
595 }
596
597 /** An iterator class, so that you can iterate over
598 * coordinates in a dimension using a range-based for loop. */
599 struct iterator {
600 int val;
601 int operator*() const {
602 return val;
603 }
604 bool operator!=(const iterator &other) const {
605 return val != other.val;
606 }
607 iterator &operator++() {
608 val++;
609 return *this;
610 }
611 };
612
613 /** An iterator that points to the min coordinate */
614 HALIDE_ALWAYS_INLINE iterator begin() const {
615 return {min()};
616 }
617
618 /** An iterator that points to one past the max coordinate */
619 HALIDE_ALWAYS_INLINE iterator end() const {
620 return {min() + extent()};
621 }
622
623 explicit Dimension(const halide_dimension_t &dim)
624 : d(dim) {
625 }
626 };
627
628 /** Access the shape of the buffer */
629 HALIDE_ALWAYS_INLINE Dimension dim(int i) const {
630 assert(i >= 0 && i < this->dimensions());
631 return Dimension(buf.dim[i]);
632 }
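 // Illustrative usage sketch: dim() exposes per-dimension metadata, and each
 // Dimension can be iterated with a range-based for loop (assumes a 2-D buffer;
 // the element-access operator() is defined later in this header).
 // \code
 // Buffer<float> im(80, 60);
 // for (int y : im.dim(1)) {
 //     for (int x : im.dim(0)) {
 //         im(x, y) = float(x + y);
 //     }
 // }
 // \endcode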
633
634 /** Access to the mins, strides, extents. Will be deprecated. Do not use. */
635 // @{
636 int min(int i) const {
637 return dim(i).min();
638 }
639 int extent(int i) const {
640 return dim(i).extent();
641 }
642 int stride(int i) const {
643 return dim(i).stride();
644 }
645 // @}
646
647 /** The total number of elements this buffer represents. Equal to
648 * the product of the extents */
649 size_t number_of_elements() const {
650 return buf.number_of_elements();
651 }
652
653 /** Get the dimensionality of the buffer. */
654 int dimensions() const {
655 if constexpr (has_static_dimensions) {
656 return Dims;
657 } else {
658 return buf.dimensions;
659 }
660 }
661
662 /** Get the type of the elements. */
663 halide_type_t type() const {
664 return buf.type;
665 }
666
667 /** A pointer to the element with the lowest address. If all
668 * strides are positive, equal to the host pointer. */
669 T *begin() const {
670 assert(buf.host != nullptr); // Cannot call begin() on an unallocated Buffer.
671 return (T *)buf.begin();
672 }
673
674 /** A pointer to one beyond the element with the highest address. */
675 T *end() const {
676 assert(buf.host != nullptr); // Cannot call end() on an unallocated Buffer.
677 return (T *)buf.end();
678 }
679
680 /** The total number of bytes spanned by the data in memory. */
681 size_t size_in_bytes() const {
682 return buf.size_in_bytes();
683 }
684
685 /** Reset the Buffer to be equivalent to a default-constructed Buffer
686 * of the same static type (if any); Buffer<void> will have its runtime
687 * type reset to uint8. */
688 void reset() {
689 *this = Buffer();
690 }
691
692 Buffer()
693 : shape() {
694 buf.type = static_halide_type();
695 // If Dims is statically known, create storage for that many dimensions;
696 // otherwise, make a zero-dimensional buffer.
697 constexpr int buf_dimensions = (Dims == AnyDims) ? 0 : Dims;
698 make_static_shape_storage<buf_dimensions>();
699 }
700
701 /** Make a Buffer from a halide_buffer_t */
702 explicit Buffer(const halide_buffer_t &buf,
703 BufferDeviceOwnership ownership = BufferDeviceOwnership::Unmanaged) {
704 assert(T_is_void || buf.type == static_halide_type());
705 initialize_from_buffer(buf, ownership);
706 }
707
708 /** Give Buffers access to the members of Buffers of different dimensionalities and types. */
709 template<typename T2, int D2, int S2>
710 friend class Buffer;
711
712private:
713 template<typename T2, int D2, int S2>
714 static void static_assert_can_convert_from() {
715 static_assert((!std::is_const<T2>::value || std::is_const<T>::value),
716 "Can't convert from a Buffer<const T> to a Buffer<T>");
717 static_assert(std::is_same<typename std::remove_const<T>::type,
718 typename std::remove_const<T2>::type>::value ||
720 "type mismatch constructing Buffer");
721 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2,
722 "Can't convert from a Buffer with static dimensionality to a Buffer with different static dimensionality");
723 }
724
725public:
726 static void set_default_allocate_fn(void *(*allocate_fn)(size_t)) {
727 Internal::DefaultAllocatorFns::default_allocate_fn = allocate_fn;
728 }
729 static void set_default_deallocate_fn(void (*deallocate_fn)(void *)) {
730 Internal::DefaultAllocatorFns::default_deallocate_fn = deallocate_fn;
731 }
732
733 /** Determine if a Buffer<T, Dims, InClassDimStorage> can be constructed from some other Buffer type.
734 * If this can be determined at compile time, fail with a static assert; otherwise
735 * return a boolean based on runtime typing. */
736 template<typename T2, int D2, int S2>
737 static bool can_convert_from(const Buffer<T2, D2, S2> &other) {
738 static_assert_can_convert_from<T2, D2, S2>();
739 if (Buffer<T2, D2, S2>::T_is_void && !T_is_void) {
740 if (other.type() != static_halide_type()) {
741 return false;
742 }
743 }
744 if (Dims != AnyDims) {
745 if (other.dimensions() != Dims) {
746 return false;
747 }
748 }
749 return true;
750 }
751
752 /** Fail an assertion at runtime or compile-time if a Buffer<T, Dims, InClassDimStorage>
753 * cannot be constructed from some other Buffer type. */
754 template<typename T2, int D2, int S2>
755 static void assert_can_convert_from(const Buffer<T2, D2, S2> &other) {
756 // Explicitly call static_assert_can_convert_from() here so
757 // that we always get compile-time checking, even if compiling with
758 // assertions disabled.
759 static_assert_can_convert_from<T2, D2, S2>();
760 assert(can_convert_from(other));
761 }
762
763 /** Copy constructor. Does not copy underlying data. */
764 Buffer(const Buffer<T, Dims, InClassDimStorage> &other)
765 : buf(other.buf),
766 alloc(other.alloc) {
767 other.incref();
768 dev_ref_count = other.dev_ref_count;
769 copy_shape_from(other.buf);
770 }
771
772 /** Construct a Buffer from a Buffer of different dimensionality
773 * and type. Asserts that the type and dimensionality matches (at runtime,
774 * if one of the types is void). Note that this constructor is
775 * implicit. This, for example, lets you pass things like
776 * Buffer<T> or Buffer<const void> to functions expecting
777 * Buffer<const T>. */
778 template<typename T2, int D2, int S2>
779 Buffer(const Buffer<T2, D2, S2> &other)
780 : buf(other.buf),
781 alloc(other.alloc) {
782 assert_can_convert_from(other);
783 other.incref();
784 dev_ref_count = other.dev_ref_count;
785 copy_shape_from(other.buf);
786 }
787
788 /** Move constructor */
789 Buffer(Buffer<T, Dims, InClassDimStorage> &&other) noexcept
790 : buf(other.buf),
791 alloc(other.alloc),
792 dev_ref_count(other.dev_ref_count) {
793 other.dev_ref_count = nullptr;
794 other.alloc = nullptr;
795 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
796 other.buf = halide_buffer_t();
797 }
798
799 /** Move-construct a Buffer from a Buffer of different
800 * dimensionality and type. Asserts that the types match (at
801 * runtime if one of the types is void). */
802 template<typename T2, int D2, int S2>
803 Buffer(Buffer<T2, D2, S2> &&other)
804 : buf(other.buf),
805 alloc(other.alloc),
806 dev_ref_count(other.dev_ref_count) {
807 assert_can_convert_from(other);
808 other.dev_ref_count = nullptr;
809 other.alloc = nullptr;
810 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
811 other.buf = halide_buffer_t();
812 }
813
814 /** Assign from another Buffer of possibly-different
815 * dimensionality and type. Asserts that the types match (at
816 * runtime if one of the types is void). */
817 template<typename T2, int D2, int S2>
818 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T2, D2, S2> &other) {
819 if ((const void *)this == (const void *)&other) {
820 return *this;
821 }
822 assert_can_convert_from(other);
823 other.incref();
824 decref();
825 dev_ref_count = other.dev_ref_count;
826 alloc = other.alloc;
827 free_shape_storage();
828 buf = other.buf;
829 copy_shape_from(other.buf);
830 return *this;
831 }
832
833 /** Standard assignment operator */
834 Buffer<T, Dims, InClassDimStorage> &operator=(const Buffer<T, Dims, InClassDimStorage> &other) {
835 // The cast to void* here is just to satisfy clang-tidy
836 if ((const void *)this == (const void *)&other) {
837 return *this;
838 }
839 other.incref();
840 decref();
841 dev_ref_count = other.dev_ref_count;
842 alloc = other.alloc;
843 free_shape_storage();
844 buf = other.buf;
845 copy_shape_from(other.buf);
846 return *this;
847 }
848
849 /** Move from another Buffer of possibly-different
850 * dimensionality and type. Asserts that the types match (at
851 * runtime if one of the types is void). */
852 template<typename T2, int D2, int S2>
853 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T2, D2, S2> &&other) {
854 assert_can_convert_from(other);
855 decref();
856 alloc = other.alloc;
857 other.alloc = nullptr;
858 dev_ref_count = other.dev_ref_count;
859 other.dev_ref_count = nullptr;
860 free_shape_storage();
861 buf = other.buf;
862 move_shape_from(std::forward<Buffer<T2, D2, S2>>(other));
863 other.buf = halide_buffer_t();
864 return *this;
865 }
866
867 /** Standard move-assignment operator */
868 Buffer<T, Dims, InClassDimStorage> &operator=(Buffer<T, Dims, InClassDimStorage> &&other) noexcept {
869 decref();
870 alloc = other.alloc;
871 other.alloc = nullptr;
872 dev_ref_count = other.dev_ref_count;
873 other.dev_ref_count = nullptr;
874 free_shape_storage();
875 buf = other.buf;
876 move_shape_from(std::forward<Buffer<T, Dims, InClassDimStorage>>(other));
877 other.buf = halide_buffer_t();
878 return *this;
879 }
880
881 /** Check that the product of the extents fits in memory. */
882 void check_overflow() {
883 size_t size = type().bytes();
884 for (int i = 0; i < dimensions(); i++) {
885 size *= dim(i).extent();
886 }
887 // We allow 2^31 or 2^63 bytes, so drop the top bit.
888 size = (size << 1) >> 1;
889 for (int i = 0; i < dimensions(); i++) {
890 size /= dim(i).extent();
891 }
892 assert(size == (size_t)type().bytes() && "Error: Overflow computing total size of buffer.");
893 }
894
895 /** Allocate memory for this Buffer. Drops the reference to any
896 * owned memory. */
897 void allocate(void *(*allocate_fn)(size_t) = nullptr,
898 void (*deallocate_fn)(void *) = nullptr) {
899 // Drop any existing allocation
900 deallocate();
901
902 // Conservatively align images to (usually) 128 bytes. This is enough
903 // alignment for all the platforms we might use. Also ensure that the allocation
904 // is such that the logical size is an integral multiple of 128 bytes (or a bit more).
905 constexpr size_t alignment = HALIDE_RUNTIME_BUFFER_ALLOCATION_ALIGNMENT;
906
907 const auto align_up = [=](size_t value) -> size_t {
908 return (value + alignment - 1) & ~(alignment - 1);
909 };
910
911 size_t size = size_in_bytes();
912
913#if HALIDE_RUNTIME_BUFFER_USE_ALIGNED_ALLOC
914 // Only use aligned_alloc() if no custom allocators are specified.
915 if (!allocate_fn && !deallocate_fn) {
916 // As a practical matter, sizeof(AllocationHeader) is going to be no more than 16 bytes
917 // on any supported platform, so we will just overallocate by 'alignment'
918 // so that the user storage also starts at an aligned point. This is a bit
919 // wasteful, but probably not a big deal.
920 static_assert(sizeof(AllocationHeader) <= alignment);
921 void *alloc_storage = ::aligned_alloc(alignment, align_up(size) + alignment);
922 assert((uintptr_t)alloc_storage == align_up((uintptr_t)alloc_storage));
923 alloc = new (alloc_storage) AllocationHeader(free);
924 buf.host = (uint8_t *)((uintptr_t)alloc_storage + alignment);
925 return;
926 }
927 // else fall thru
928#endif
929 if (!allocate_fn) {
930 allocate_fn = Internal::DefaultAllocatorFns::default_allocate_fn;
931 if (!allocate_fn) {
932 allocate_fn = malloc;
933 }
934 }
935 if (!deallocate_fn) {
936 deallocate_fn = Internal::DefaultAllocatorFns::default_deallocate_fn;
937 if (!deallocate_fn) {
938 deallocate_fn = free;
939 }
940 }
941
942 static_assert(sizeof(AllocationHeader) <= alignment);
943
944 // malloc() and friends must return a pointer aligned to at least alignof(std::max_align_t);
945 // make sure this is OK for AllocationHeader, since it always goes at the start
946 static_assert(alignof(AllocationHeader) <= alignof(std::max_align_t));
947
948 const size_t requested_size = align_up(size + alignment +
949 std::max(0, (int)sizeof(AllocationHeader) -
950 (int)sizeof(std::max_align_t)));
951 void *alloc_storage = allocate_fn(requested_size);
952 alloc = new (alloc_storage) AllocationHeader(deallocate_fn);
953 uint8_t *unaligned_ptr = ((uint8_t *)alloc) + sizeof(AllocationHeader);
954 buf.host = (uint8_t *)align_up((uintptr_t)unaligned_ptr);
955 }
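 // Illustrative usage sketch: allocation can be deferred and routed through
 // user-supplied functions ('my_alloc' and 'my_free' are hypothetical names).
 // \code
 // Buffer<float> im(nullptr, 640, 480);  // describes a shape; no allocation yet
 // im.allocate(my_alloc, my_free);       // allocate now via the custom functions
 // \endcode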
956
957 /** Drop reference to any owned host or device memory, possibly
958 * freeing it, if this buffer held the last reference to
959 * it. Retains the shape of the buffer. Does nothing if this
960 * buffer did not allocate its own memory. */
961 void deallocate() {
962 decref();
963 }
964
965 /** Drop reference to any owned device memory, possibly freeing it
966 * if this buffer held the last reference to it. Asserts that
967 * device_dirty is false. */
968 void device_deallocate() {
969 decref(true);
970 }
971
972 /** Allocate a new image of the given size with a runtime
973 * type. Only used when you do know what size you want but you
974 * don't know statically what type the elements are. Pass zeroes
975 * to make a buffer suitable for bounds query calls. */
976 template<typename... Args,
977 typename = typename std::enable_if<AllInts<Args...>::value>::type>
978 Buffer(halide_type_t t, int first, Args... rest) {
979 if (!T_is_void) {
980 assert(static_halide_type() == t);
981 }
982 int extents[] = {first, (int)rest...};
983 buf.type = t;
984 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
985 make_static_shape_storage<buf_dimensions>();
986 initialize_shape(extents);
987 if (!Internal::any_zero(extents)) {
988 check_overflow();
989 allocate();
990 }
991 }
992
993 /** Allocate a new image of the given size. Pass zeroes to make a
994 * buffer suitable for bounds query calls. */
995 // @{
996
997 // The overload with one argument is 'explicit', so that
998 // (say) int is not implicitly convertible to Buffer<int>
999 explicit Buffer(int first) {
1000 static_assert(!T_is_void,
1001 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1002 int extents[] = {first};
1003 buf.type = static_halide_type();
1004 constexpr int buf_dimensions = 1;
1005 make_static_shape_storage<buf_dimensions>();
1006 initialize_shape(extents);
1007 if (first != 0) {
1008 check_overflow();
1009 allocate();
1010 }
1011 }
1012
1013 template<typename... Args,
1014 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1015 Buffer(int first, int second, Args... rest) {
1016 static_assert(!T_is_void,
1017 "To construct an Buffer<void>, pass a halide_type_t as the first argument to the constructor");
1018 int extents[] = {first, second, (int)rest...};
1019 buf.type = static_halide_type();
1020 constexpr int buf_dimensions = 2 + (int)(sizeof...(rest));
1021 make_static_shape_storage<buf_dimensions>();
1022 initialize_shape(extents);
1023 if (!Internal::any_zero(extents)) {
1024 check_overflow();
1025 allocate();
1026 }
1027 }
1028 // @}
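 // Illustrative usage sketch: an owned 3-channel image, plus a zero-sized buffer
 // suitable for bounds-query calls into a Halide pipeline.
 // \code
 // Buffer<uint8_t> img(640, 480, 3);  // allocates and owns host memory
 // Buffer<uint8_t> query(0, 0, 3);    // no allocation; extents filled in by a bounds query
 // \endcode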
1029
1030 /** Allocate a new image of unknown type using a vector of ints as the size. */
1031 Buffer(halide_type_t t, const std::vector<int> &sizes) {
1032 if (!T_is_void) {
1033 assert(static_halide_type() == t);
1034 }
1035 buf.type = t;
1036 // make_shape_storage() will do a runtime check that dimensionality matches.
1037 make_shape_storage((int)sizes.size());
1038 initialize_shape(sizes);
1039 if (!Internal::any_zero(sizes)) {
1040 check_overflow();
1041 allocate();
1042 }
1043 }
1044
1045 /** Allocate a new image of known type using a vector of ints as the size. */
1046 explicit Buffer(const std::vector<int> &sizes)
1047 : Buffer(static_halide_type(), sizes) {
1048 }
1049
1050private:
1051 // Create a copy of the sizes vector, ordered as specified by order.
1052 static std::vector<int> make_ordered_sizes(const std::vector<int> &sizes, const std::vector<int> &order) {
1053 assert(order.size() == sizes.size());
1054 std::vector<int> ordered_sizes(sizes.size());
1055 for (size_t i = 0; i < sizes.size(); ++i) {
1056 ordered_sizes[i] = sizes.at(order[i]);
1057 }
1058 return ordered_sizes;
1059 }
1060
1061public:
1062 /** Allocate a new image of unknown type using a vector of ints as the size and
1063 * a vector of indices indicating the storage order for each dimension. The
1064 * length of the sizes vector and the storage-order vector must match. For instance,
1065 * to allocate an interleaved RGB buffer, you would pass {2, 0, 1} for storage_order. */
1066 Buffer(halide_type_t t, const std::vector<int> &sizes, const std::vector<int> &storage_order)
1067 : Buffer(t, make_ordered_sizes(sizes, storage_order)) {
1068 transpose(storage_order);
1069 }
1070
1071 Buffer(const std::vector<int> &sizes, const std::vector<int> &storage_order)
1072 : Buffer(static_halide_type(), sizes, storage_order) {
1073 }
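 // Illustrative usage sketch: an interleaved 640x480 RGB image. Dimension 2
 // (channels) is innermost in memory, so it ends up with stride 1.
 // \code
 // Buffer<uint8_t> rgb({640, 480, 3}, {2, 0, 1});
 // // rgb.dim(2).stride() == 1, rgb.dim(0).stride() == 3
 // \endcode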
1074
1075 /** Make a Buffer that refers to a statically sized array. Does not
1076 * take ownership of the data, and does not set the host_dirty flag. */
1077 template<typename Array, size_t N>
1078 explicit Buffer(Array (&vals)[N]) {
1079 const int buf_dimensions = dimensionality_of_array(vals);
1080 buf.type = scalar_type_of_array(vals);
1081 buf.host = (uint8_t *)vals;
1082 make_shape_storage(buf_dimensions);
1083 initialize_shape_from_array_shape(buf.dimensions - 1, vals);
1084 }
1085
1086 /** Initialize a Buffer of runtime type from a pointer and some
1087 * sizes. Assumes dense row-major packing and a min coordinate of
1088 * zero. Does not take ownership of the data and does not set the
1089 * host_dirty flag. */
1090 template<typename... Args,
1091 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1092 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int first, Args &&...rest) {
1093 if (!T_is_void) {
1094 assert(static_halide_type() == t);
1095 }
1096 int extents[] = {first, (int)rest...};
1097 buf.type = t;
1098 buf.host = (uint8_t *)const_cast<void *>(data);
1099 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1100 make_static_shape_storage<buf_dimensions>();
1101 initialize_shape(extents);
1102 }
1103
1104 /** Initialize a Buffer from a pointer and some sizes. Assumes
1105 * dense row-major packing and a min coordinate of zero. Does not
1106 * take ownership of the data and does not set the host_dirty flag. */
1107 template<typename... Args,
1108 typename = typename std::enable_if<AllInts<Args...>::value>::type>
1109 explicit Buffer(T *data, int first, Args &&...rest) {
1110 int extents[] = {first, (int)rest...};
1111 buf.type = static_halide_type();
1112 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1113 constexpr int buf_dimensions = 1 + (int)(sizeof...(rest));
1114 make_static_shape_storage<buf_dimensions>();
1115 initialize_shape(extents);
1116 }
1117
1118 /** Initialize a Buffer from a pointer and a vector of
1119 * sizes. Assumes dense row-major packing and a min coordinate of
1120 * zero. Does not take ownership of the data and does not set the
1121 * host_dirty flag. */
1122 explicit Buffer(T *data, const std::vector<int> &sizes) {
1123 buf.type = static_halide_type();
1124 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1125 make_shape_storage((int)sizes.size());
1126 initialize_shape(sizes);
1127 }
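 // Illustrative usage sketch: wrapping memory owned elsewhere. The Buffer will
 // not free the pointer when it is destroyed.
 // \code
 // std::vector<float> storage(640 * 480);
 // Buffer<float> im(storage.data(), {640, 480});
 // \endcode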
1128
1129 /** Initialize a Buffer of runtime type from a pointer and a
1130 * vector of sizes. Assumes dense row-major packing and a min
1131 * coordinate of zero. Does not take ownership of the data and
1132 * does not set the host_dirty flag. */
1133 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, const std::vector<int> &sizes) {
1134 if (!T_is_void) {
1135 assert(static_halide_type() == t);
1136 }
1137 buf.type = t;
1138 buf.host = (uint8_t *)const_cast<void *>(data);
1139 make_shape_storage((int)sizes.size());
1140 initialize_shape(sizes);
1141 }
1142
1143 /** Initialize a Buffer from a pointer to the min coordinate and
1144 * an array describing the shape. Does not take ownership of the
1145 * data, and does not set the host_dirty flag. */
1146 explicit Buffer(halide_type_t t, add_const_if_T_is_const<void> *data, int d, const halide_dimension_t *shape) {
1147 if (!T_is_void) {
1148 assert(static_halide_type() == t);
1149 }
1150 buf.type = t;
1151 buf.host = (uint8_t *)const_cast<void *>(data);
1152 make_shape_storage(d);
1153 for (int i = 0; i < d; i++) {
1154 buf.dim[i] = shape[i];
1155 }
1156 }
1157
1158 /** Initialize a Buffer from a pointer to the min coordinate and
1159 * a vector describing the shape. Does not take ownership of the
1160 * data, and does not set the host_dirty flag. */
1161 explicit inline Buffer(halide_type_t t, add_const_if_T_is_const<void> *data,
1162 const std::vector<halide_dimension_t> &shape)
1163 : Buffer(t, data, (int)shape.size(), shape.data()) {
1164 }
1165
1166 /** Initialize a Buffer from a pointer to the min coordinate and
1167 * an array describing the shape. Does not take ownership of the
1168 * data and does not set the host_dirty flag. */
1169 explicit Buffer(T *data, int d, const halide_dimension_t *shape) {
1170 buf.type = static_halide_type();
1171 buf.host = (uint8_t *)const_cast<typename std::remove_const<T>::type *>(data);
1172 make_shape_storage(d);
1173 for (int i = 0; i < d; i++) {
1174 buf.dim[i] = shape[i];
1175 }
1176 }
1177
1178 /** Initialize a Buffer from a pointer to the min coordinate and
1179 * a vector describing the shape. Does not take ownership of the
1180 * data, and does not set the host_dirty flag. */
1181 explicit inline Buffer(T *data, const std::vector<halide_dimension_t> &shape)
1182 : Buffer(data, (int)shape.size(), shape.data()) {
1183 }
1184
1185 /** Destructor. Will release any underlying owned allocation if
1186 * this is the last reference to it. Will assert fail if there are
1187 * weak references to this Buffer outstanding. */
1188 ~Buffer() {
1189 decref();
1190 free_shape_storage();
1191 }
1192
1193 /** Get a pointer to the raw halide_buffer_t this wraps. */
1194 // @{
1195 halide_buffer_t *raw_buffer() {
1196 return &buf;
1197 }
1198
1199 const halide_buffer_t *raw_buffer() const {
1200 return &buf;
1201 }
1202 // @}
1203
1204 /** Provide a cast operator to halide_buffer_t *, so that
1205 * instances can be passed directly to Halide filters. */
1206 operator halide_buffer_t *() {
1207 return &buf;
1208 }
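 // Illustrative usage sketch: because of this conversion, Buffers can be passed
 // directly to AOT-compiled pipelines ('my_filter' is a hypothetical generated
 // function taking halide_buffer_t pointers).
 // \code
 // Buffer<uint8_t> in(640, 480), out(640, 480);
 // my_filter(in, out);
 // \endcode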
1209
1210 /** Return a typed reference to this Buffer. Useful for converting
1211 * a reference to a Buffer<void> to a reference to, for example, a
1212 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1213 * You can also optionally specify a new value for Dims; this is useful
1214 * mainly for removing the dimensionality constraint on a Buffer with
1215 * explicit dimensionality. Does a runtime assert if the source buffer type
1216 * is void or the new dimensionality is incompatible. */
1217 template<typename T2, int D2 = Dims>
1218 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> &as() & {
1219 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1220 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1221 }
1222
1223 /** Return a const typed reference to this Buffer. Useful for converting
1224 * a reference to a Buffer<void> to a reference to, for example, a
1225 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1226 * You can also optionally specify a new value for Dims; this is useful
1227 * mainly for removing the dimensionality constraint on a Buffer with
1228 * explicit dimensionality. Does a runtime assert if the source buffer type
1229 * is void or the new dimensionality is incompatible. */
1230 template<typename T2, int D2 = Dims>
1231 HALIDE_ALWAYS_INLINE const Buffer<T2, D2, InClassDimStorage> &as() const & {
1232 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1233 return *((const Buffer<T2, D2, InClassDimStorage> *)this);
1234 }
1235
1236 /** Return an rval reference to this Buffer. Useful for converting
1237 * a reference to a Buffer<void> to a reference to, for example, a
1238 * Buffer<const uint8_t>, or converting a Buffer<T>& to Buffer<const T>&.
1239 * You can also optionally specify a new value for Dims; this is useful
1240 * mainly for removing the dimensionality constraint on a Buffer with
1241 * explicit dimensionality. Does a runtime assert if the source buffer type
1242 * is void or the new dimensionality is incompatible. */
1243 template<typename T2, int D2 = Dims>
1244 HALIDE_ALWAYS_INLINE Buffer<T2, D2, InClassDimStorage> as() && {
1245 Buffer<T2, D2, InClassDimStorage>::assert_can_convert_from(*this);
1246 return *((Buffer<T2, D2, InClassDimStorage> *)this);
1247 }
1248
1249 /** as_const() is syntactic sugar for .as<const T>(), to avoid the need
1250 * to recapitulate the type argument. */
1251 // @{
1252 HALIDE_ALWAYS_INLINE
1253 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() & {
1254 // Note that we can skip the assert_can_convert_from(), since T -> const T
1255 // conversion is always legal.
1256 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1257 }
1258
1259 HALIDE_ALWAYS_INLINE
1260 const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> &as_const() const & {
1261 return *((const Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1262 }
1263
1264 HALIDE_ALWAYS_INLINE
1265 Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> as_const() && {
1266 return *((Buffer<typename std::add_const<T>::type, Dims, InClassDimStorage> *)this);
1267 }
1268 // @}
1269
1270 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<const T>& when
1271 * passing arguments */
1272 template<typename T2 = T, typename = typename std::enable_if<!std::is_const<T2>::value>::type>
1273 operator Buffer<typename std::add_const<T2>::type, Dims, InClassDimStorage> &() & {
1274 return as_const();
1275 }
1276
1277 /** Add some syntactic sugar to allow autoconversion from Buffer<T> to Buffer<void>& when
1278 * passing arguments */
1279 template<typename TVoid,
1280 typename T2 = T,
1281 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1282 !std::is_void<T2>::value &&
1283 !std::is_const<T2>::value>::type>
1284 operator Buffer<TVoid, Dims, InClassDimStorage> &() & {
1285 return as<TVoid, Dims>();
1286 }
1287
1288 /** Add some syntactic sugar to allow autoconversion from Buffer<const T> to Buffer<const void>& when
1289 * passing arguments */
1290 template<typename TVoid,
1291 typename T2 = T,
1292 typename = typename std::enable_if<std::is_same<TVoid, void>::value &&
1293 !std::is_void<T2>::value &&
1294 std::is_const<T2>::value>::type>
1295 operator Buffer<const TVoid, Dims, InClassDimStorage> &() & {
1296 return as<const TVoid, Dims>();
1297 }
1298
1299 /** Conventional names for the first three dimensions. */
1300 // @{
1301 int width() const {
1302 return (dimensions() > 0) ? dim(0).extent() : 1;
1303 }
1304 int height() const {
1305 return (dimensions() > 1) ? dim(1).extent() : 1;
1306 }
1307 int channels() const {
1308 return (dimensions() > 2) ? dim(2).extent() : 1;
1309 }
1310 // @}
1311
1312 /** Conventional names for the min and max value of each dimension */
1313 // @{
1314 int left() const {
1315 return dim(0).min();
1316 }
1317
1318 int right() const {
1319 return dim(0).max();
1320 }
1321
1322 int top() const {
1323 return dim(1).min();
1324 }
1325
1326 int bottom() const {
1327 return dim(1).max();
1328 }
1329 // @}
1330
1331 /** Make a new image which is a deep copy of this image. Use crop
1332 * or slice followed by copy to make a copy of only a portion of
1333 * the image. The new image uses the same memory layout as the
1334 * original, with holes compacted away. Note that the returned
1335 * Buffer is always of a non-const type T (ie:
1336 *
1337 * Buffer<const T>.copy() -> Buffer<T> rather than Buffer<const T>
1338 *
1339 * which is always safe, since we are making a deep copy. (The caller
1340 * can easily cast it back to Buffer<const T> if desired, which is
1341 * always safe and free.)
1342 */
1343 Buffer<not_const_T, Dims, InClassDimStorage> copy(void *(*allocate_fn)(size_t) = nullptr,
1344 void (*deallocate_fn)(void *) = nullptr) const {
1345 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_with_shape_of(*this, allocate_fn, deallocate_fn);
1346 dst.copy_from(*this);
1347 return dst;
1348 }
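 // Illustrative usage sketch: copy() always yields a Buffer with a non-const
 // element type, which can be cheaply viewed as const again if desired.
 // \code
 // Buffer<float> data(10, 10);
 // Buffer<const float> src = data;              // const view, no copy
 // Buffer<float> clone = src.copy();            // deep copy, same layout
 // Buffer<const float> view = clone.as_const(); // free, no copy
 // \endcode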
1349
1350 /** Like copy(), but the copy is created in interleaved memory layout
1351 * (vs. keeping the same memory layout as the original). Requires that 'this'
1352 * has exactly 3 dimensions.
1353 */
1354 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_interleaved(void *(*allocate_fn)(size_t) = nullptr,
1355 void (*deallocate_fn)(void *) = nullptr) const {
1356 static_assert(Dims == AnyDims || Dims == 3);
1357 assert(dimensions() == 3);
1358 Buffer<not_const_T, Dims, InClassDimStorage> dst = Buffer<not_const_T, Dims, InClassDimStorage>::make_interleaved(nullptr, width(), height(), channels());
1359 dst.set_min(min(0), min(1), min(2));
1360 dst.allocate(allocate_fn, deallocate_fn);
1361 dst.copy_from(*this);
1362 return dst;
1363 }
1364
1365 /** Like copy(), but the copy is created in planar memory layout
1366 * (vs. keeping the same memory layout as the original).
1367 */
1368 Buffer<not_const_T, Dims, InClassDimStorage> copy_to_planar(void *(*allocate_fn)(size_t) = nullptr,
1369 void (*deallocate_fn)(void *) = nullptr) const {
1370 std::vector<int> mins, extents;
1371 const int dims = dimensions();
1372 mins.reserve(dims);
1373 extents.reserve(dims);
1374 for (int d = 0; d < dims; ++d) {
1375 mins.push_back(dim(d).min());
1376 extents.push_back(dim(d).extent());
1377 }
1378 Buffer<not_const_T, Dims, InClassDimStorage> dst(type(), nullptr, extents);
1379 dst.set_min(mins);
1380 dst.allocate(allocate_fn, deallocate_fn);
1381 dst.copy_from(*this);
1382 return dst;
1383 }
1384
1385 /** Make a copy of the Buffer which shares the underlying host and/or device
1386 * allocations with the existing Buffer. This is purely syntactic sugar for
1387 * cases where you have a const reference to a Buffer but need a temporary
1388 * non-const copy (e.g. to make a call into AOT-generated Halide code), and want a terse
1389 * inline way to create a temporary. \code
1390 * void call_my_func(const Buffer<const uint8_t>& input) {
1391 * my_func(input.alias(), output);
1392 * }\endcode
1393 */
1394 Buffer<T, Dims, InClassDimStorage> alias() const {
1395 return *this;
1396 }
1397
1398 /** Fill a Buffer with the values at the same coordinates in
1399 * another Buffer. Restricts itself to coordinates contained
1400 * within the intersection of the two buffers. If the two Buffers
1401 * are not in the same coordinate system, you will need to
1402 * translate the argument Buffer first. E.g. if you're blitting a
1403 * sprite onto a framebuffer, you'll want to translate the sprite
1404 * to the correct location first like so: \code
1405 * framebuffer.copy_from(sprite.translated({x, y})); \endcode
1406 */
1407 template<typename T2, int D2, int S2>
1408 void copy_from(Buffer<T2, D2, S2> src) {
1409 static_assert(!std::is_const<T>::value, "Cannot call copy_from() on a Buffer<const T>");
1410 assert(!device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty destination.");
1411 assert(!src.device_dirty() && "Cannot call Halide::Runtime::Buffer::copy_from on a device dirty source.");
1412
1413 Buffer<T, Dims, InClassDimStorage> dst(*this);
1414
1415 static_assert(Dims == AnyDims || D2 == AnyDims || Dims == D2);
1416 assert(src.dimensions() == dst.dimensions());
1417
1418 // Trim the copy to the region in common
1419 const int d = dimensions();
1420 for (int i = 0; i < d; i++) {
1421 int min_coord = std::max(dst.dim(i).min(), src.dim(i).min());
1422 int max_coord = std::min(dst.dim(i).max(), src.dim(i).max());
1423 if (max_coord < min_coord) {
1424 // The buffers do not overlap.
1425 return;
1426 }
1427 dst.crop(i, min_coord, max_coord - min_coord + 1);
1428 src.crop(i, min_coord, max_coord - min_coord + 1);
1429 }
1430
1431 // If T is void, we need to do runtime dispatch to an
1432 // appropriately-typed lambda. We're copying, so we only care
1433 // about the element size. (If not, this should optimize away
1434 // into a static dispatch to the right-sized copy.)
1435 if (T_is_void ? (type().bytes() == 1) : (sizeof(not_void_T) == 1)) {
1436 using MemType = uint8_t;
1437 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1438 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1439 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1440 } else if (T_is_void ? (type().bytes() == 2) : (sizeof(not_void_T) == 2)) {
1441 using MemType = uint16_t;
1442 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1443 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1444 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1445 } else if (T_is_void ? (type().bytes() == 4) : (sizeof(not_void_T) == 4)) {
1446 using MemType = uint32_t;
1447 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1448 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1449 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1450 } else if (T_is_void ? (type().bytes() == 8) : (sizeof(not_void_T) == 8)) {
1451 using MemType = uint64_t;
1452 auto &typed_dst = (Buffer<MemType, Dims, InClassDimStorage> &)dst;
1453 auto &typed_src = (Buffer<const MemType, D2, S2> &)src;
1454 typed_dst.for_each_value([&](MemType &dst, MemType src) { dst = src; }, typed_src);
1455 } else {
1456 assert(false && "type().bytes() must be 1, 2, 4, or 8");
1457 }
1458 set_host_dirty();
1459 }
1460
1461 /** Make an image that refers to a sub-range of this image along
1462 * the given dimension. Asserts that the crop region is within
1463 * the existing bounds: you cannot "crop outwards", even if you know there
1464 * is valid Buffer storage (e.g. because you already cropped inwards). */
1465 Buffer<T, Dims, InClassDimStorage> cropped(int d, int min, int extent) const {
1466 // Make a fresh copy of the underlying buffer (but not a fresh
1467 // copy of the allocation, if there is one).
1468 Buffer<T, Dims, InClassDimStorage> im = *this;
1469
1470 // This guarantees the preexisting device ref is dropped if the
1471 // device_crop call fails and maintains the buffer in a consistent
1472 // state.
1473 im.device_deallocate();
1474
1475 im.crop_host(d, min, extent);
1476 if (buf.device_interface != nullptr) {
1477 complete_device_crop(im);
1478 }
1479 return im;
1480 }
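 // Illustrative usage sketch: a 100x100 window of 'im' starting at (10, 20).
 // The crop shares im's allocation; only the buffer metadata changes.
 // \code
 // Buffer<float> im(640, 480);
 // Buffer<float> win = im.cropped(0, 10, 100).cropped(1, 20, 100);
 // // equivalently: im.cropped({{10, 100}, {20, 100}})
 // \endcode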
1481
1482 /** Crop an image in-place along the given dimension. This does
1483 * not move any data around in memory - it just changes the min
1484 * and extent of the given dimension. */
1485 void crop(int d, int min, int extent) {
1486 // An optimization for non-device buffers. For the device case,
1487 // a temp buffer is required, so reuse the not-in-place version.
1488 // TODO(zalman|abadams): Are nop crops common enough to special
1489 // case the device part of the if to do nothing?
1490 if (buf.device_interface != nullptr) {
1491 *this = cropped(d, min, extent);
1492 } else {
1493 crop_host(d, min, extent);
1494 }
1495 }
1496
1497 /** Make an image that refers to a sub-rectangle of this image along
1498 * the first N dimensions. Asserts that the crop region is within
1499 * the existing bounds. The cropped image may drop any device handle
1500 * if the device_interface cannot accomplish the crop in-place. */
1501 Buffer<T, Dims, InClassDimStorage> cropped(const std::vector<std::pair<int, int>> &rect) const {
1502 // Make a fresh copy of the underlying buffer (but not a fresh
1503 // copy of the allocation, if there is one).
1504 Buffer<T, Dims, InClassDimStorage> im = *this;
1505
1506 // This guarantees the preexisting device ref is dropped if the
1507 // device_crop call fails and maintains the buffer in a consistent
1508 // state.
1509 im.device_deallocate();
1510
1511 im.crop_host(rect);
1512 if (buf.device_interface != nullptr) {
1513 complete_device_crop(im);
1514 }
1515 return im;
1516 }
1517
1518 /** Crop an image in-place along the first N dimensions. This does
1519 * not move any data around in memory, nor does it free memory. It
1520 * just rewrites the min/extent of each dimension to refer to a
1521 * subregion of the same allocation. */
1522 void crop(const std::vector<std::pair<int, int>> &rect) {
1523 // An optimization for non-device buffers. For the device case,
1524 // a temp buffer is required, so reuse the not-in-place version.
1525 // TODO(zalman|abadams): Are nop crops common enough to special
1526 // case the device part of the if to do nothing?
1527 if (buf.device_interface != nullptr) {
1528 *this = cropped(rect);
1529 } else {
1530 crop_host(rect);
1531 }
1532 }
1533
1534 /** Make an image which refers to the same data using
1535 * translated coordinates in the given dimension. Positive values
1536 * move the image data to the right or down relative to the
1537 * coordinate system. Drops any device handle. */
1538 Buffer<T, Dims, InClassDimStorage> translated(int d, int dx) const {
1539 Buffer<T, Dims, InClassDimStorage> im = *this;
1540 im.translate(d, dx);
1541 return im;
1542 }
1543
1544 /** Translate an image in-place along one dimension by changing
1545 * how it is indexed. Does not move any data around in memory. */
1546 void translate(int d, int delta) {
1547 assert(d >= 0 && d < this->dimensions());
1548 device_deallocate();
1549 buf.dim[d].min += delta;
1550 }
1551
1552 /** Make an image which refers to the same data translated along
1553 * the first N dimensions. */
1554 Buffer<T, Dims, InClassDimStorage> translated(const std::vector<int> &delta) const {
1555 Buffer<T, Dims, InClassDimStorage> im = *this;
1556 im.translate(delta);
1557 return im;
1558 }
1559
1560 /** Translate an image along the first N dimensions by changing
1561 * how it is indexed. Does not move any data around in memory. */
1562 void translate(const std::vector<int> &delta) {
1563 device_deallocate();
1564 assert(delta.size() <= static_cast<decltype(delta.size())>(std::numeric_limits<int>::max()));
1565 int limit = (int)delta.size();
1566 assert(limit <= dimensions());
1567 for (int i = 0; i < limit; i++) {
1568 translate(i, delta[i]);
1569 }
1570 }
1571
1572 /** Set the min coordinate of an image in the first N dimensions. */
1573 // @{
1574 void set_min(const std::vector<int> &mins) {
1575 assert(mins.size() <= static_cast<decltype(mins.size())>(dimensions()));
1576 device_deallocate();
1577 for (size_t i = 0; i < mins.size(); i++) {
1578 buf.dim[i].min = mins[i];
1579 }
1580 }
1581
1582 template<typename... Args>
1583 void set_min(Args... args) {
1584 set_min(std::vector<int>{args...});
1585 }
1586 // @}
1587
1588 /** Test if a given coordinate is within the bounds of an image. */
1589 // @{
1590 bool contains(const std::vector<int> &coords) const {
1591 assert(coords.size() <= static_cast<decltype(coords.size())>(dimensions()));
1592 for (size_t i = 0; i < coords.size(); i++) {
1593 if (coords[i] < dim((int)i).min() || coords[i] > dim((int)i).max()) {
1594 return false;
1595 }
1596 }
1597 return true;
1598 }
1599
1600 template<typename... Args>
1601 bool contains(Args... args) const {
1602 return contains(std::vector<int>{args...});
1603 }
1604 // @}
1605
1606 /** Make a buffer which refers to the same data in the same layout
1607 * using a swapped indexing order for the dimensions given. So
1608 * A = B.transposed(0, 1) means that A(i, j) == B(j, i), and more
1609 * strongly that A.address_of(i, j) == B.address_of(j, i). */
1610 Buffer<T, Dims, InClassDimStorage> transposed(int d1, int d2) const {
1611 Buffer<T, Dims, InClassDimStorage> im = *this;
1612 im.transpose(d1, d2);
1613 return im;
1614 }
1615
1616 /** Transpose a buffer in-place by changing how it is indexed. For
1617 * example, transpose(0, 1) on a two-dimensional buffer means that
1618 * the value referred to by coordinates (i, j) is now reached at
1619 * the coordinates (j, i), and vice versa. This is done by
1620 * reordering the per-dimension metadata rather than by moving
1621 * data around in memory, so other views of the same memory will
1622 * not see the data as having been transposed. */
1623 void transpose(int d1, int d2) {
1624 assert(d1 >= 0 && d1 < this->dimensions());
1625 assert(d2 >= 0 && d2 < this->dimensions());
1626 std::swap(buf.dim[d1], buf.dim[d2]);
1627 }
1628
1629 /** A generalized transpose: instead of swapping two dimensions,
1630 * pass a vector that lists each dimension index exactly once, in
1631 * the desired order. This does not move any data around in memory
1632 * - it just permutes how it is indexed. */
1633 void transpose(const std::vector<int> &order) {
1634 assert((int)order.size() == dimensions());
1635 if (dimensions() < 2) {
1636 // My, that was easy
1637 return;
1638 }
1639
1640 std::vector<int> order_sorted = order;
1641 for (size_t i = 1; i < order_sorted.size(); i++) {
1642 for (size_t j = i; j > 0 && order_sorted[j - 1] > order_sorted[j]; j--) {
1643 std::swap(order_sorted[j], order_sorted[j - 1]);
1644 transpose(j, j - 1);
1645 }
1646 }
1647 }
1648
1649 /** Make a buffer which refers to the same data in the same
1650 * layout using a different ordering of the dimensions. */
1651 Buffer<T, Dims, InClassDimStorage> transposed(const std::vector<int> &order) const {
1652        Buffer<T, Dims, InClassDimStorage> im = *this;
1653        im.transpose(order);
1654 return im;
1655 }
1656
1657 /** Make a lower-dimensional buffer that refers to one slice of
1658 * this buffer. */
1659 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1660 sliced(int d, int pos) const {
1661 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1662 assert(dimensions() > 0);
1663
1664        Buffer<T, AnyDims> im = *this;
1665
1666        // This guarantees the preexisting device ref is dropped if the
1667 // device_slice call fails and maintains the buffer in a consistent
1668 // state.
1669 im.device_deallocate();
1670
1671 im.slice_host(d, pos);
1672 if (buf.device_interface != nullptr) {
1673 complete_device_slice(im, d, pos);
1674 }
1675 return im;
1676 }
1677
1678 /** Make a lower-dimensional buffer that refers to one slice of this
1679 * buffer at the dimension's minimum. */
1680 Buffer<T, (Dims == AnyDims ? AnyDims : Dims - 1)>
1681 sliced(int d) const {
1682 static_assert(Dims == AnyDims || Dims > 0, "Cannot slice a 0-dimensional buffer");
1683 assert(dimensions() > 0);
1684
1685 return sliced(d, dim(d).min());
1686 }
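    /* A minimal usage sketch of sliced(): the slice is a lower-dimensional
     * view of the same allocation, so writes through it are visible in the
     * original buffer.
    \code
    Halide::Runtime::Buffer<float, 3> im(100, 100, 3);
    auto channel1 = im.sliced(2, 1);    // 2-D view of channel 1
    channel1.fill(2.0f);
    assert(im(37, 42, 1) == 2.0f);
    \endcode
    */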
1687
1688 /** Rewrite the buffer to refer to a single lower-dimensional
1689 * slice of itself along the given dimension at the given
1690 * coordinate. Does not move any data around or free the original
1691 * memory, so other views of the same data are unaffected. Can
1692 * only be called on a Buffer with dynamic dimensionality. */
1693 void slice(int d, int pos) {
1694 static_assert(Dims == AnyDims, "Cannot call slice() on a Buffer with static dimensionality.");
1695 assert(dimensions() > 0);
1696
1697 // An optimization for non-device buffers. For the device case,
1698 // a temp buffer is required, so reuse the not-in-place version.
1699 // TODO(zalman|abadams): Are nop slices common enough to special
1700 // case the device part of the if to do nothing?
1701 if (buf.device_interface != nullptr) {
1702 *this = sliced(d, pos);
1703 } else {
1704 slice_host(d, pos);
1705 }
1706 }
1707
1708 /** Slice a buffer in-place at the dimension's minimum. */
1709 inline void slice(int d) {
1710 slice(d, dim(d).min());
1711 }
1712
1713 /** Make a new buffer that views this buffer as a single slice in a
1714 * higher-dimensional space. The new dimension has extent one and
1715 * the given min. This operation is the opposite of slice. As an
1716 * example, the following condition is true:
1717 *
1718 \code
1719 im2 = im.embedded(1, 17);
1720 &im(x, y, c) == &im2(x, 17, y, c);
1721 \endcode
1722 */
1723 Buffer<T, (Dims == AnyDims ? AnyDims : Dims + 1)>
1724 embedded(int d, int pos = 0) const {
1725        Buffer<T, AnyDims> im(*this);
1726        im.embed(d, pos);
1727 return im;
1728 }
1729
1730 /** Embed a buffer in-place, increasing the
1731 * dimensionality. */
1732 void embed(int d, int pos = 0) {
1733 static_assert(Dims == AnyDims, "Cannot call embed() on a Buffer with static dimensionality.");
1734 assert(d >= 0 && d <= dimensions());
1735 add_dimension();
1736 translate(dimensions() - 1, pos);
1737 for (int i = dimensions() - 1; i > d; i--) {
1738 transpose(i, i - 1);
1739 }
1740 }
1741
1742 /** Add a new dimension with a min of zero and an extent of
1743 * one. The stride is the extent of the outermost dimension times
1744 * its stride. The new dimension is the last dimension. This is a
1745 * special case of embed. */
1746    void add_dimension() {
1747        static_assert(Dims == AnyDims, "Cannot call add_dimension() on a Buffer with static dimensionality.");
1748 const int dims = buf.dimensions;
1749 buf.dimensions++;
1750 if (buf.dim != shape) {
1751 // We're already on the heap. Reallocate.
1752            halide_dimension_t *new_shape = new halide_dimension_t[buf.dimensions];
1753            for (int i = 0; i < dims; i++) {
1754 new_shape[i] = buf.dim[i];
1755 }
1756 delete[] buf.dim;
1757 buf.dim = new_shape;
1758 } else if (dims == InClassDimStorage) {
1759 // Transition from the in-class storage to the heap
1760 make_shape_storage(buf.dimensions);
1761 for (int i = 0; i < dims; i++) {
1762 buf.dim[i] = shape[i];
1763 }
1764 } else {
1765 // We still fit in the class
1766 }
1767 buf.dim[dims] = {0, 1, 0};
1768 if (dims == 0) {
1769 buf.dim[dims].stride = 1;
1770 } else {
1771 buf.dim[dims].stride = buf.dim[dims - 1].extent * buf.dim[dims - 1].stride;
1772 }
1773 }
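    /* A minimal usage sketch of add_dimension(), using the default
     * dynamic-dimensionality Buffer:
    \code
    Halide::Runtime::Buffer<float> im(100, 100);
    im.add_dimension();    // now 100 x 100 x 1
    assert(im.dimensions() == 3);
    assert(im.dim(2).extent() == 1 && im.dim(2).stride() == 100 * 100);
    \endcode
    */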
1774
1775 /** Add a new dimension with a min of zero, an extent of one, and
1776 * the specified stride. The new dimension is the last
1777 * dimension. This is a special case of embed. */
1778    void add_dimension_with_stride(int s) {
1779        add_dimension();
1780 buf.dim[buf.dimensions - 1].stride = s;
1781 }
1782
1783 /** Methods for managing any GPU allocation. */
1784 // @{
1785 // Set the host dirty flag. Called by every operator()
1786 // access. Must be inlined so it can be hoisted out of loops.
1787    HALIDE_ALWAYS_INLINE
1788    void set_host_dirty(bool v = true) {
1789 assert((!v || !device_dirty()) && "Cannot set host dirty when device is already dirty. Call copy_to_host() before accessing the buffer from host.");
1790 buf.set_host_dirty(v);
1791 }
1792
1793 // Check if the device allocation is dirty. Called by
1794 // set_host_dirty, which is called by every accessor. Must be
1795 // inlined so it can be hoisted out of loops.
1796    HALIDE_ALWAYS_INLINE
1797    bool device_dirty() const {
1798 return buf.device_dirty();
1799 }
1800
1801 bool host_dirty() const {
1802 return buf.host_dirty();
1803 }
1804
1805 void set_device_dirty(bool v = true) {
1806 assert((!v || !host_dirty()) && "Cannot set device dirty when host is already dirty.");
1807 buf.set_device_dirty(v);
1808 }
1809
1810 int copy_to_host(void *ctx = nullptr) {
1811 if (device_dirty()) {
1812 return buf.device_interface->copy_to_host(ctx, &buf);
1813 }
1814        return halide_error_code_success;
1815    }
1816
1817 int copy_to_device(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1818 if (host_dirty()) {
1819 return device_interface->copy_to_device(ctx, &buf, device_interface);
1820        }
1821        return halide_error_code_success;
1822 }
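    /* A minimal sketch of the host/device dirty-bit protocol, assuming a
     * hypothetical AOT-compiled Halide filter "blur" that leaves its output
     * in a device (GPU) allocation:
    \code
    Halide::Runtime::Buffer<float> out(800, 600);
    blur(out);              // hypothetical pipeline; sets device_dirty()
    out.copy_to_host();     // copy back before reading on the CPU
    float v = out(0, 0);    // operator() asserts !device_dirty()
    \endcode
    */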
1823
1824 int device_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1825 return device_interface->device_malloc(ctx, &buf, device_interface);
1826 }
1827
1828 int device_free(void *ctx = nullptr) {
1829 if (dev_ref_count) {
1831 "Can't call device_free on an unmanaged or wrapped native device handle. "
1832 "Free the source allocation or call device_detach_native instead.");
1833 // Multiple people may be holding onto this dev field
1834 assert(dev_ref_count->count == 1 &&
1835 "Multiple Halide::Runtime::Buffer objects share this device "
1836 "allocation. Freeing it would create dangling references. "
1837 "Don't call device_free on Halide buffers that you have copied or "
1838 "passed by value.");
1839 }
1840        int ret = halide_error_code_success;
1841        if (buf.device_interface) {
1842 ret = buf.device_interface->device_free(ctx, &buf);
1843 }
1844 if (dev_ref_count) {
1845 delete dev_ref_count;
1846 dev_ref_count = nullptr;
1847 }
1848 return ret;
1849 }
1850
1851 int device_wrap_native(const struct halide_device_interface_t *device_interface,
1852 uint64_t handle, void *ctx = nullptr) {
1853 assert(device_interface);
1854 dev_ref_count = new DeviceRefCount;
1855        dev_ref_count->ownership = BufferDeviceOwnership::WrappedNative;
1856        return device_interface->wrap_native(ctx, &buf, handle, device_interface);
1857 }
1858
1859 int device_detach_native(void *ctx = nullptr) {
1860 assert(dev_ref_count &&
1862 "Only call device_detach_native on buffers wrapping a native "
1863 "device handle via device_wrap_native. This buffer was allocated "
1864 "using device_malloc, or is unmanaged. "
1865 "Call device_free or free the original allocation instead.");
1866 // Multiple people may be holding onto this dev field
1867 assert(dev_ref_count->count == 1 &&
1868 "Multiple Halide::Runtime::Buffer objects share this device "
1869 "allocation. Freeing it could create dangling references. "
1870 "Don't call device_detach_native on Halide buffers that you "
1871 "have copied or passed by value.");
1872        int ret = halide_error_code_success;
1873        if (buf.device_interface) {
1874 ret = buf.device_interface->detach_native(ctx, &buf);
1875 }
1876 delete dev_ref_count;
1877 dev_ref_count = nullptr;
1878 return ret;
1879 }
1880
1881 int device_and_host_malloc(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1882 return device_interface->device_and_host_malloc(ctx, &buf, device_interface);
1883 }
1884
1885 int device_and_host_free(const struct halide_device_interface_t *device_interface, void *ctx = nullptr) {
1886 if (dev_ref_count) {
1888 "Can't call device_and_host_free on a device handle not allocated with device_and_host_malloc. "
1889 "Free the source allocation or call device_detach_native instead.");
1890 // Multiple people may be holding onto this dev field
1891 assert(dev_ref_count->count == 1 &&
1892 "Multiple Halide::Runtime::Buffer objects share this device "
1893 "allocation. Freeing it would create dangling references. "
1894 "Don't call device_and_host_free on Halide buffers that you have copied or "
1895 "passed by value.");
1896 }
1897        int ret = halide_error_code_success;
1898        if (buf.device_interface) {
1899            ret = buf.device_interface->device_and_host_free(ctx, &buf);
1900        }
1901 if (dev_ref_count) {
1902 delete dev_ref_count;
1903 dev_ref_count = nullptr;
1904 }
1905 return ret;
1906 }
1907
1908 int device_sync(void *ctx = nullptr) {
1909 return buf.device_sync(ctx);
1910 }
1911
1912    bool has_device_allocation() const {
1913        return buf.device != 0;
1914 }
1915
1916 /** Return the method by which the device field is managed. */
1917    BufferDeviceOwnership device_ownership() const {
1918        if (dev_ref_count == nullptr) {
1919            return BufferDeviceOwnership::Allocated;
1920        }
1921 return dev_ref_count->ownership;
1922 }
1923 // @}
1924
1925 /** If you use the (x, y, c) indexing convention, then Halide
1926 * Buffers are stored planar by default. This function constructs
1927 * an interleaved RGB or RGBA image that can still be indexed
1928 * using (x, y, c). Passing it to a generator requires that the
1929 * generator has been compiled with support for interleaved (also
1930 * known as packed or chunky) memory layouts. */
1931 static Buffer<void, Dims, InClassDimStorage> make_interleaved(halide_type_t t, int width, int height, int channels) {
1932 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1933 Buffer<void, Dims, InClassDimStorage> im(t, channels, width, height);
1934 // Note that this is equivalent to calling transpose({2, 0, 1}),
1935 // but slightly more efficient.
1936 im.transpose(0, 1);
1937 im.transpose(1, 2);
1938 return im;
1939 }
1940
1941 /** If you use the (x, y, c) indexing convention, then Halide
1942 * Buffers are stored planar by default. This function constructs
1943 * an interleaved RGB or RGBA image that can still be indexed
1944 * using (x, y, c). Passing it to a generator requires that the
1945 * generator has been compiled with support for interleaved (also
1946 * known as packed or chunky) memory layouts. */
1947 static Buffer<T, Dims, InClassDimStorage> make_interleaved(int width, int height, int channels) {
1948 return make_interleaved(static_halide_type(), width, height, channels);
1949 }
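    /* A minimal usage sketch of make_interleaved():
    \code
    auto im = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
    assert(im.dim(2).stride() == 1);    // channels are adjacent in memory
    assert(im.dim(0).stride() == 3);    // stepping in x skips over the channels
    im(10, 20, 2) = 255;                // still indexed as (x, y, c)
    \endcode
    */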
1950
1951 /** Wrap an existing interleaved image. */
1952    static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage>
1953    make_interleaved(halide_type_t t, T *data, int width, int height, int channels) {
1954 static_assert(Dims == AnyDims || Dims == 3, "make_interleaved() must be called on a Buffer that can represent 3 dimensions.");
1955 Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> im(t, data, channels, width, height);
1956 im.transpose(0, 1);
1957 im.transpose(1, 2);
1958 return im;
1959 }
1960
1961 /** Wrap an existing interleaved image. */
1962 static Buffer<T, Dims, InClassDimStorage> make_interleaved(T *data, int width, int height, int channels) {
1963 return make_interleaved(static_halide_type(), data, width, height, channels);
1964 }
1965
1966 /** Make a zero-dimensional Buffer */
1967    static Buffer<add_const_if_T_is_const<void>, Dims, InClassDimStorage> make_scalar(halide_type_t t) {
1968        static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1969        Buffer<add_const_if_T_is_const<void>, AnyDims, InClassDimStorage> buf(t, 1);
1970        buf.slice(0, 0);
1971 return buf;
1972 }
1973
1974 /** Make a zero-dimensional Buffer */
1975    static Buffer<T, Dims, InClassDimStorage> make_scalar() {
1976        static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1977        Buffer<T, AnyDims, InClassDimStorage> buf(1);
1978        buf.slice(0, 0);
1979 return buf;
1980 }
1981
1982 /** Make a zero-dimensional Buffer that points to non-owned, existing data */
1983    static Buffer<T, Dims, InClassDimStorage> make_scalar(T *data) {
1984        static_assert(Dims == AnyDims || Dims == 0, "make_scalar() must be called on a Buffer that can represent 0 dimensions.");
1985        Buffer<T, AnyDims, InClassDimStorage> buf(data, 1);
1986        buf.slice(0, 0);
1987 return buf;
1988 }
1989
1990 /** Make a buffer with the same shape and memory nesting order as
1991 * another buffer. It may have a different type. */
1992 template<typename T2, int D2, int S2>
1993    static Buffer<T, Dims, InClassDimStorage> make_with_shape_of(Buffer<T2, D2, S2> src,
1994                                                                 void *(*allocate_fn)(size_t) = nullptr,
1995                                                                 void (*deallocate_fn)(void *) = nullptr) {
1996        static_assert(Dims == D2 || Dims == AnyDims);
1997        const halide_type_t dst_type = T_is_void ? src.type() : halide_type_of<typename std::remove_cv<not_void_T>::type>();
1998        return Buffer<>::make_with_shape_of_helper(dst_type, src.dimensions(), src.buf.dim,
1999                                                   allocate_fn, deallocate_fn);
2000 }
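    /* A minimal usage sketch of make_with_shape_of(): the new buffer copies
     * the extents and memory nesting order of the source, but may hold a
     * different element type.
    \code
    auto src = Halide::Runtime::Buffer<uint8_t>::make_interleaved(640, 480, 3);
    auto dst = Halide::Runtime::Buffer<float>::make_with_shape_of(src);
    assert(dst.dim(2).stride() == 1 && dst.dim(0).stride() == 3);    // still interleaved
    \endcode
    */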
2001
2002private:
2003 static Buffer<> make_with_shape_of_helper(halide_type_t dst_type,
2004 int dimensions,
2005 halide_dimension_t *shape,
2006 void *(*allocate_fn)(size_t),
2007 void (*deallocate_fn)(void *)) {
2008 // Reorder the dimensions of src to have strides in increasing order
2009 std::vector<int> swaps;
2010 for (int i = dimensions - 1; i > 0; i--) {
2011 for (int j = i; j > 0; j--) {
2012 if (shape[j - 1].stride > shape[j].stride) {
2013 std::swap(shape[j - 1], shape[j]);
2014 swaps.push_back(j);
2015 }
2016 }
2017 }
2018
2019 // Rewrite the strides to be dense (this messes up src, which
2020 // is why we took it by value).
2021 for (int i = 0; i < dimensions; i++) {
2022 if (i == 0) {
2023 shape[i].stride = 1;
2024 } else {
2025 shape[i].stride = shape[i - 1].extent * shape[i - 1].stride;
2026 }
2027 }
2028
2029 // Undo the dimension reordering
2030 while (!swaps.empty()) {
2031 int j = swaps.back();
2032 std::swap(shape[j - 1], shape[j]);
2033 swaps.pop_back();
2034 }
2035
2036 // Use an explicit runtime type, and make dst a Buffer<void>, to allow
2037 // using this method with Buffer<void> for either src or dst.
2038 Buffer<> dst(dst_type, nullptr, dimensions, shape);
2039 dst.allocate(allocate_fn, deallocate_fn);
2040
2041 return dst;
2042 }
2043
2044 template<typename... Args>
2045    HALIDE_ALWAYS_INLINE
2046    ptrdiff_t
2047 offset_of(int d, int first, Args... rest) const {
2048#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2049 assert(first >= this->buf.dim[d].min);
2050 assert(first < this->buf.dim[d].min + this->buf.dim[d].extent);
2051#endif
2052 return offset_of(d + 1, rest...) + (ptrdiff_t)this->buf.dim[d].stride * (first - this->buf.dim[d].min);
2053 }
2054
2055    HALIDE_ALWAYS_INLINE
2056    ptrdiff_t offset_of(int d) const {
2057 return 0;
2058 }
2059
2060 template<typename... Args>
2061    HALIDE_ALWAYS_INLINE
2062    storage_T *
2063 address_of(Args... args) const {
2064 if (T_is_void) {
2065 return (storage_T *)(this->buf.host) + offset_of(0, args...) * type().bytes();
2066 } else {
2067 return (storage_T *)(this->buf.host) + offset_of(0, args...);
2068 }
2069 }
2070
2071    HALIDE_ALWAYS_INLINE
2072    ptrdiff_t offset_of(const int *pos) const {
2073 ptrdiff_t offset = 0;
2074 for (int i = this->dimensions() - 1; i >= 0; i--) {
2075#if HALIDE_RUNTIME_BUFFER_CHECK_INDICES
2076 assert(pos[i] >= this->buf.dim[i].min);
2077 assert(pos[i] < this->buf.dim[i].min + this->buf.dim[i].extent);
2078#endif
2079 offset += (ptrdiff_t)this->buf.dim[i].stride * (pos[i] - this->buf.dim[i].min);
2080 }
2081 return offset;
2082 }
2083
2084    HALIDE_ALWAYS_INLINE
2085    storage_T *address_of(const int *pos) const {
2086 if (T_is_void) {
2087 return (storage_T *)this->buf.host + offset_of(pos) * type().bytes();
2088 } else {
2089 return (storage_T *)this->buf.host + offset_of(pos);
2090 }
2091 }
2092
2093public:
2094 /** Get a pointer to the address of the min coordinate. */
2095 T *data() const {
2096 return (T *)(this->buf.host);
2097 }
2098
2099 /** Access elements. Use im(...) to get a reference to an element,
2100 * and use &im(...) to get the address of an element. If you pass
2101 * fewer arguments than the buffer has dimensions, the rest are
2102 * treated as their min coordinate. The non-const versions set the
2103 * host_dirty flag to true.
2104 */
2105 //@{
2106 template<typename... Args,
2107 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2108 HALIDE_ALWAYS_INLINE const not_void_T &operator()(int first, Args... rest) const {
2109 static_assert(!T_is_void,
2110 "Cannot use operator() on Buffer<void> types");
2111 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2112 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2113 assert(!device_dirty());
2114 return *((const not_void_T *)(address_of(first, rest...)));
2115 }
2116
2117    HALIDE_ALWAYS_INLINE
2118    const not_void_T &
2119 operator()() const {
2120 static_assert(!T_is_void,
2121 "Cannot use operator() on Buffer<void> types");
2122 constexpr int expected_dims = 0;
2123 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2124 assert(!device_dirty());
2125 return *((const not_void_T *)(data()));
2126 }
2127
2128    HALIDE_ALWAYS_INLINE
2129    const not_void_T &
2130 operator()(const int *pos) const {
2131 static_assert(!T_is_void,
2132 "Cannot use operator() on Buffer<void> types");
2133 assert(!device_dirty());
2134 return *((const not_void_T *)(address_of(pos)));
2135 }
2136
2137 template<typename... Args,
2138 typename = typename std::enable_if<AllInts<Args...>::value>::type>
2139    HALIDE_ALWAYS_INLINE
2140    not_void_T &
2141 operator()(int first, Args... rest) {
2142 static_assert(!T_is_void,
2143 "Cannot use operator() on Buffer<void> types");
2144 constexpr int expected_dims = 1 + (int)(sizeof...(rest));
2145 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2146 set_host_dirty();
2147 return *((not_void_T *)(address_of(first, rest...)));
2148 }
2149
2150    HALIDE_ALWAYS_INLINE
2151    not_void_T &
2152    operator()() {
2153        static_assert(!T_is_void,
2154 "Cannot use operator() on Buffer<void> types");
2155 constexpr int expected_dims = 0;
2156 static_assert(Dims == AnyDims || Dims == expected_dims, "Buffer with static dimensions was accessed with the wrong number of coordinates in operator()");
2157 set_host_dirty();
2158 return *((not_void_T *)(data()));
2159 }
2160
2161    HALIDE_ALWAYS_INLINE
2162    not_void_T &
2163 operator()(const int *pos) {
2164 static_assert(!T_is_void,
2165 "Cannot use operator() on Buffer<void> types");
2166 set_host_dirty();
2167 return *((not_void_T *)(address_of(pos)));
2168 }
2169 // @}
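    /* A minimal usage sketch of element access:
    \code
    Halide::Runtime::Buffer<int, 2> im(4, 4);
    im(1, 2) = 42;                    // non-const access sets host_dirty()
    const int pos[2] = {1, 2};
    assert(im(pos) == 42);            // coordinate-array form of operator()
    assert(&im(1, 2) == &im(pos));    // both refer to the same element
    \endcode
    */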
2170
2171 /** Tests that all values in this buffer are equal to val. */
2172 bool all_equal(not_void_T val) const {
2173 bool all_equal = true;
2174 for_each_element([&](const int *pos) { all_equal &= (*this)(pos) == val; });
2175 return all_equal;
2176 }
2177
2178    Buffer<T, Dims, InClassDimStorage> &fill(not_void_T val) {
2179        set_host_dirty();
2180 for_each_value([=](T &v) { v = val; });
2181 return *this;
2182 }
2183
2184private:
2185 /** Helper functions for for_each_value. */
2186 // @{
2187 template<int N>
2188 struct for_each_value_task_dim {
2189 std::ptrdiff_t extent;
2190 std::ptrdiff_t stride[N];
2191 };
2192
2193 // Given an array of strides, and a bunch of pointers to pointers
2194 // (all of different types), advance the pointers using the
2195 // strides.
2196 template<typename Ptr, typename... Ptrs>
2197 HALIDE_ALWAYS_INLINE static void advance_ptrs(const std::ptrdiff_t *stride, Ptr &ptr, Ptrs &...ptrs) {
2198 ptr += *stride;
2199 advance_ptrs(stride + 1, ptrs...);
2200 }
2201
2202    HALIDE_ALWAYS_INLINE
2203    static void advance_ptrs(const std::ptrdiff_t *) {
2204 }
2205
2206 template<typename Fn, typename Ptr, typename... Ptrs>
2207 HALIDE_NEVER_INLINE static void for_each_value_helper(Fn &&f, int d, bool innermost_strides_are_one,
2208 const for_each_value_task_dim<sizeof...(Ptrs) + 1> *t, Ptr ptr, Ptrs... ptrs) {
2209 if (d == 0) {
2210            if (innermost_strides_are_one) {
2211                Ptr end = ptr + t[0].extent;
2212 while (ptr != end) {
2213 f(*ptr++, (*ptrs++)...);
2214 }
2215 } else {
2216 for (std::ptrdiff_t i = t[0].extent; i != 0; i--) {
2217 f(*ptr, (*ptrs)...);
2218 advance_ptrs(t[0].stride, ptr, ptrs...);
2219 }
2220 }
2221 } else {
2222 for (std::ptrdiff_t i = t[d].extent; i != 0; i--) {
2223 for_each_value_helper(f, d - 1, innermost_strides_are_one, t, ptr, ptrs...);
2224 advance_ptrs(t[d].stride, ptr, ptrs...);
2225 }
2226 }
2227 }
2228
2229 // Return pair is <new_dimensions, innermost_strides_are_one>
2230 template<int N>
2231 HALIDE_NEVER_INLINE static std::pair<int, bool> for_each_value_prep(for_each_value_task_dim<N> *t,
2232 const halide_buffer_t **buffers) {
2233 const int dimensions = buffers[0]->dimensions;
2234 assert(dimensions > 0);
2235
2236 // Check the buffers all have clean host allocations
2237 for (int i = 0; i < N; i++) {
2238 if (buffers[i]->device) {
2239 assert(buffers[i]->host &&
2240 "Buffer passed to for_each_value has device allocation but no host allocation. Call allocate() and copy_to_host() first");
2241 assert(!buffers[i]->device_dirty() &&
2242 "Buffer passed to for_each_value is dirty on device. Call copy_to_host() first");
2243 } else {
2244 assert(buffers[i]->host &&
2245 "Buffer passed to for_each_value has no host or device allocation");
2246 }
2247 }
2248
2249 // Extract the strides in all the dimensions
2250 for (int i = 0; i < dimensions; i++) {
2251 for (int j = 0; j < N; j++) {
2252 assert(buffers[j]->dimensions == dimensions);
2253 assert(buffers[j]->dim[i].extent == buffers[0]->dim[i].extent &&
2254 buffers[j]->dim[i].min == buffers[0]->dim[i].min);
2255 const int s = buffers[j]->dim[i].stride;
2256 t[i].stride[j] = s;
2257 }
2258 t[i].extent = buffers[0]->dim[i].extent;
2259
2260 // Order the dimensions by stride, so that the traversal is cache-coherent.
2261 // Use the last dimension for this, because this is the source in copies.
2262 // It appears to be better to optimize read order than write order.
2263 for (int j = i; j > 0 && t[j].stride[N - 1] < t[j - 1].stride[N - 1]; j--) {
2264 std::swap(t[j], t[j - 1]);
2265 }
2266 }
2267
2268 // flatten dimensions where possible to make a larger inner
2269 // loop for autovectorization.
2270 int d = dimensions;
2271 for (int i = 1; i < d; i++) {
2272 bool flat = true;
2273 for (int j = 0; j < N; j++) {
2274 flat = flat && t[i - 1].stride[j] * t[i - 1].extent == t[i].stride[j];
2275 }
2276 if (flat) {
2277 t[i - 1].extent *= t[i].extent;
2278 for (int j = i; j < d - 1; j++) {
2279 t[j] = t[j + 1];
2280 }
2281 i--;
2282 d--;
2283 }
2284 }
2285
2286 // Note that we assert() that dimensions > 0 above
2287 // (our one-and-only caller will only call us that way)
2288 // so the unchecked access to t[0] should be safe.
2289 bool innermost_strides_are_one = true;
2290 for (int i = 0; i < N; i++) {
2291 innermost_strides_are_one &= (t[0].stride[i] == 1);
2292 }
2293
2294 return {d, innermost_strides_are_one};
2295 }
2296
2297 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2298 void for_each_value_impl(Fn &&f, Args &&...other_buffers) const {
2299 if (dimensions() > 0) {
2300 const size_t alloc_size = dimensions() * sizeof(for_each_value_task_dim<N>);
2301            for_each_value_task_dim<N> *t =
2302                (for_each_value_task_dim<N> *)HALIDE_ALLOCA(alloc_size);
2303            // Move the preparatory code into a non-templated helper to
2304 // save code size.
2305 const halide_buffer_t *buffers[] = {&buf, (&other_buffers.buf)...};
2306            auto [new_dims, innermost_strides_are_one] = for_each_value_prep(t, buffers);
2307            if (new_dims > 0) {
2308                for_each_value_helper(f, new_dims - 1,
2309                                      innermost_strides_are_one,
2310                                      t,
2311 data(), (other_buffers.data())...);
2312 return;
2313 }
2314 // else fall thru
2315 }
2316
2317 // zero-dimensional case
2318 f(*data(), (*other_buffers.data())...);
2319 }
2320 // @}
2321
2322public:
2323 /** Call a function on every value in the buffer, and the
2324 * corresponding values in some number of other buffers of the
2325 * same size. The function should take a reference, const
2326 * reference, or value of the correct type for each buffer. This
2327 * effectively lifts a function of scalars to an element-wise
2328 * function of buffers. This produces code that the compiler can
2329 * autovectorize. This is slightly cheaper than for_each_element,
2330 * because it does not need to track the coordinates.
2331 *
2332 * Note that constness of Buffers is preserved: a const Buffer<T> (for either
2333 * 'this' or the other-buffers arguments) will allow mutation of the
2334 * buffer contents, while a Buffer<const T> will not. Attempting to specify
2335 * a mutable reference for the lambda argument of a Buffer<const T>
2336 * will result in a compilation error. */
2337 // @{
2338 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2339    HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_value(Fn &&f, Args &&...other_buffers) const {
2340        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2341 return *this;
2342 }
2343
2344 template<typename Fn, typename... Args, int N = sizeof...(Args) + 1>
2345    HALIDE_ALWAYS_INLINE
2346    Buffer<T, Dims, InClassDimStorage> &
2347    for_each_value(Fn &&f, Args &&...other_buffers) {
2348        for_each_value_impl(f, std::forward<Args>(other_buffers)...);
2349 return *this;
2350 }
2351 // @}
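    /* A minimal usage sketch of for_each_value(): an element-wise operation
     * over two buffers of the same shape.
    \code
    Halide::Runtime::Buffer<float, 2> a(32, 32), b(32, 32);
    b.fill(2.0f);
    a.for_each_value([](float &av, float bv) { av = bv * 2.0f; }, b);
    assert(a.all_equal(4.0f));
    \endcode
    */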
2352
2353private:
2354 // Helper functions for for_each_element
2355 struct for_each_element_task_dim {
2356 int min, max;
2357 };
2358
2359 /** If f is callable with this many args, call it. The first
2360 * argument is just to make the overloads distinct. Actual
2361 * overload selection is done using the enable_if. */
2362 template<typename Fn,
2363 typename... Args,
2364 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2365 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(int, int, const for_each_element_task_dim *, Fn &&f, Args... args) {
2366 f(args...);
2367 }
2368
2369 /** If the above overload is impossible, we add an outer loop over
2370 * an additional argument and try again. */
2371 template<typename Fn,
2372 typename... Args>
2373 HALIDE_ALWAYS_INLINE static void for_each_element_variadic(double, int d, const for_each_element_task_dim *t, Fn &&f, Args... args) {
2374 for (int i = t[d].min; i <= t[d].max; i++) {
2375 for_each_element_variadic(0, d - 1, t, std::forward<Fn>(f), i, args...);
2376 }
2377 }
2378
2379 /** Determine the minimum number of arguments a callable can take
2380 * using the same trick. */
2381 template<typename Fn,
2382 typename... Args,
2383 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2384 HALIDE_ALWAYS_INLINE static int num_args(int, Fn &&, Args...) {
2385 return (int)(sizeof...(Args));
2386 }
2387
2388 /** The recursive version is only enabled up to a recursion limit
2389 * of 256. This catches callables that aren't callable with any
2390 * number of ints. */
2391 template<typename Fn,
2392 typename... Args>
2393 HALIDE_ALWAYS_INLINE static int num_args(double, Fn &&f, Args... args) {
2394 static_assert(sizeof...(args) <= 256,
2395 "Callable passed to for_each_element must accept either a const int *,"
2396 " or up to 256 ints. No such operator found. Expect infinite template recursion.");
2397 return num_args(0, std::forward<Fn>(f), 0, args...);
2398 }
2399
2400 /** A version where the callable takes a position array instead,
2401 * with compile-time recursion on the dimensionality. This
2402 * overload is preferred to the one below using the same int vs
2403 * double trick as above, but is impossible once d hits -1 using
2404 * std::enable_if. */
2405 template<int d,
2406 typename Fn,
2407 typename = typename std::enable_if<(d >= 0)>::type>
2408 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(int, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2409 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2410 for_each_element_array_helper<d - 1>(0, t, std::forward<Fn>(f), pos);
2411 }
2412 }
2413
2414 /** Base case for recursion above. */
2415 template<int d,
2416 typename Fn,
2417 typename = typename std::enable_if<(d < 0)>::type>
2418 HALIDE_ALWAYS_INLINE static void for_each_element_array_helper(double, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2419 f(pos);
2420 }
2421
2422 /** A run-time-recursive version (instead of
2423 * compile-time-recursive) that requires the callable to take a
2424 * pointer to a position array instead. Dispatches to the
2425 * compile-time-recursive version once the dimensionality gets
2426 * small. */
2427 template<typename Fn>
2428 static void for_each_element_array(int d, const for_each_element_task_dim *t, Fn &&f, int *pos) {
2429 if (d == -1) {
2430 f(pos);
2431 } else if (d == 0) {
2432 // Once the dimensionality gets small enough, dispatch to
2433 // a compile-time-recursive version for better codegen of
2434 // the inner loops.
2435 for_each_element_array_helper<0, Fn>(0, t, std::forward<Fn>(f), pos);
2436 } else if (d == 1) {
2437 for_each_element_array_helper<1, Fn>(0, t, std::forward<Fn>(f), pos);
2438 } else if (d == 2) {
2439 for_each_element_array_helper<2, Fn>(0, t, std::forward<Fn>(f), pos);
2440 } else if (d == 3) {
2441 for_each_element_array_helper<3, Fn>(0, t, std::forward<Fn>(f), pos);
2442 } else {
2443 for (pos[d] = t[d].min; pos[d] <= t[d].max; pos[d]++) {
2444 for_each_element_array(d - 1, t, std::forward<Fn>(f), pos);
2445 }
2446 }
2447 }
2448
2449 /** We now have two overloads for for_each_element. This one
2450 * triggers if the callable takes a const int *.
2451 */
2452 template<typename Fn,
2453 typename = decltype(std::declval<Fn>()((const int *)nullptr))>
2454 static void for_each_element(int, int dims, const for_each_element_task_dim *t, Fn &&f, int check = 0) {
2455 const int size = dims * sizeof(int);
2456 int *pos = (int *)HALIDE_ALLOCA(size);
2457 // At least one version of GCC will (incorrectly) report that pos "may be used uninitialized".
2458 // Add this memset to silence it.
2459 memset(pos, 0, size);
2460 for_each_element_array(dims - 1, t, std::forward<Fn>(f), pos);
2461 }
2462
2463 /** This one triggers otherwise. It treats the callable as
2464 * something that takes some number of ints. */
2465 template<typename Fn>
2466 HALIDE_ALWAYS_INLINE static void for_each_element(double, int dims, const for_each_element_task_dim *t, Fn &&f) {
2467 int args = num_args(0, std::forward<Fn>(f));
2468 assert(dims >= args);
2469 for_each_element_variadic(0, args - 1, t, std::forward<Fn>(f));
2470 }
2471
2472 template<typename Fn>
2473 void for_each_element_impl(Fn &&f) const {
2474 for_each_element_task_dim *t =
2475 (for_each_element_task_dim *)HALIDE_ALLOCA(dimensions() * sizeof(for_each_element_task_dim));
2476 for (int i = 0; i < dimensions(); i++) {
2477 t[i].min = dim(i).min();
2478 t[i].max = dim(i).max();
2479 }
2480 for_each_element(0, dimensions(), t, std::forward<Fn>(f));
2481 }
2482
2483public:
2484 /** Call a function at each site in a buffer. This is likely to be
2485 * much slower than using Halide code to populate a buffer, but is
2486 * convenient for tests. If the function has more arguments than the
2487 * buffer has dimensions, the remaining arguments will be zero. If it
2488 * has fewer arguments than the buffer has dimensions then the last
2489 * few dimensions of the buffer are not iterated over. For example,
2490 * the following code exploits this to set a floating point RGB image
2491 * to red:
2492
2493 \code
2494 Buffer<float, 3> im(100, 100, 3);
2495 im.for_each_element([&](int x, int y) {
2496 im(x, y, 0) = 1.0f;
2497 im(x, y, 1) = 0.0f;
2498 im(x, y, 2) = 0.0f;
2499 });
2500 \endcode
2501
2502 * The compiled code is equivalent to writing a nested for loop,
2503 * and compilers are capable of optimizing it in the same way.
2504 *
2505 * If the callable can be called with an int * as the sole argument,
2506 * that version is called instead. Each location in the buffer is
2507 * passed to it in a coordinate array. This version is higher-overhead
2508 * than the variadic version, but is useful for writing generic code
2509 * that accepts buffers of arbitrary dimensionality. For example, the
2510 * following sets the value at all sites in an arbitrary-dimensional
2511 * buffer to their first coordinate:
2512
2513 \code
2514 im.for_each_element([&](const int *pos) {im(pos) = pos[0];});
2515 \endcode
2516
2517 * It is also possible to use for_each_element to iterate over entire
2518 * rows or columns by cropping the buffer to a single column or row
2519 * respectively and iterating over elements of the result. For example,
2520 * to set the diagonal of the image to 1 by iterating over the columns:
2521
2522 \code
2523 Buffer<float, 3> im(100, 100, 3);
2524 im.sliced(1, 0).for_each_element([&](int x, int c) {
2525 im(x, x, c) = 1.0f;
2526 });
2527 \endcode
2528
2529 * Or, assuming the memory layout is known to be dense per row, one can
2530 * memset each row of an image like so:
2531
2532 \code
2533 Buffer<float, 3> im(100, 100, 3);
2534 im.sliced(0, 0).for_each_element([&](int y, int c) {
2535 memset(&im(0, y, c), 0, sizeof(float) * im.width());
2536 });
2537 \endcode
2538
2539 */
2540 // @{
2541 template<typename Fn>
2542    HALIDE_ALWAYS_INLINE const Buffer<T, Dims, InClassDimStorage> &for_each_element(Fn &&f) const {
2543        for_each_element_impl(f);
2544 return *this;
2545 }
2546
2547 template<typename Fn>
2548    HALIDE_ALWAYS_INLINE
2549    Buffer<T, Dims, InClassDimStorage> &
2550    for_each_element(Fn &&f) {
2551        for_each_element_impl(f);
2552 return *this;
2553 }
2554 // @}
2555
2556private:
2557 template<typename Fn>
2558 struct FillHelper {
2559 Fn f;
2560        Buffer<T, Dims, InClassDimStorage> *buf;
2561
2562 template<typename... Args,
2563 typename = decltype(std::declval<Fn>()(std::declval<Args>()...))>
2564 void operator()(Args... args) {
2565 (*buf)(args...) = f(args...);
2566 }
2567
2568 FillHelper(Fn &&f, Buffer<T, Dims, InClassDimStorage> *buf)
2569 : f(std::forward<Fn>(f)), buf(buf) {
2570 }
2571 };
2572
2573public:
2574 /** Fill a buffer by evaluating a callable at every site. The
2575 * callable should look much like a callable passed to
2576 * for_each_element, but it should return the value that should be
2577 * stored to the coordinate corresponding to the arguments. */
2578 template<typename Fn,
2579 typename = typename std::enable_if<!std::is_arithmetic<typename std::decay<Fn>::type>::value>::type>
2580    Buffer<T, Dims, InClassDimStorage> &fill(Fn &&f) {
2581        // We'll go via for_each_element. We need a variadic wrapper lambda.
2582 FillHelper<Fn> wrapper(std::forward<Fn>(f), this);
2583 return for_each_element(wrapper);
2584 }
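    /* A minimal usage sketch of fill() with a callable: the callable receives
     * coordinates and returns the value to store there.
    \code
    Halide::Runtime::Buffer<float, 2> im(8, 8);
    im.fill([](int x, int y) { return float(x + 8 * y); });
    assert(im(3, 2) == 19.0f);
    \endcode
    */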
2585
2586    /** Check if an input buffer passed to an extern stage is a bounds
2587     * query. Compared to doing the host pointer check directly,
2588 * this both adds clarity to code and will facilitate moving to
2589 * another representation for bounds query arguments. */
2590 bool is_bounds_query() const {
2591 return buf.is_bounds_query();
2592 }
2593
2594 /** Convenient check to verify that all of the interesting bytes in the Buffer
2595 * are initialized under MSAN. Note that by default, we use for_each_value() here so that
2596 * we skip any unused padding that isn't part of the Buffer; this isn't efficient,
2597 * but in MSAN mode, it doesn't matter. (Pass true for the flag to force check
2598 * the entire Buffer storage.) */
2599 void msan_check_mem_is_initialized(bool entire = false) const {
2600#if defined(__has_feature)
2601#if __has_feature(memory_sanitizer)
2602 if (entire) {
2603 __msan_check_mem_is_initialized(data(), size_in_bytes());
2604 } else {
2605            for_each_value([](T &v) { __msan_check_mem_is_initialized(&v, sizeof(T)); });
2606 }
2607#endif
2608#endif
2609 }
2610};
2611
2612} // namespace Runtime
2613} // namespace Halide
2614
2615#undef HALIDE_ALLOCA
2616
2617#endif  // HALIDE_RUNTIME_BUFFER_H