18namespace Autoscheduler {
20static constexpr int MAX_THREADS_PER_BLOCK = 1024;
41 ThreadInfo(
int vectorized_loop_index,
const std::vector<int64_t> &size,
const std::vector<FunctionDAG::Node::Loop> &loop,
const std::vector<int64_t> &
max_thread_counts) {
46 if (vectorized_loop_index != -1 && size[vectorized_loop_index] != 1) {
51 loop_vars.push_back(loop[vectorized_loop_index].var);
55 if (size[
i] == 1 || (
int)
i == vectorized_loop_index) {
83 count_num_active_warps_per_block();
108 template<
typename Fn>
135 template<
typename Fn>
155 template<
typename Fn>
175 return (
double)
num_threads / MAX_THREADS_PER_BLOCK;
218 void count_num_active_warps_per_block() {
#define internal_assert(c)
This file defines the class FunctionDAG, which is our representation of a Halide pipeline,...
@ Internal
Not visible externally, similar to 'static' linkage in C.
Internal::ConstantInterval cast(Type t, const Internal::ConstantInterval &a)
Cast operators for ConstantIntervals.
signed __INT64_TYPE__ int64_t
std::vector< std::string > loop_vars
void for_each_thread_id_in_tail_warp(Fn &fn) const
int64_t num_active_threads
double block_occupancy() const
int num_threads_in_final_warp
double warp_lane_utilization() const
double idle_lane_wastage() const
void for_each_active_thread_id(const Fn &fn) const
ThreadInfo(int vectorized_loop_index, const std::vector< int64_t > &size, const std::vector< FunctionDAG::Node::Loop > &loop, const std::vector< int64_t > &max_thread_counts)
int num_active_warps_per_block
void for_each_thread_id(const Fn &fn) const
int threads_in_this_block[3]
int64_t num_threads_in_this_block
int final_warp_initial_thread_id
void for_each_thread_id_in_first_warp(Fn &fn) const
int num_regular_active_warps_per_block
std::vector< int > loop_indices
ThreadTileOption & operator=(const ThreadTileOption &)=delete
double max_idle_lane_wastage
bool operator<(const ThreadTileOption &other) const
IntrusivePtr< const LoopNest > loop_nest
ThreadTileOption()=default
ThreadTileOption & operator=(ThreadTileOption &&)=default
ThreadTileOption(ThreadTileOption &&)=default
ThreadTileOption(const ThreadTileOption &)=delete
Intrusive shared pointers have a reference count (a RefCount object) stored in the class itself.