|
void | copy_from (const LoopNest &n) |
|
void | structural_hash (uint64_t &h, int depth) const |
|
size_t | funcs_realized_or_inlined () const |
|
void | get_sites (StageMap< Sites > &sites, const LoopNest *task=nullptr, const LoopNest *parent=nullptr) const |
|
void | set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const |
|
void | compute_features (const FunctionDAG &dag, const Adams2019Params &params, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, int64_t *working_set, StageMap< ScheduleFeatures > *features, bool use_cached_features) const
|
bool | is_root () const |
|
const Bound & | set_bounds (const FunctionDAG::Node *f, BoundContents *b) const |
|
const Bound & | get_bounds (const FunctionDAG::Node *f) const |
|
void | dump (std::ostream &os, string prefix, const LoopNest *parent) const |
|
bool | calls (const FunctionDAG::Node *f) const |
|
int64_t | max_inlined_calls () const |
|
bool | accesses_input_buffer () const |
|
bool | computes (const FunctionDAG::Node *f) const |
|
void | inline_func (const FunctionDAG::Node *f) |
|
void | compute_here (const FunctionDAG::Node *f, bool tileable, int v, const Adams2019Params &params)
|
IntrusivePtr< const LoopNest > | parallelize_in_tiles (const Adams2019Params &params, const vector< int64_t > &tiling, const LoopNest *parent) const
|
std::vector< IntrusivePtr< const LoopNest > > | compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Adams2019Params ¶ms, int v, bool in_realization) const |
|
void | apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site) const |
|
void | copy_from_including_features (const LoopNest &n) |
|
void | memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
|
void | memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features_to_insert) const |
|
void | compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const |
|
void | recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const |
|
uint64_t | compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const |
|
std::vector< std::pair< int, int > > | collect_producers (const StageMap< Sites > &sites) const |
|
void | collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const |
|
bool | is_gpu_serial (const Target &target) const |
|
bool | is_gpu_thread (const Target &target) const |
|
bool | is_gpu_block (const Target &target) const |
|
bool | is_scalar () const |
|
vector< int64_t > | get_union_thread_counts (const FunctionDAG::Node *f) const |
|
void | get_stage_sizes (const FunctionDAG::Node *f, vector< vector< int64_t > > &stage_sizes, vector< vector< int > > &pure_dims, vector< int > &vectorized_indices) const |
|
void | generate_vec_dim_serial_tilings (vector< int > &serial_sizes) const |
|
bool | add_gpu_thread_tilings (const FunctionDAG::Node *f, const Anderson2021Params &params, const Target &target, int v, vector< IntrusivePtr< const LoopNest > > &result, const vector< int64_t > &max_size)
|
void | copy_from (const LoopNest &n) |
|
void | copy_from_including_features (const LoopNest &n) |
|
void | structural_hash (uint64_t &h, int depth) const |
|
size_t | funcs_realized_or_inlined () const |
|
GPUMemoryType | get_gpu_memory_type (bool in_block, bool in_thread, bool is_inlined=false) const |
|
std::vector< int > | unrolled_loops (const Target &target, const LoopNest *parent, const LoopNest *grandparent) const |
|
void | get_allocs_that_can_be_promoted_to_registers (const Target &target, StageMap< Sites > &sites, NodeMap< bool > &can_be_promoted_to_registers, const LoopNest *grandparent, const LoopNest *parent) const |
|
bool | promote_allocs_to_registers (const Target &target, StageMap< Sites > &sites) const |
|
void | get_sites (const Target &target, StageMap< Sites > &sites, StageMap< int64_t > &shared_mem_alloc_sizes, const LoopNest *task=nullptr, const LoopNest *parent=nullptr, const LoopNest *current_thread_loop=nullptr) const |
|
void | set_working_set_at_task_feature (int64_t working_set, StageMap< ScheduleFeatures > *features) const |
|
bool | exceeds_serial_extents_limit (const Target &target, const LoopNest *parent, bool in_threads_loop) const |
|
bool | node_has_dynamic_region_computed (const FunctionDAG::Node *f) const |
|
bool | has_dynamic_allocation_inside_thread (bool in_thread_loop) const |
|
const LoopNest * | find_pure_stage_loop_nest (const FunctionDAG::Node *node) const |
|
int | get_pure_stage_vectorized_loop_index (const FunctionDAG::Node *node) const |
|
int | get_vectorized_loop_index_from_pure_stage (const LoopNest &root) const |
|
double | storage_stride (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const LoopNest &root) const |
|
Strides | compute_strides (const LoadJacobian &jac, int innermost_storage_dim, const FunctionDAG::Node *storage_node, const Bound &store_bounds, const ThreadInfo *thread_info, bool verbose=false) const |
|
bool | all_strides_exist (const LoadJacobian &jac, const FunctionDAG::Node *storage_node, const LoopNest &root) const |
|
int | get_actual_vector_dim (const Bound &store_bounds) const |
|
void | compute_gpu_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const GPULoopInfo &gpu_loop_info, const std::vector< int64_t > &inner_serial_loop_extents, const Sites &consumer_site, ScheduleFeatures &feat, const LoopNest *parent, const LoopNest &root, GlobalMemInfo &global_mem_loads, SharedMemInfo &shared_mem_loads, LocalMemInfo &local_mem_loads, bool verbose=false) const |
|
bool | can_vectorize_access_for_innermost_dim (const LoadJacobian &jac, const FunctionDAG::Node *accessed, int innermost_dim, int loop_index) const |
|
bool | can_vectorize_store_access (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, int loop_index, const GPUMemoryType &mem_type) const |
|
int | vectorized_load_access_size (const LoadJacobian &jac, const FunctionDAG::Node *accessed, bool accessed_has_been_scheduled, int innermost_dim, const GPUMemoryType &mem_type, bool verbose=false) const |
|
int | vectorized_access_size (size_t loop_index, bool verbose=false) const |
|
template<typename T > |
void | compute_num_mem_accesses_per_block (const LoadJacobian &jac, const FunctionDAG::Node *node, const Bound &store_bounds, const ThreadInfo *thread_info, int innermost_dim, double num_requests_per_warp, MemInfoType< T > &mem_info, bool verbose=false) const |
|
std::pair< double, double > | compute_local_mem_store_features (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const LoopNest &root, double serial_loop_extents) const |
|
template<typename T > |
MemInfoType< T > | compute_mem_store_info (const LoadJacobian &jac, int consumer_innermost_dim, const FunctionDAG::Node *node, const Bound &consumer_store_bounds, const ThreadInfo *thread_info, double serial_loop_extents, bool verbose) const |
|
template<typename T > |
void | compute_mem_load_features (const LoadJacobian &jac, int producer_innermost_dim, const FunctionDAG::Node *node, const Bound &producer_store_bounds, bool producer_has_been_scheduled, const ThreadInfo *thread_info, MemInfoType< T > &mem_info, double serial_loop_extents, bool verbose=false) const |
|
double | compute_local_mem_stride (double stride, double bytes) const |
|
const LoopNest * | get_enclosing_block (const LoopNest *parent, const LoopNest *grandparent) const |
|
std::pair< int64_t, int64_t > | get_block_and_serial_extents (const LoopNest *block) const |
|
bool | all_paths_to_leaves_have_thread_loop () const |
|
bool | has_thread_loop_descendant () const |
|
void | compute_warp_features (ScheduleFeatures &features, const GPULoopInfo &gpu_loop_info) const |
|
void | compute_warp_and_block_occupancy (const Anderson2021Params &params, ScheduleFeatures &feat, const GPULoopInfo &gpu_loop_info) const
|
void | compute_shared_mem_occupancy (const Anderson2021Params &params, const Target &target, int64_t total_shared_mem_alloc_size, ScheduleFeatures &feat) const
|
std::pair< const LoopNest *, const LoopNest * > | find_innermost_and_parent () const |
|
int64_t | points_accessed_per_thread (const Anderson2021Params &params, const Target &target, const GPULoopInfo &gpu_loop_info, const std::vector< const FunctionDAG::Edge * > &edge_chain, const LoadJacobian &jac, const LoopNest *parent, const LoopNest *grandparent, int64_t n, const ScheduleFeatures &feat, const LoadJacobian &serial_jac, bool producer_has_been_scheduled, int producer_innermost_dim, const GPUMemoryType &mem_type, bool verbose) const
|
int64_t | compute_licm_amortization (const LoopNest *innermost, const LoopNest *parent, const ScheduleFeatures &feat, const LoadJacobian &jac, int producer_dims) const |
|
void | memoize_points_computed_minimum (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
|
vector< pair< int, int > > | collect_producers (const StageMap< Sites > &sites) const |
|
uint64_t | compute_hash_of_producers_stored_at_root (const StageMap< Sites > &sites) const |
|
void | collect_stages (std::set< const FunctionDAG::Node::Stage * > &stages) const |
|
void | memoize_features (StageMap< ScheduleFeatures > &memoized_features, const StageMap< ScheduleFeatures > *features) const |
|
void | compute_working_set_from_features (int64_t *working_set, const StageMap< ScheduleFeatures > *features) const |
|
void | recompute_inlined_features (const StageMap< Sites > &sites, StageMap< ScheduleFeatures > *features) const |
|
std::pair< int64_t, bool > | compute_alloc_size_of_node_here (const FunctionDAG::Node *f) const |
|
void | compute_features (const FunctionDAG &dag, const Anderson2021Params &params, const Target &target, const StageMap< Sites > &sites, int64_t instances, int64_t parallelism, const LoopNest *parent, const LoopNest *grandparent, const LoopNest &root, GPULoopInfo gpu_loop_info, bool use_memoized_features, const StageMap< int64_t > &total_shared_mem_alloc_sizes, int64_t *working_set, int64_t *working_set_local_constant, int64_t *working_set_local_dynamic, StageMap< ScheduleFeatures > *features, Statistics &stats, bool verbose=false) const
|
bool | is_root () const |
|
const Bound & | set_bounds (const FunctionDAG::Node *f, BoundContents *b) const |
|
const Bound & | get_bounds (const FunctionDAG::Node *f) const |
|
Bound | get_bounds_along_edge_chain (const FunctionDAG::Node *f, const vector< const FunctionDAG::Edge * > &edge_chain) const |
|
void | dump () const |
|
std::string | to_string () const |
|
template<typename T > |
void | dump (T &stream, string prefix, const LoopNest *parent) const |
|
bool | calls (const FunctionDAG::Node *f) const |
|
int64_t | max_inlined_calls () const |
|
bool | accesses_input_buffer () const |
|
bool | computes (const FunctionDAG::Node *f) const |
|
void | inline_func (const FunctionDAG::Node *f) |
|
bool | compute_here (const FunctionDAG::Node *f, bool tileable, int v, bool in_threads_loop, const Anderson2021Params &params, const Target &target)
|
IntrusivePtr< const LoopNest > | parallelize_in_tiles (const vector< int64_t > &tiling, const LoopNest *parent, const Anderson2021Params &params, const Target &target, bool inner_tiling, bool adjust_tiling, bool move_all_rvars_inward=true, const vector< int > &rvars_to_move_inward={}) const
|
int64_t | get_total_local_mem_alloc_size (bool constant_allocs_only=false, bool in_threads_loop=false) const |
|
int64_t | get_total_constant_local_mem_alloc_size () const |
|
bool | requires_dynamic_allocation (const FunctionDAG::Node *f, const Target &target, bool in_threads_loop) const |
|
vector< IntrusivePtr< const LoopNest > > | compute_in_tiles (const FunctionDAG::Node *f, const LoopNest *parent, const Anderson2021Params &params, const Target &target, const SearchSpaceOptions &search_space_options, int v, bool in_realization, bool in_threads_loop, bool is_pre_pass, vector< int64_t > union_counts=vector< int64_t >()) const
|
bool | has_constant_region_computed (const FunctionDAG::Node *node) const |
|
bool | has_constant_region_required (const FunctionDAG::Node *node) const |
|
bool | other_stage_has_same_producer (const FunctionDAG::Node *producer) const |
|
int | num_serial_loops (const FunctionDAG::Node::Stage *stage) const |
|
int | num_serial_loops () const |
|
bool | producer_computed_here_or_further_in (const FunctionDAG::Node *producer) const |
|
void | update_producers_to_be_staged (StageScheduleState &state, const NodeMap< bool > &all_inlined) const |
|
bool | region_computed_shrinks (const FunctionDAG::Node *f, const LoopNest *parent) const |
|
void | apply (LoopLevel here, StageMap< std::unique_ptr< StageScheduleState > > &state_map, double num_cores, int depth, const LoopNest *parent, const LoopNest *compute_site, const Target &target, std::vector< StageScheduleState * > &ancestors, const NodeMap< bool > &all_inlined) const |
|
double | max_idle_lane_wastage (const Target &target, GPULoopInfo gpu_loop_info) const |
|
bool | has_valid_thread_extents () const |
|
void | collect_nodes_that_should_be_inlined (const NodeMap< bool > &nodes_to_freeze, NodeMap< bool > &inlined_nodes) const |
|
void | collect_all_inlined (NodeMap< bool > &all_inlined) const |
|
int64_t | product_of_self_and_descendants (int loop_index) const |
|
int64_t | product_of_descendants (int loop_index) const |
|
void | get_stages_computed_in_each_compute_root_loop (StageMap< StageMap< bool > > &descendants, const LoopNest *compute_root_loop_nest=nullptr) const |
|
Definition at line 34 of file LoopNest.h.