reference, declaration → definition definition → references, declarations, derived classes, virtual overrides reference to multiple definitions → definitions unreferenced |
3851 kernel = isl_calloc_type(gen->ctx, struct ppcg_kernel); 3852 kernel = ppcg_kernel_create_local_arrays(kernel, gen->prog); 3852 kernel = ppcg_kernel_create_local_arrays(kernel, gen->prog); 3853 if (!kernel) 3859 kernel->ctx = gen->ctx; 3860 kernel->prog = gen->prog; 3861 kernel->options = gen->options; 3862 kernel->context = extract_context(node, gen->prog); 3863 kernel->core = isl_union_set_universe(isl_union_set_copy(domain)); 3865 kernel->contraction = isl_union_pw_multi_aff_copy(contraction); 3869 kernel->expanded_domain = isl_union_set_copy(expanded); 3870 kernel->arrays = accessed_by_domain(expanded, gen->prog); 3871 kernel->n_grid = n_outer_coincidence(node); 3873 node_thread = gpu_tree_move_down_to_thread(node_thread, kernel->core); 3875 kernel->n_block = n_outer_coincidence(node_thread); 3877 kernel->id = gen->kernel_id++; 3878 read_grid_and_block_sizes(kernel, gen); 3880 kernel->sync_writes = compute_sync_writes(kernel, node); 3880 kernel->sync_writes = compute_sync_writes(kernel, node); 3888 id = isl_id_alloc(gen->ctx, "kernel", kernel); 3893 node = group_statements(node, kernel->id); 3896 node = split_band(node, kernel->n_grid); 3897 kernel->block_ids = ppcg_scop_generate_names(gen->prog->scop, 3898 kernel->n_grid, "b"); 3899 kernel->block_filter = set_schedule_modulo(node, kernel->block_ids, 3899 kernel->block_filter = set_schedule_modulo(node, kernel->block_ids, 3900 kernel->grid_dim); 3901 kernel->grid_size = extract_grid_size(kernel, 3901 kernel->grid_size = extract_grid_size(kernel, 3903 if (!kernel->options->wrap) 3904 node = snap_band_to_sizes(node, kernel->grid_dim, 3905 kernel->options); 3911 node = insert_guard(node, kernel->context, kernel->grid_size, 3911 node = insert_guard(node, kernel->context, kernel->grid_size, 3913 node = gpu_tree_move_down_to_thread(node, kernel->core); 3915 node = split_band(node, kernel->n_block); 3916 kernel->thread_ids = ppcg_scop_generate_names(gen->prog->scop, 3917 kernel->n_block, "t"); 3918 kernel->thread_filter = set_schedule_modulo(node, kernel->thread_ids, 3918 kernel->thread_filter = set_schedule_modulo(node, kernel->thread_ids, 3919 kernel->block_dim); 3920 if (extract_block_size(kernel, domain) < 0) 3925 node = insert_context(kernel, node); 3928 isl_union_set_copy(kernel->block_filter)); 3932 if (gpu_group_references(kernel, node) < 0) 3934 localize_bounds(kernel, host_domain); 3937 check_shared_memory_bound(kernel); 3938 mark_global_arrays(kernel); 3939 compute_group_tilings(kernel); 3941 node = gpu_tree_move_down_to_thread(node, kernel->core); 3943 if (!kernel->options->wrap) 3944 node = snap_band_to_sizes(node, kernel->block_dim, 3945 kernel->options); 3947 isl_union_set_copy(kernel->thread_filter)); 3948 if (kernel_requires_unroll(kernel)) { 3954 kernel->copy_schedule_dim = isl_schedule_node_get_schedule_depth(node); 3955 kernel->copy_schedule = 3957 contraction = isl_union_pw_multi_aff_copy(kernel->contraction); 3958 kernel->copy_schedule = 3960 kernel->copy_schedule, contraction); 3964 node = add_sync(kernel, node); 3965 node = add_copies(kernel, node); 3967 node = gpu_tree_move_down_to_shared(node, kernel->core); 3970 node = gpu_tree_move_down_to_thread(node, kernel->core); 3975 if (create_kernel_vars(kernel) < 0)