145 std::size_t n_threads,
146 std::size_t batch_size) {
147 if (n_threads == 0 || batch_size == 0)
return;
151 #pragma omp parallel for schedule(static)
153 for (std::size_t tid = 0; tid < n_threads; ++tid) {
157 for (std::size_t slot = 0; slot < batch_size; ++slot) {
158 std::size_t idx = tid + slot * n_threads;
159 z_current *= h_values[idx];
168 for (std::size_t slot = batch_size; slot-- > 0; ) {
169 std::size_t idx = tid + slot * n_threads;
171 z_inv_current *= h_values[idx];
172 h_values[idx] = z_inv_current.
square();