module m_omp_exec_dist
  use mpi

  use m_common, only: dp, VERT
  use m_omp_common, only: SZ
  use m_omp_kernels_dist, only: der_univ_dist, der_univ_subs, &
                                der_univ_fused_subs
  use m_tdsops, only: tdsops_t
  use m_omp_sendrecv, only: sendrecv_fields

  implicit none

contains

  subroutine exec_dist_tds_compact( &
    du, u, u_recv_s, u_recv_e, du_send_s, du_send_e, du_recv_s, du_recv_e, &
    tdsops, nproc, pprev, pnext, n_groups)
    implicit none

    ! du = d(u)
    real(dp), dimension(:, :, :), intent(out) :: du
    real(dp), dimension(:, :, :), intent(in) :: u, u_recv_s, u_recv_e

    ! The arrays below are intent(out) only so that this subroutine can write
    ! into them; the caller never uses the data they carry back.
    real(dp), dimension(:, :, :), intent(out) :: &
      du_send_s, du_send_e, du_recv_s, du_recv_e

    type(tdsops_t), intent(in) :: tdsops
    integer, intent(in) :: nproc, pprev, pnext
    integer, intent(in) :: n_groups

    integer :: n_data
    integer :: k

    n_data = SZ*n_groups

    ! Local passes of the distributed solver; also fill the interface send buffers
    !$omp parallel do
    do k = 1, n_groups
      call der_univ_dist( &
        du(:, :, k), du_send_s(:, :, k), du_send_e(:, :, k), u(:, :, k), &
        u_recv_s(:, :, k), u_recv_e(:, :, k), &
        tdsops%coeffs_s, tdsops%coeffs_e, tdsops%coeffs, tdsops%tds_n, &
        tdsops%dist_fw, tdsops%dist_bw, tdsops%dist_af &
        )
    end do
    !$omp end parallel do

    ! Halo exchange for the reduced 2x2 interface systems
    call sendrecv_fields(du_recv_s, du_recv_e, du_send_s, du_send_e, &
                         n_data, nproc, pprev, pnext)

    ! Substitution step using the received interface data
    !$omp parallel do
    do k = 1, n_groups
      call der_univ_subs(du(:, :, k), &
                         du_recv_s(:, :, k), du_recv_e(:, :, k), &
                         tdsops%tds_n, tdsops%dist_sa, tdsops%dist_sc)
    end do
    !$omp end parallel do

  end subroutine exec_dist_tds_compact

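  !> Computes the distributed compact-scheme pieces of a transport-equation
  !> RHS in one pass: the first derivative of u (stored temporarily in
  !> rhs_du), the derivative of the product u*v, and the second derivative of
  !> u. Each derivative is evaluated locally with der_univ_dist, the reduced
  !> 2x2 interface systems are exchanged with the neighbouring ranks, and
  !> der_univ_fused_subs then combines the three contributions together with
  !> nu into the final rhs_du.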
  subroutine exec_dist_transeq_compact( &
    rhs_du, dud, d2u, &
    du_send_s, du_send_e, du_recv_s, du_recv_e, &
    dud_send_s, dud_send_e, dud_recv_s, dud_recv_e, &
    d2u_send_s, d2u_send_e, d2u_recv_s, d2u_recv_e, &
    u, u_recv_s, u_recv_e, &
    v, v_recv_s, v_recv_e, &
    tdsops_du, tdsops_dud, tdsops_d2u, nu, nproc, pprev, pnext, n_groups)
    implicit none

    !> The result array; it is also used as temporary storage
    real(dp), dimension(:, :, :), intent(out) :: rhs_du
    !> Temporary storage arrays
    real(dp), dimension(:, :, :), intent(out) :: dud, d2u

    ! The arrays below are intent(out) only so that this subroutine can write
    ! into them; the caller never uses the data they carry back.
    real(dp), dimension(:, :, :), intent(out) :: &
      du_send_s, du_send_e, du_recv_s, du_recv_e
    real(dp), dimension(:, :, :), intent(out) :: &
      dud_send_s, dud_send_e, dud_recv_s, dud_recv_e
    real(dp), dimension(:, :, :), intent(out) :: &
      d2u_send_s, d2u_send_e, d2u_recv_s, d2u_recv_e

    real(dp), dimension(:, :, :), intent(in) :: u, u_recv_s, u_recv_e
    real(dp), dimension(:, :, :), intent(in) :: v, v_recv_s, v_recv_e

    type(tdsops_t), intent(in) :: tdsops_du, tdsops_dud, tdsops_d2u
    real(dp), intent(in) :: nu
    integer, intent(in) :: nproc, pprev, pnext
    integer, intent(in) :: n_groups

    real(dp), dimension(:, :), allocatable :: ud, ud_recv_s, ud_recv_e

    integer :: n_data, n_halo
    integer :: k, i, j, n

    ! TODO: don't hardcode n_halo
    n_halo = 4
    n = tdsops_du%tds_n
    n_data = SZ*n_groups

    ! Scratch arrays for the locally formed u*v product; privatised per thread below
    allocate (ud(SZ, n))
    allocate (ud_recv_e(SZ, n_halo))
    allocate (ud_recv_s(SZ, n_halo))

    !$omp parallel do private(ud, ud_recv_e, ud_recv_s)
    do k = 1, n_groups
      call der_univ_dist( &
        rhs_du(:, :, k), du_send_s(:, :, k), du_send_e(:, :, k), u(:, :, k), &
        u_recv_s(:, :, k), u_recv_e(:, :, k), &
        tdsops_du%coeffs_s, tdsops_du%coeffs_e, tdsops_du%coeffs, &
        n, tdsops_du%dist_fw, tdsops_du%dist_bw, tdsops_du%dist_af &
        )

      call der_univ_dist( &
        d2u(:, :, k), d2u_send_s(:, :, k), d2u_send_e(:, :, k), u(:, :, k), &
        u_recv_s(:, :, k), u_recv_e(:, :, k), &
        tdsops_d2u%coeffs_s, tdsops_d2u%coeffs_e, tdsops_d2u%coeffs, &
        n, tdsops_d2u%dist_fw, tdsops_d2u%dist_bw, &
        tdsops_d2u%dist_af &
        )

      ! Handle dud by locally generating u*v (bulk and halo regions)
      do j = 1, n
        !$omp simd
        do i = 1, SZ
          ud(i, j) = u(i, j, k)*v(i, j, k)
        end do
        !$omp end simd
      end do
      do j = 1, n_halo
        !$omp simd
        do i = 1, SZ
          ud_recv_s(i, j) = u_recv_s(i, j, k)*v_recv_s(i, j, k)
          ud_recv_e(i, j) = u_recv_e(i, j, k)*v_recv_e(i, j, k)
        end do
        !$omp end simd
      end do

      call der_univ_dist( &
        dud(:, :, k), dud_send_s(:, :, k), dud_send_e(:, :, k), ud(:, :), &
        ud_recv_s(:, :), ud_recv_e(:, :), &
        tdsops_dud%coeffs_s, tdsops_dud%coeffs_e, tdsops_dud%coeffs, &
        n, tdsops_dud%dist_fw, tdsops_dud%dist_bw, &
        tdsops_dud%dist_af &
        )
    end do
    !$omp end parallel do

    ! Halo exchange for the reduced 2x2 interface systems
    call sendrecv_fields(du_recv_s, du_recv_e, du_send_s, du_send_e, &
                         n_data, nproc, pprev, pnext)
    call sendrecv_fields(dud_recv_s, dud_recv_e, dud_send_s, dud_send_e, &
                         n_data, nproc, pprev, pnext)
    call sendrecv_fields(d2u_recv_s, d2u_recv_e, d2u_send_s, d2u_send_e, &
                         n_data, nproc, pprev, pnext)

    ! Fused substitution step: combine du, d(u*v) and d2u into rhs_du
    !$omp parallel do
    do k = 1, n_groups
      call der_univ_fused_subs( &
        rhs_du(:, :, k), dud(:, :, k), d2u(:, :, k), v(:, :, k), &
        du_recv_s(:, :, k), du_recv_e(:, :, k), &
        dud_recv_s(:, :, k), dud_recv_e(:, :, k), &
        d2u_recv_s(:, :, k), d2u_recv_e(:, :, k), &
        nu, n, tdsops_du%dist_sa, tdsops_du%dist_sc, &
        tdsops_dud%dist_sa, tdsops_dud%dist_sc, &
        tdsops_d2u%dist_sa, tdsops_d2u%dist_sc &
        )
    end do
    !$omp end parallel do

  end subroutine exec_dist_transeq_compact

end module m_omp_exec_dist
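
!> Illustrative only: a minimal sketch of how exec_dist_tds_compact might be
!> driven from outside the module. The 4-row halo width matches the n_halo
!> hard-coded in exec_dist_transeq_compact, and the middle extent (1) of the
!> send/recv work buffers is an assumption inferred from n_data = SZ*n_groups;
!> check the actual callers in the code base before relying on these shapes.
!> The u halo slices (u_recv_s/u_recv_e) are assumed to have been exchanged
!> with the neighbouring ranks beforehand.
subroutine example_exec_dist_tds(du, u, u_recv_s, u_recv_e, tdsops, &
                                 nproc, pprev, pnext, n_groups)
  use m_common, only: dp
  use m_omp_common, only: SZ
  use m_tdsops, only: tdsops_t
  use m_omp_exec_dist, only: exec_dist_tds_compact
  implicit none

  !> du(SZ, tdsops%tds_n, n_groups) receives the derivative of u
  real(dp), dimension(:, :, :), intent(out) :: du
  !> u(SZ, tdsops%tds_n, n_groups); u_recv_s/u_recv_e(SZ, 4, n_groups)
  real(dp), dimension(:, :, :), intent(in) :: u, u_recv_s, u_recv_e
  type(tdsops_t), intent(in) :: tdsops
  integer, intent(in) :: nproc, pprev, pnext, n_groups

  ! Scratch buffers for the reduced 2x2 interface systems; their contents are
  ! not needed after the call.
  real(dp), allocatable, dimension(:, :, :) :: send_s, send_e, recv_s, recv_e

  allocate (send_s(SZ, 1, n_groups), send_e(SZ, 1, n_groups))
  allocate (recv_s(SZ, 1, n_groups), recv_e(SZ, 1, n_groups))

  call exec_dist_tds_compact(du, u, u_recv_s, u_recv_e, &
                             send_s, send_e, recv_s, recv_e, &
                             tdsops, nproc, pprev, pnext, n_groups)

  deallocate (send_s, send_e, recv_s, recv_e)
end subroutine example_exec_dist_tds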