Program Listing for File loop.hpp

Return to documentation for file (loop.hpp)

// ***********************************************************************************
// Idefix MHD astrophysical code
// Copyright(C) 2020-2022 Geoffroy R. J. Lesur <geoffroy.lesur@univ-grenoble-alpes.fr>
// and other code contributors
// Licensed under CeCILL 2.1 License, see COPYING for more information
// ***********************************************************************************

// These shortcuts are largely inspired from K-Athena
// https://gitlab.com/pgrete/kathena
// by P. Grete.

#ifndef LOOP_HPP_
#define LOOP_HPP_

#include <string>
#include "idefix.hpp"
#include "global.hpp"

#define KOKKOS_VECTOR_LENGTH  8


#ifdef INNER_TTR_LOOP
  #define TPINNERLOOP Kokkos::TeamThreadRange
#elif defined INNER_TVR_LOOP
  #define TPINNERLOOP Kokkos::ThreadVectorRange
#else
  #define TPINNERLOOP Kokkos::TeamThreadRange
#endif

typedef Kokkos::TeamPolicy<>               team_policy;
typedef Kokkos::TeamPolicy<>::member_type  member_type;



// Check if the user requested a specific loop unrolling strategy
#if defined(LOOP_PATTERN_SIMD)
  constexpr LoopPattern defaultLoop = LoopPattern::SIMDFOR;
#elif defined(LOOP_PATTERN_1DRANGE)
  constexpr LoopPattern defaultLoop = LoopPattern::RANGE;
#elif defined(LOOP_PATTERN_MDRANGE)
  constexpr LoopPattern defaultLoop = LoopPattern::MDRANGE;
#elif defined(LOOP_PATTERN_TPX)
  constexpr LoopPattern defaultLoop = LoopPattern::TPX;
#elif defined(LOOP_PATTERN_TPTTRTVR)
  constexpr LoopPattern defaultLoop = LoopPattern::TPTTRTVR;
#else // no loop strategy has been defined
  // Default loops
  #if defined(KOKKOS_ENABLE_OPENMP)
    constexpr LoopPattern defaultLoop = LoopPattern::TPTTRTVR;
  #elif defined(KOKKOS_ENABLE_CUDA)
    constexpr LoopPattern defaultLoop = LoopPattern::RANGE;
  #elif defined(KOKKOS_ENABLE_HIP)
    constexpr LoopPattern defaultLoop = LoopPattern::RANGE;
  #elif defined(KOKKOS_ENABLE_SERIAL)
    constexpr LoopPattern defaultLoop = LoopPattern::SIMDFOR;
  #else
    #warning "Unknown target architeture: default to MDrange"
    constexpr LoopPattern defaultLoop = LoopPattern::MDRANGE;
  #endif
#endif



// 1D loop
template <typename Function>
inline void idefix_for(const std::string & NAME,
                       const int & IB, const int & IE,
                       Function function) {
  const int NI = IE - IB;
  Kokkos::parallel_for(NAME, NI,
    KOKKOS_LAMBDA (const int& IDX) {
      int i = IDX;
      i += IB;
      function(i);
  });
}


// 2D loop
template <typename Function>
inline void idefix_for(const std::string & NAME,
                       const int & JB, const int & JE,
                       const int & IB, const int & IE,
                       Function function) {
  // Kokkos 1D Range
  if constexpr(defaultLoop == LoopPattern::RANGE) {
    const int NJ = JE - JB;
    const int NI = IE - IB;
    const int NJNI = NJ * NI;
    Kokkos::parallel_for(NAME, NJNI,
      KOKKOS_LAMBDA (const int& IDX) {
        int j = IDX  / NI;
        int i = IDX - j*NI;
        j += JB;
        i += IB;
        function(j,i);
    });

    // MDRange loops
  } else if constexpr(defaultLoop == LoopPattern::MDRANGE) {
    Kokkos::parallel_for(NAME,
      Kokkos::MDRangePolicy<Kokkos::Rank<2, Kokkos::Iterate::Right, Kokkos::Iterate::Right>>
        ({JB,IB},{JE,IE}), function);

    // TeamPolicies with single inner loops
  } else if constexpr(defaultLoop == LoopPattern::TPX || defaultLoop == LoopPattern::TPTTRTVR ) {
    const int NJ = JE - JB;
    Kokkos::parallel_for(NAME, team_policy (NJ, Kokkos::AUTO,KOKKOS_VECTOR_LENGTH),
      KOKKOS_LAMBDA (member_type team_member) {
        const int j = team_member.league_rank() + JB;
        Kokkos::parallel_for(TPINNERLOOP<>(team_member,IB,IE),
                             [&] (const int i) {
                               function(j,i);
                            });
    });

    // SIMD FOR loops
  } else if constexpr(defaultLoop == LoopPattern::SIMDFOR) {
    for (auto j = JB; j < JE; j++)
#pragma omp simd
      for (auto i = IB; i < IE; i++)
        function(j,i);
  } else {
    throw std::runtime_error("Unknown/undefined LoopPattern used.");
  }
}


// 3D loop
template <typename Function>
inline void idefix_for(const std::string & NAME,
                       const int & KB, const int & KE,
                       const int & JB, const int & JE,
                       const int & IB, const int & IE,
                       Function function) {
  // Kokkos 1D Range
  if constexpr(defaultLoop == LoopPattern::RANGE) {
    const int NK = KE - KB;
    const int NJ = JE - JB;
    const int NI = IE - IB;
    const int NKNJNI = NK*NJ*NI;
    const int NJNI = NJ * NI;
    Kokkos::parallel_for(NAME,NKNJNI,
      KOKKOS_LAMBDA (const int& IDX) {
        int k = IDX / NJNI;
        int j = (IDX - k*NJNI) / NI;
        int i = IDX - k*NJNI - j*NI;
        k += KB;
        j += JB;
        i += IB;
        function(k,j,i);
    });

  // MDRange loops
  } else if constexpr(defaultLoop == LoopPattern::MDRANGE) {
    Kokkos::parallel_for(NAME,
      Kokkos::MDRangePolicy<Kokkos::Rank<3, Kokkos::Iterate::Right, Kokkos::Iterate::Right>>
        ({KB,JB,IB},{KE,JE,IE}), function);

  // TeamPolicy with single inner loops
  } else if constexpr(defaultLoop == LoopPattern::TPX) {
    const int NK = KE - KB;
    const int NJ = JE - JB;
    const int NKNJ = NK * NJ;
    Kokkos::parallel_for(NAME,
      team_policy (NKNJ, Kokkos::AUTO,KOKKOS_VECTOR_LENGTH),
      KOKKOS_LAMBDA (member_type team_member) {
        const int k = team_member.league_rank() / NJ + KB;
        const int j = team_member.league_rank() % NJ + JB;
        Kokkos::parallel_for(TPINNERLOOP<>(team_member,IB,IE),
          [&] (const int i) {
            function(k,j,i);
        });
      });

  // TeamPolicy with nested TeamThreadRange and ThreadVectorRange
  } else if constexpr(defaultLoop == LoopPattern::TPTTRTVR) {
    const int NK = KE - KB;
    Kokkos::parallel_for(NAME,
      team_policy (NK, Kokkos::AUTO,KOKKOS_VECTOR_LENGTH),
      KOKKOS_LAMBDA (member_type team_member) {
        const int k = team_member.league_rank() + KB;
        Kokkos::parallel_for(
          Kokkos::TeamThreadRange<>(team_member,JB,JE),
          [&] (const int j) {
            Kokkos::parallel_for(
              Kokkos::ThreadVectorRange<>(team_member,IB,IE),
              [&] (const int i) {
                function(k,j,i);
              });
          });
      });

  // SIMD FOR loops
  } else if constexpr(defaultLoop == LoopPattern::SIMDFOR) {
    for (auto k = KB; k < KE; k++)
      for (auto j = JB; j < JE; j++)
#pragma omp simd
        for (auto i = IB; i < IE; i++)
          function(k,j,i);
  } else {
    throw std::runtime_error("Unknown/undefined LoopPattern used.");
  }
}

// 4D loop
template <typename Function>
inline void idefix_for(const std::string & NAME,
                       const int NB, const int NE,
                       const int KB, const int KE,
                       const int JB, const int JE,
                       const int IB, const int IE,
                       Function function) {
  // Kokkos 1D Range
  if constexpr(defaultLoop == LoopPattern::RANGE) {
    const int NN = (NE) - (NB);
    const int NK = (KE) - (KB);
    const int NJ = (JE) - (JB);
    const int NI = (IE) - (IB);
    const int NNNKNJNI = NN*NK*NJ*NI;
    const int NKNJNI = NK*NJ*NI;
    const int NJNI = NJ * NI;
    Kokkos::parallel_for(NAME,NNNKNJNI,
      KOKKOS_LAMBDA (const int& IDX) {
        int n = IDX / NKNJNI;
        int k = (IDX - n*NKNJNI) / NJNI;
        int j = (IDX - n*NKNJNI - k*NJNI) / NI;
        int i = IDX - n*NKNJNI - k*NJNI - j*NI;
        n += (NB);
        k += (KB);
        j += (JB);
        i += (IB);
        function(n,k,j,i);
    });

  // MDRange loops
  } else if constexpr(defaultLoop == LoopPattern::MDRANGE) {
    Kokkos::parallel_for(NAME,
      Kokkos::MDRangePolicy<Kokkos::Rank<4,Kokkos::Iterate::Right, Kokkos::Iterate::Right>>
        ({NB,KB,JB,IB},{NE,KE,JE,IE}), function);

  // TeamPolicy loops
  } else if constexpr(defaultLoop == LoopPattern::TPX) {
    const int NN = NE - NB;
    const int NK = KE - KB;
    const int NJ = JE - JB;
    const int NKNJ = NK * NJ;
    const int NNNKNJ = NN * NK * NJ;
    Kokkos::parallel_for(NAME,
      team_policy (NNNKNJ, Kokkos::AUTO,KOKKOS_VECTOR_LENGTH),
      KOKKOS_LAMBDA (member_type team_member) {
        int n = team_member.league_rank() / NKNJ;
        int k = (team_member.league_rank() - n*NKNJ) / NJ;
        int j = team_member.league_rank() - n*NKNJ - k*NJ + JB;
        n += NB;
        k += KB;
        Kokkos::parallel_for(
          TPINNERLOOP<>(team_member,IB,IE),
          [&] (const int i) {
            function(n,k,j,i);
          });
      });

  // TeamPolicy with nested TeamThreadRange and ThreadVectorRange
  } else if constexpr(defaultLoop == LoopPattern::TPTTRTVR) {
    const int NN = NE - NB;
    const int NK = KE - KB;
    const int NNNK = NN * NK;
    Kokkos::parallel_for(NAME,
      team_policy (NNNK, Kokkos::AUTO,KOKKOS_VECTOR_LENGTH),
      KOKKOS_LAMBDA (member_type team_member) {
        int n = team_member.league_rank() / NK + NB;
        int k = team_member.league_rank() % NK + KB;
        Kokkos::parallel_for(
          Kokkos::TeamThreadRange<>(team_member,JB,JE),
          [&] (const int j) {
            Kokkos::parallel_for(
              Kokkos::ThreadVectorRange<>(team_member,IB,IE),
              [&] (const int i) {
                function(n,k,j,i);
              });
          });
      });

  // SIMD FOR loops
  } else if constexpr(defaultLoop == LoopPattern::SIMDFOR) {
    for (auto n = NB; n < NE; n++)
      for (auto k = KB; k < KE; k++)
        for (auto j = JB; j < JE; j++)
#pragma omp simd
          for (auto i = IB; i < IE; i++)
            function(n,k,j,i);
  } else {
    throw std::runtime_error("Unknown/undefined LoopPattern used.");
  }
}

#endif // LOOP_HPP_