diff options
Diffstat (limited to 'Carpet/CarpetInterp2')
-rw-r--r-- | Carpet/CarpetInterp2/COPYING | 341 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/README | 8 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/configuration.ccl | 5 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/interface.ccl | 42 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/param.ccl | 1 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/schedule.ccl | 1 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.cc | 681 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/fasterp.hh | 200 | ||||
-rw-r--r-- | Carpet/CarpetInterp2/src/make.code.defn | 8 |
9 files changed, 1287 insertions, 0 deletions
diff --git a/Carpet/CarpetInterp2/COPYING b/Carpet/CarpetInterp2/COPYING new file mode 100644 index 000000000..1942c4334 --- /dev/null +++ b/Carpet/CarpetInterp2/COPYING @@ -0,0 +1,341 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place - Suite 330 + Boston, MA 02111-1307, USA. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) 19yy <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; see the file COPYING. If not, write to + the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/Carpet/CarpetInterp2/README b/Carpet/CarpetInterp2/README new file mode 100644 index 000000000..6480393c9 --- /dev/null +++ b/Carpet/CarpetInterp2/README @@ -0,0 +1,8 @@ +Cactus Code Thorn CarpetInterp2 +Thorn Author(s) : Erik Schnetter <schnetter@cct.lsu.edu> +Thorn Maintainer(s) : Erik Schnetter <schnetter@cct.lsu.edu> +-------------------------------------------------------------------------- + +Purpose of the thorn: + +This thorn provides a parallel interpolator for Carpet. diff --git a/Carpet/CarpetInterp2/configuration.ccl b/Carpet/CarpetInterp2/configuration.ccl new file mode 100644 index 000000000..1391494a9 --- /dev/null +++ b/Carpet/CarpetInterp2/configuration.ccl @@ -0,0 +1,5 @@ +# Configuration definitions for thorn CarpetInterp2 + +REQUIRES Carpet CarpetLib + +REQUIRES THORNS: Carpet CarpetLib diff --git a/Carpet/CarpetInterp2/interface.ccl b/Carpet/CarpetInterp2/interface.ccl new file mode 100644 index 000000000..b161281b8 --- /dev/null +++ b/Carpet/CarpetInterp2/interface.ccl @@ -0,0 +1,42 @@ +# Interface definition for thorn CarpetInterp + +IMPLEMENTS: interp2 + +USES INCLUDE HEADER: defs.hh +USES INCLUDE HEADER: typeprops.hh +USES INCLUDE HEADER: vect.hh + +USES INCLUDE HEADER: carpet.hh + + + +# Get access to communicators +CCTK_POINTER_TO_CONST \ +FUNCTION GetMPICommWorld (CCTK_POINTER_TO_CONST IN cctkGH) +REQUIRES FUNCTION GetMPICommWorld + +# Access coordinate information (on the coarse level) +CCTK_INT FUNCTION GetCoordRange \ + (CCTK_POINTER_TO_CONST IN cctkGH, \ + CCTK_INT IN m, \ + CCTK_INT IN ml, \ + CCTK_INT IN size, \ + CCTK_INT ARRAY OUT gsh, \ + CCTK_REAL ARRAY OUT lower, \ + CCTK_REAL ARRAY OUT upper, \ + CCTK_REAL ARRAY OUT delta) +REQUIRES FUNCTION GetCoordRange + + + +CCTK_INT FUNCTION \ + MultiPatch_GlobalToLocal \ + (CCTK_POINTER_TO_CONST IN cctkGH, \ + CCTK_INT IN ndims, \ + CCTK_INT IN npoints, \ + CCTK_POINTER_TO_CONST IN globalcoords, \ + CCTK_INT ARRAY OUT patch, \ + CCTK_POINTER IN localcoords, \ + CCTK_POINTER IN dadx, \ + CCTK_POINTER IN ddadxdx) +USES FUNCTION MultiPatch_GlobalToLocal diff --git a/Carpet/CarpetInterp2/param.ccl b/Carpet/CarpetInterp2/param.ccl new file mode 100644 index 000000000..9061931b7 --- /dev/null +++ b/Carpet/CarpetInterp2/param.ccl @@ -0,0 +1 @@ +# Parameter definitions for thorn CarpetInterp diff --git a/Carpet/CarpetInterp2/schedule.ccl b/Carpet/CarpetInterp2/schedule.ccl new file mode 100644 index 000000000..96d18609b --- /dev/null +++ b/Carpet/CarpetInterp2/schedule.ccl @@ -0,0 +1 @@ +# Schedule definitions for thorn CarpetInterp diff --git a/Carpet/CarpetInterp2/src/fasterp.cc b/Carpet/CarpetInterp2/src/fasterp.cc new file mode 100644 index 000000000..1de7c7863 --- /dev/null +++ b/Carpet/CarpetInterp2/src/fasterp.cc @@ -0,0 +1,681 @@ +#include <cassert> +#include <cmath> +#include <cstddef> +#include <cstdlib> + +#include <cctk.h> + +#include <mpi.h> + +#include <carpet.hh> + +#include "fasterp.hh" + + + +namespace CarpetInterp2 { + + + + // Create an MPI datatype from a C datatype description + void + create_mpi_datatype (size_t const count, + mpi_struct_descr_t const descr[], + MPI_Datatype & newtype) + { + int blocklengths[count]; + MPI_Aint displacements[count]; + MPI_Datatype types[count]; + for (size_t n=0; n<count; ++n) { + blocklengths [n] = descr[n].blocklength; + displacements[n] = descr[n].displacement; + types [n] = descr[n].type; + } + MPI_Type_struct (count, blocklengths, displacements, types, &newtype); + MPI_Type_commit (&newtype); + } + + + + // Create an MPI datatype for location information + MPI_Datatype + fasterp_iloc_t::mpi_datatype () + { + static bool initialised = false; + static MPI_Datatype newtype; + if (not initialised) { + static fasterp_iloc_t s; +#define ARRSIZE(m) (sizeof(s.m) / sizeof(s.m[0])) +#define OFFSET(m) ((char*)&(s.m) - (char*)&(s)) +#define SIZE (sizeof(s)) + CCTK_REAL rdummy; + mpi_struct_descr_t const descr[] = { + { 1, OFFSET(m ), MPI_INT }, + { 1, OFFSET(rl ), MPI_INT }, + { 1, OFFSET(c ), MPI_INT }, + { 1, OFFSET(ind3d ), MPI_INT }, + {ARRSIZE(offset), OFFSET(offset), dist::datatype(rdummy)}, + { 1, SIZE , MPI_UB } + }; +#undef ARRSIZE +#undef OFFSET +#undef SIZE + create_mpi_datatype (sizeof(descr) / sizeof(descr[0]), descr, newtype); + initialised = true; + } + return newtype; + } + + + + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + + + + // Calculate the coefficients for one interpolation stencil + void + fasterp_src_loc_t:: + calc_stencil (fasterp_iloc_t const & iloc, + int const order) + { + assert (order <= max_order); + CCTK_REAL const rone = 1.0; + CCTK_REAL const eps = 1.0e-12; + int const n0 = (order+1) / 2; + CCTK_REAL const offset = order % 2 == 0 ? 0.5 : 0.0; + for (int d=0; d<dim; ++d) { + // C_n = Pi_m[m!=n] [(x_i - x_m) / (x_n - x_m)] + CCTK_REAL const xi = iloc.offset[d] - offset; + if (order % 2 == 0) { + assert (xi >= -rone/2 and xi < +rone/2); + } else { + assert (xi >= 0 and xi < rone); + } + if (fabs(xi) >= eps) { + assert (fabs(fmod(xi, rone)) <= eps/2); + for (int n=0; n<=order; ++n) { + CCTK_REAL const xn = n - n0; + CCTK_REAL c = 1.0; + for (int m=0; m<=order; ++m) { + if (m != n) { + CCTK_REAL const xm = m - n0; + c *= (xi - xm) / (xn - xm); + } + } + coeffs[d][n] = c; + } + exact[d] = false; + // The sum of the coefficients must be one + CCTK_REAL s = 0.0; + for (int n=0; n<=order; ++n) { + s += coeffs[d][n]; + } + assert (fabs(s - rone) <= eps); + } else { + exact[d] = true; + } + } + ind3d = iloc.ind3d; + } + + + + // Interpolate a set of variables at the same location, with a given + // set of interpolation coefficients. The template mechanism allows + // the order in each direction to be different; in particular, the + // order can be 0 if the interpolation location coincides with a + // source grid point. For interpolation to order O, this function + // should be instantiated eight times, with On=O and On=0, for + // n=[0,1,2]. We hope that the compiler optimises the for loops + // away if On=0. + template <int O0, int O1, int O2> + void + fasterp_src_loc_t:: + interpolate (ivect const & lsh, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict const vals) + const + { + size_t const di = 1; + size_t const dj = di * lsh[0]; + size_t const dk = dj * lsh[1]; + + for (size_t v=0; v<varptrs.size(); ++v) { + vals[v] = 0.0; + } + + for (size_t k=0; k<=O2; ++k) { + CCTK_REAL const coeff_k = coeffs[2][k]; + for (size_t j=0; j<=O1; ++j) { + CCTK_REAL const coeff_jk = coeff_k * coeffs[1][j]; + for (size_t i=0; i<=O0; ++i) { + CCTK_REAL const coeff_ijk = coeff_jk * coeffs[0][i]; + + for (size_t v=0; v<varptrs.size(); ++v) { + vals[v] += coeff_ijk * varptrs.AT(v)[ind3d + i*di + j*dj + k*dk]; + } // for v + + } + } + } + } + + + + // Interpolate a set of variables at the same location, with a given + // set of interpolation coefficients. This calls the specialised + // interpolation function, depending on whether interpolation is + // exact in each of the directions. + template <int O> + void + fasterp_src_loc_t:: + interpolate (ivect const & lsh, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict const vals) + const + { + int const Z = 0; + if (exact[2]) { + if (exact[1]) { + if (exact[0]) { + interpolate<Z,Z,Z> (lsh, varptrs, vals); + } else { + interpolate<Z,Z,O> (lsh, varptrs, vals); + } + } else { + if (exact[0]) { + interpolate<Z,O,Z> (lsh, varptrs, vals); + } else { + interpolate<Z,O,O> (lsh, varptrs, vals); + } + } + } else { + if (exact[1]) { + if (exact[0]) { + interpolate<O,Z,Z> (lsh, varptrs, vals); + } else { + interpolate<O,Z,O> (lsh, varptrs, vals); + } + } else { + if (exact[0]) { + interpolate<O,O,Z> (lsh, varptrs, vals); + } else { + interpolate<O,O,O> (lsh, varptrs, vals); + } + } + } + } + + + + // Interpolate a set of variables at the same location, with a given + // set of interpolation coefficients. This calls a specialised + // interpolation function, depending on whether interpolation is + // exact in each of the directions. + void + fasterp_src_loc_t:: + interpolate (ivect const & lsh, + int const order, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict const vals) + const + { + switch (order) { + case 0: interpolate< 0> (lsh, varptrs, vals); break; + case 1: interpolate< 1> (lsh, varptrs, vals); break; + case 2: interpolate< 2> (lsh, varptrs, vals); break; + case 3: interpolate< 3> (lsh, varptrs, vals); break; + case 4: interpolate< 4> (lsh, varptrs, vals); break; + case 5: interpolate< 5> (lsh, varptrs, vals); break; + case 6: interpolate< 6> (lsh, varptrs, vals); break; + case 7: interpolate< 7> (lsh, varptrs, vals); break; + case 8: interpolate< 8> (lsh, varptrs, vals); break; + case 9: interpolate< 9> (lsh, varptrs, vals); break; + case 10: interpolate<10> (lsh, varptrs, vals); break; + case 11: interpolate<11> (lsh, varptrs, vals); break; + default: + // Add higher orders here as desired + CCTK_WARN (CCTK_WARN_ABORT, "Interpolation orders larger than 11 are not yet implemented"); + assert (0); + } + } + + + + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////// + + + + // Set up an interpolation + fasterp_setup_t:: + fasterp_setup_t (cGH const * restrict const cctkGH, + fasterp_glocs_t const & locations, + int const order_) + : order (order_) + { + // Some global properties + int const npoints = locations.size(); + int const nprocs = CCTK_nProcs (cctkGH); + // int const myproc = CCTK_MyProc (cctkGH); + + + + // Calculate patch numbers and local coordinates + fasterp_llocs_t local_locations (npoints); + + if (CCTK_IsFunctionAliased ("MultiPatch_GlobalToLocal")) { + // This is a muulti-patch simulation: convert global to local + // coordinates + + CCTK_REAL const * coords[dim]; + CCTK_REAL * local_coords[dim]; + for (int d=0; d<dim; ++d) { + coords[d] = &locations.coords[d].front(); + local_coords[d] = &local_locations.coords[d].front(); + } + + int const ierr = MultiPatch_GlobalToLocal + (cctkGH, dim, npoints, + coords, + &local_locations.maps.front(), local_coords, NULL, NULL); + assert (not ierr); + + } else { + // This is a single-patch simulation + + // TODO: Optimise this: don't copy coordinates, and don't store + // map numbers if there is only one map. + for (int n=0; n<npoints; ++n) { + int const m = 0; + local_locations.maps.AT(n) = m; + for (int d=0; d<dim; ++d) { + local_locations.coords[d].AT(n) = locations.coords[d].AT(n); + } + } + + } // if not multi-patch + + // Obtain the coordinate ranges for all patches + vector<rvect> lower (Carpet::maps); + vector<rvect> upper (Carpet::maps); + vector<rvect> delta (Carpet::maps); // spacing on finest possible grid + for (int m=0; m<Carpet::maps; ++m) { + jvect gsh; + int const ierr = + GetCoordRange (cctkGH, m, Carpet::mglevel, dim, + & gsh[0], + & lower.AT(m)[0], & upper.AT(m)[0], & delta.AT(m)[0]); + assert (not ierr); + delta.AT(m) /= Carpet::maxspacereflevelfact; + } + + // Calculate refinement levels, components, and integer grid point + // indices + vector<fasterp_iloc_t> ilocs (npoints); + vector<int> proc (npoints); + vector<int> nlocs (nprocs, 0); + int n_nz_nlocs = 0; + assert (Carpet::is_level_mode()); + for (int n=0; n<npoints; ++n) { + int const m = local_locations.maps.AT(n); + rvect const pos (local_locations.coords[0].AT(n), + local_locations.coords[1].AT(n), + local_locations.coords[2].AT(n)); + + gh const * const hh = Carpet::vhh.AT(m); + ibbox const & baseext = hh->baseextent(Carpet::mglevel, 0); + rvect const rpos = + (pos - lower.AT(m)) / (upper.AT(m) - lower.AT(m)) * + rvect (baseext.upper() - baseext.lower()); + + // Find refinement level and component + int rl, c; + ivect ipos; + hh->locate_position (rpos, + Carpet::mglevel, + Carpet::reflevel, Carpet::reflevel+1, + rl, c, ipos); + + ibbox const & ext = + Carpet::vdd.AT(m)->boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; + + rvect dpos = rpos - rvect(ipos); + if (order % 2 != 0) { + // Potentially shift the stencil anchor for odd interpolation + // orders (i.e., for even numbers of stencil points) + ipos -= either (dpos > rvect(0), ext.stride(), ivect(0)); + dpos = rpos - rvect(ipos); + } + + ivect const ind = ipos - ext.lower() / ext.stride(); + ivect const lsh = ext.shape() / ext.stride(); + int const ind3d = ind[0] + lsh[0] * (ind[1] + lsh[1] * ind[2]); + + // Store result + fasterp_iloc_t & iloc = ilocs.AT(n); + iloc.m = m; + iloc.rl = rl; + iloc.c = c; + iloc.ind3d = ind3d; + iloc.offset = dpos; + + // Find source processor + int const p = Carpet::vhh.AT(m)->processor(rl,c); + proc.AT(n) = p; + if (nlocs.AT(p) == 0) ++n_nz_nlocs; + ++ nlocs.AT(p); + } + + + + // Find mapping from processors to "processor indices": It may be + // too expensive to store data for all processors, so we store + // only data about those processors with which we actually + // communicate. + { + recv_descr.procs.resize (n_nz_nlocs); + recv_descr.procinds.resize (nprocs, -1); + int pp = 0; + int offset = 0; + for (int p=0; p<nprocs; ++p) { + if (nlocs.AT(p) > 0) { + recv_descr.procs.AT(pp).p = p; + recv_descr.procs.AT(pp).offset = offset; + recv_descr.procs.AT(pp).npoints = nlocs.AT(p); + recv_descr.procinds.AT(p) = pp; + ++ pp; + offset += nlocs.AT(p); + } + } + assert (pp == n_nz_nlocs); + assert (offset == npoints); + recv_descr.npoints = npoints; + } + + // Create a mapping "index" from location index, as specified by + // the user, to the index order in which the data are received + // from the other processors. + { + vector<int> index (recv_descr.procs.size()); + for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { + index.AT(pp) = recv_descr.procs.AT(pp).offset; + } + recv_descr.index.resize (npoints); + for (int n=0; n<npoints; ++n) { + int const p = proc.AT(n); + int const pp = recv_descr.procinds.AT(p); + assert (pp >= 0); + recv_descr.index.AT(n) = index.AT(pp); + ++index.AT(pp); + } + for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { + int const recv_npoints = index.AT(pp) - recv_descr.procs.AT(pp).offset; + assert (recv_npoints == recv_descr.procs.AT(pp).npoints); + } + } + + + + // Count the number of points which have to be sent to other + // processors, and exchange this information with MPI + vector<int> send_npoints (nprocs, 0), send_offsets (nprocs); + { + int offset = 0; + for (int pp=0; pp<int(recv_descr.procs.size()); ++pp) { + int const p = recv_descr.procs.AT(pp).p; + send_npoints.AT(p) = recv_descr.procs.AT(pp).npoints; + send_offsets.AT(p) = offset; + offset += send_npoints.AT(p); + } + assert (offset == npoints); + } + vector<int> recv_npoints (nprocs), recv_offsets (nprocs); + MPI_Alltoall (&send_npoints.front(), 1, MPI_INT, + &recv_npoints.front(), 1, MPI_INT, + MPI_COMM_WORLD); + int npoints_source = 0; + for (int p=0; p<nprocs; ++p) { + recv_offsets.AT(p) = npoints_source; + npoints_source += recv_npoints.AT(p); + } + + // Scatter the location information into a send-array, and + // exchange it with MPI + vector<fasterp_iloc_t> scattered_ilocs(npoints); + for (int n=0; n<npoints; ++n) { + scattered_ilocs.AT(recv_descr.index.AT(n)) = ilocs.AT(n); + } + vector<fasterp_iloc_t> gathered_ilocs(npoints_source); + MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); + MPI_Alltoallv + (&scattered_ilocs.front(), &send_npoints.front(), &send_offsets.front(), + fasterp_iloc_t::mpi_datatype(), + &gathered_ilocs.front(), &recv_npoints.front(), &recv_offsets.front(), + fasterp_iloc_t::mpi_datatype(), + comm_world); + + + + // Fill in send descriptors + send_descr.npoints = npoints_source; + + { + int n_nz_recv_npoints = 0; + for (int p=0; p<nprocs; ++p) { + if (recv_npoints.AT(p) > 0) ++n_nz_recv_npoints; + } + send_descr.procs.resize (n_nz_recv_npoints); + int pp = 0; + for (int p=0; p<nprocs; ++p) { + if (recv_npoints.AT(p) > 0) { + send_proc_t & send_proc = send_descr.procs.AT(pp); + send_proc.p = p; + send_proc.offset = recv_offsets.AT(p); + send_proc.npoints = recv_npoints.AT(p); + ++pp; + } + } + assert (pp == n_nz_recv_npoints); + } + + for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { + send_proc_t & send_proc = send_descr.procs.AT(pp); + + send_proc.maps.resize (Carpet::maps); + for (int m=0; m<Carpet::maps; ++m) { + send_proc.maps.AT(m).rls.resize (Carpet::reflevels); + for (int rl=0; rl<Carpet::reflevels; ++rl) { + int const ncomps = Carpet::vhh.AT(m)->components (rl); + send_proc.maps.AT(m).rls.AT(rl).compinds.resize (ncomps, -1); + } + } + + vector<vector<vector<int> > > npoints_comp (Carpet::maps); + for (int m=0; m<Carpet::maps; ++m) { + npoints_comp.AT(m).resize(Carpet::reflevels); + for (int rl=0; rl<Carpet::reflevels; ++rl) { + int const ncomps = Carpet::vhh.AT(m)->components (rl); + npoints_comp.AT(m).AT(rl).resize(ncomps, 0); + } + } + + vector<vector<int> > n_nz_npoints_comp (Carpet::maps); + for (int m=0; m<Carpet::maps; ++m) { + n_nz_npoints_comp.AT(m).resize(Carpet::reflevels, 0); + } + + for (int n=0; n<send_proc.npoints; ++n) { + fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset + pp); + int const m = iloc.m; + int const rl = iloc.rl; + int const c = iloc.c; + if (npoints_comp.AT(m).AT(rl).AT(c) == 0) { + ++ n_nz_npoints_comp.AT(m).AT(rl); + } + ++ npoints_comp.AT(m).AT(rl).AT(c); + } + + for (int m=0; m<Carpet::maps; ++m) { + for (int rl=0; rl<Carpet::reflevels; ++rl) { + send_proc.maps.AT(m).rls.AT(rl).comps.resize + (n_nz_npoints_comp.AT(m).AT(rl)); + int cc = 0; + int const ncomps = Carpet::vhh.AT(m)->components (rl); + for (int c=0; c<ncomps; ++c) { + if (npoints_comp.AT(m).AT(rl).AT(c) > 0) { + send_comp_t & comp = send_proc.maps.AT(m).rls.AT(rl).comps.AT(cc); + comp.locs.clear(); + comp.locs.reserve (npoints_comp.AT(m).AT(rl).AT(c)); + comp.c = c; + send_proc.maps.AT(m).rls.AT(rl).compinds.AT(c) = cc; + ++cc; + } + } + } + } + + } // for pp + + + + // Calculate stencil coefficients + for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { + send_proc_t & send_proc = send_descr.procs.AT(pp); + for (int n=0; n<send_proc.npoints; ++n) { + fasterp_iloc_t const & iloc = gathered_ilocs.AT(send_proc.offset+pp); + int const m = iloc.m; + int const rl = iloc.rl; + int const c = iloc.c; + + fasterp_src_loc_t sloc; + sloc.calc_stencil (iloc, order); + + int const cc = send_proc.maps.AT(m).rls.AT(rl).compinds.AT(c); + send_comp_t & comp = send_proc.maps.AT(m).rls.AT(rl).comps.AT(cc); + comp.locs.push_back (sloc); + } + } // for pp + } + + + + // Free the setup for one interpolation + fasterp_setup_t:: + ~fasterp_setup_t () + { + // do nothing -- C++ calls destructors automatically + } + + + + // Interpolate + void + fasterp_setup_t:: + interpolate (cGH const * restrict const cctkGH, + vector<int> const & varinds, + vector<CCTK_REAL *> & values) + const + { + size_t const nvars = varinds.size(); + assert (values.size() == nvars); + for (size_t v=0; v<values.size(); ++v) { + assert (varinds.AT(v) >= 0 and varinds.AT(v) <= CCTK_NumVars()); + // Ensure that variable is GF + // Ensure that variable has "good" type + // Ensure that variable has storage + assert (values.AT(v) != NULL); + } + + MPI_Comm & comm_world = * (MPI_Comm *) GetMPICommWorld (cctkGH); + + // Post Irecvs + vector<CCTK_REAL> recv_points (recv_descr.npoints * nvars); + vector<MPI_Request> recv_reqs (recv_descr.procs.size()); + for (size_t pp=0; pp<recv_descr.procs.size(); ++pp) { + recv_proc_t const & recv_proc = recv_descr.procs.AT(pp); + + CCTK_REAL rdummy; + int const tag = 0; + MPI_Irecv (& recv_points.AT(recv_proc.offset * nvars), + recv_proc.npoints * nvars, + dist::datatype (rdummy), recv_proc.p, tag, + comm_world, & recv_reqs.AT(pp)); + } + + // Interpolate data and post Isends + vector<CCTK_REAL> send_points (send_descr.npoints); + vector<CCTK_REAL>::iterator destit = send_points.begin(); + vector<MPI_Request> send_reqs (send_descr.procs.size()); + for (size_t pp=0; pp<send_descr.procs.size(); ++pp) { + send_proc_t const & send_proc = send_descr.procs.AT(pp); + + for (size_t m=0; m<send_proc.maps.size(); ++m) { + send_map_t const & send_map = send_proc.maps.AT(m); + for (size_t rl=0; rl<send_map.rls.size(); ++rl) { + send_rl_t const & send_rl = send_map.rls.AT(rl); + for (size_t cc=0; cc<send_rl.comps.size(); ++cc) { + send_comp_t const & send_comp = send_rl.comps.AT(cc); + int const c = send_comp.c; + + dh const & dd = * Carpet::vdd.AT(m); + ibbox const & + ext = dd.boxes.AT(Carpet::mglevel).AT(rl).AT(c).exterior; + ivect const lsh = ext.shape() / ext.stride(); + + // Get pointers to 3D data + vector<CCTK_REAL const *> varptrs (nvars); + for (size_t v=0; v<nvars; ++v) { + int const gi = CCTK_GroupIndexFromVarI (varinds.AT(v)); + assert (gi >= 0); + int const vi = varinds.AT(v) - CCTK_FirstVarIndexI (gi); + assert (vi >= 0); + int const tl = 0; + varptrs.AT(v) = + (CCTK_REAL const *) + (* Carpet::arrdata.AT(gi).AT(m).data.AT(vi)) + (tl, rl, c, Carpet::mglevel)->storage(); + assert (varptrs.AT(v)); + } + + for (size_t n=0; n<send_comp.locs.size(); ++n) { + assert (destit + nvars <= send_points.end()); + send_comp.locs.AT(n).interpolate + (lsh, order, varptrs, & * destit); + destit += nvars; + } + + } // for cc + } // for rl + } // for m + + CCTK_REAL rdummy; + int const tag = 0; + MPI_Isend (& send_points.AT(send_proc.offset * nvars), + send_proc.npoints * nvars, + dist::datatype (rdummy), send_proc.p, tag, + comm_world, & send_reqs.AT(pp)); + } // for pp + assert (destit == send_points.end()); + + // Wait for Irecvs to complete + MPI_Waitall (recv_reqs.size(), & recv_reqs.front(), MPI_STATUSES_IGNORE); + + // Gather data + for (int n=0; n<recv_descr.npoints; ++n) { + int const nn = recv_descr.index.AT(n); + for (size_t v=0; v<nvars; ++v) { + values.AT(v)[n] = recv_points.AT(nn); + } + } + + // Wait for Isends to complete + MPI_Waitall (send_reqs.size(), & send_reqs.front(), MPI_STATUSES_IGNORE); + } + + + +} // namespace CarpetInterp2 diff --git a/Carpet/CarpetInterp2/src/fasterp.hh b/Carpet/CarpetInterp2/src/fasterp.hh new file mode 100644 index 000000000..d2a13a35f --- /dev/null +++ b/Carpet/CarpetInterp2/src/fasterp.hh @@ -0,0 +1,200 @@ +#ifndef FASTERP_HH +#define FASTERP_HH + +#include <cstdlib> +#include <vector> + +#include <cctk.h> + +#include <defs.hh> +#include <dist.hh> +#include <typeprops.hh> +#include <vect.hh> + + + +namespace CarpetInterp2 { + + using namespace std; + + + + int const dim = 3; + + // An interpolation point descriptor requires (3 * (max_order+1) + + // 1) double precision values of memory + int const max_order = 5; + + + + // Map C structures to MPI datatypes + struct mpi_struct_descr_t { + int blocklength; + MPI_Aint displacement; + MPI_Datatype type; + }; + + void + create_mpi_datatype (size_t count, + mpi_struct_descr_t const descr[], + MPI_Datatype & newtype); + + + + // A global location, given by its global coordinates + struct fasterp_glocs_t { + vector<CCTK_REAL> coords[dim]; + fasterp_glocs_t (size_t const n) + { + for (int d=0; d<dim; ++d) { + coords[d].resize(n); + } + } + size_t size () const { return coords[0].size(); } + }; + + // A local location, given by map and local coordinates + struct fasterp_llocs_t { + vector<int> maps; + vector<CCTK_REAL> coords[dim]; + fasterp_llocs_t (size_t const n) + { + maps.resize(n); + for (int d=0; d<dim; ++d) { + coords[d].resize(n); + } + } + size_t size () const { return maps.size(); } + }; + + // An integer location, given by map, refinement level, and + // component + struct fasterp_iloc_t { + int m, rl, c; + + // ivect idx; + int ind3d; + rvect offset; // in terms of grid points + + static MPI_Datatype mpi_datatype (); + }; + + + + struct fasterp_dest_loc_t { + // ivect idx; + int ind3d; // destination grid point index + }; + + class fasterp_src_loc_t { + CCTK_REAL coeffs[dim][max_order+1]; // interpolation coefficients + bvect exact; + + // ivect idx; + int ind3d; // source grid point offset + + public: + void + calc_stencil (fasterp_iloc_t const & iloc, + int order); + void + interpolate (ivect const & lsh, + int order, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict vals) + const; + + private: + template <int O> + void + interpolate (ivect const & lsh, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict vals) + const; + template <int O0, int O1, int O2> + void + interpolate (ivect const & lsh, + vector<CCTK_REAL const *> const & varptrs, + CCTK_REAL * restrict vals) + const; + }; + + + + // A receive descriptor, describing what is received from other + // processors + struct recv_proc_t { + int p; // sending processor + int offset; + int npoints; // total number of received points + }; + + struct recv_descr_t { + vector<recv_proc_t> procs; + vector<int> procinds; + int npoints; // total number of received points + + vector<int> index; // gather index list + }; + + // A send descriptor; describing what to send to other processors + struct send_comp_t { + // This structure does not exist for all components -- components + // which are not accessed are not described, making this a sparse + // data structure. The field c contains the component number. + vector<fasterp_src_loc_t> locs; + int c; // source component + }; + + struct send_rl_t { + vector<send_comp_t> comps; + vector<int> compinds; + }; + + struct send_map_t { + vector<send_rl_t> rls; + }; + + struct send_proc_t { + // This structure does not exist for all processors -- processors + // with which there is no communication are not described, making + // this a sparse data structure. The field p contains the + // processor number. + vector<send_map_t> maps; + int p; // receiving processor + int offset; + int npoints; // total number of sent points + }; + + struct send_descr_t { + vector<send_proc_t> procs; + // vector<int> procinds; + int npoints; // total number of sent points + }; + + + + class fasterp_setup_t { + recv_descr_t recv_descr; + send_descr_t send_descr; + int order; + + public: + fasterp_setup_t (cGH const * restrict cctkGH, + fasterp_glocs_t const & locations, + int order); + + ~ fasterp_setup_t (); + + void + interpolate (cGH const * restrict cctkGH, + vector<int> const & varinds, + vector<CCTK_REAL *> & values) + const; + }; + + + +} // namespace CarpetInterp2 + +#endif // #define FASTERP_HH diff --git a/Carpet/CarpetInterp2/src/make.code.defn b/Carpet/CarpetInterp2/src/make.code.defn new file mode 100644 index 000000000..d95df47d6 --- /dev/null +++ b/Carpet/CarpetInterp2/src/make.code.defn @@ -0,0 +1,8 @@ +# Main make.code.defn file for thorn CarpetInterp2 + +# Source files in this directory +SRCS = fasterp.cc + +# Subdirectories containing source files +SUBDIRS = + |