You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

prefetch.h 4.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156
  1. // Copyright 2022 The Abseil Authors.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // https://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #ifndef ABSL_BASE_INTERNAL_PREFETCH_H_
  15. #define ABSL_BASE_INTERNAL_PREFETCH_H_
  16. #include "absl/base/config.h"
  17. #ifdef __SSE__
  18. #include <xmmintrin.h>
  19. #endif
  20. #if defined(_MSC_VER) && defined(ABSL_INTERNAL_HAVE_SSE)
  21. #include <intrin.h>
  22. #pragma intrinsic(_mm_prefetch)
  23. #endif
  24. // Compatibility wrappers around __builtin_prefetch, to prefetch data
  25. // for read if supported by the toolchain.
  26. // Move data into the cache before it is read, or "prefetch" it.
  27. //
  28. // The value of `addr` is the address of the memory to prefetch. If
  29. // the target and compiler support it, data prefetch instructions are
  30. // generated. If the prefetch is done some time before the memory is
  31. // read, it may be in the cache by the time the read occurs.
  32. //
  33. // The function names specify the temporal locality heuristic applied,
  34. // using the names of Intel prefetch instructions:
  35. //
  //   T0 - high degree of temporal locality; data should be left in as
  //        many levels of the cache as possible
  38. // T1 - moderate degree of temporal locality
  39. // T2 - low degree of temporal locality
  40. // Nta - no temporal locality, data need not be left in the cache
  41. // after the read
  42. //
  43. // Incorrect or gratuitous use of these functions can degrade
  44. // performance, so use them only when representative benchmarks show
  45. // an improvement.
  46. //
  47. // Example usage:
  48. //
  49. // absl::base_internal::PrefetchT0(addr);
  50. //
  51. // Currently, the different prefetch calls behave on some Intel
  52. // architectures as follows:
  53. //
  //                  SNB..SKL   SKX
  // PrefetchT0()     L1/L2/L3   L1/L2
  // PrefetchT1()     L2/L3      L2
  // PrefetchT2()     L2/L3      L2
  // PrefetchNta()    L1/--/L3   L1*
  59. //
  60. // * On SKX PrefetchNta() will bring the line into L1 but will evict
  61. // from L3 cache. This might result in surprising behavior.
  62. //
  63. // SNB = Sandy Bridge, SKL = Skylake, SKX = Skylake Xeon.
  64. //
  65. namespace absl
  66. {
  67. ABSL_NAMESPACE_BEGIN
  68. namespace base_internal
  69. {
  // Prefetch entry points. Each takes the address of the memory to prefetch
  // and returns nothing; the suffix names the temporal-locality hint
  // (T0, T1, T2, Nta) described in the header comment of this file.
  // The definitions are chosen by the preprocessor conditional below.
  void PrefetchT0(const void* addr);
  void PrefetchT1(const void* addr);
  void PrefetchT2(const void* addr);
  void PrefetchNta(const void* addr);
  74. // Implementation details follow.
  75. #if ABSL_HAVE_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
  76. #define ABSL_INTERNAL_HAVE_PREFETCH 1
  77. // See __builtin_prefetch:
  78. // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html.
  79. //
  80. // These functions speculatively load for read only. This is
  81. // safe for all currently supported platforms. However, prefetch for
  82. // store may have problems depending on the target platform.
  83. //
  // Prefetches the memory at `addr` for reading with the strongest
  // temporal-locality hint (locality 3). On Intel this uses prefetcht0.
  inline void PrefetchT0(const void* addr) {
    __builtin_prefetch(addr, /*rw=*/0, /*locality=*/3);
  }
  // Prefetches the memory at `addr` for reading with a moderate
  // temporal-locality hint (locality 2). On Intel this uses prefetcht1.
  inline void PrefetchT1(const void* addr) {
    __builtin_prefetch(addr, /*rw=*/0, /*locality=*/2);
  }
  // Prefetches the memory at `addr` for reading with a low
  // temporal-locality hint (locality 1). On Intel this uses prefetcht2.
  inline void PrefetchT2(const void* addr) {
    __builtin_prefetch(addr, /*rw=*/0, /*locality=*/1);
  }
  // Prefetches the memory at `addr` for reading with no temporal-locality
  // hint (locality 0). On Intel this uses prefetchnta.
  inline void PrefetchNta(const void* addr) {
    __builtin_prefetch(addr, /*rw=*/0, /*locality=*/0);
  }
  104. #elif defined(ABSL_INTERNAL_HAVE_SSE)
  105. #define ABSL_INTERNAL_HAVE_PREFETCH 1
  106. inline void PrefetchT0(const void* addr)
  107. {
  108. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T0);
  109. }
  110. inline void PrefetchT1(const void* addr)
  111. {
  112. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T1);
  113. }
  114. inline void PrefetchT2(const void* addr)
  115. {
  116. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_T2);
  117. }
  118. inline void PrefetchNta(const void* addr)
  119. {
  120. _mm_prefetch(reinterpret_cast<const char*>(addr), _MM_HINT_NTA);
  121. }
  122. #else
  // Fallback when neither __builtin_prefetch nor SSE is available:
  // accepts the address and intentionally does nothing.
  inline void PrefetchT0(const void*) {}
  // Fallback when neither __builtin_prefetch nor SSE is available:
  // accepts the address and intentionally does nothing.
  inline void PrefetchT1(const void*) {}
  // Fallback when neither __builtin_prefetch nor SSE is available:
  // accepts the address and intentionally does nothing.
  inline void PrefetchT2(const void*) {}
  // Fallback when neither __builtin_prefetch nor SSE is available:
  // accepts the address and intentionally does nothing.
  inline void PrefetchNta(const void*) {}
  135. #endif
  136. } // namespace base_internal
  137. ABSL_NAMESPACE_END
  138. } // namespace absl
  139. #endif // ABSL_BASE_INTERNAL_PREFETCH_H_