[v2,5/9] target/arm: Move helper_dc_zva to helper-a64.c

Message ID: 20200302175829.2183-6-richard.henderson@linaro.org
State: New
Headers show
Series: target/arm: Misc cleanups surrounding TBI
Related show

Commit Message

Richard Henderson March 2, 2020, 5:58 p.m.
This is an aarch64-only function.  Move it out of the shared file.
This patch is code movement only.

Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

---
 target/arm/helper-a64.h |  1 +
 target/arm/helper.h     |  1 -
 target/arm/helper-a64.c | 91 ++++++++++++++++++++++++++++++++++++++++
 target/arm/op_helper.c  | 93 -----------------------------------------
 4 files changed, 92 insertions(+), 94 deletions(-)

-- 
2.20.1

Comments

Philippe Mathieu-Daudé March 2, 2020, 11:52 p.m. | #1
On 3/2/20 6:58 PM, Richard Henderson wrote:
> This is an aarch64-only function.  Move it out of the shared file.

> This patch is code movement only.

> 

> Reviewed-by: Peter Maydell <peter.maydell@linaro.org>

> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>

> ---

>   target/arm/helper-a64.h |  1 +

>   target/arm/helper.h     |  1 -

>   target/arm/helper-a64.c | 91 ++++++++++++++++++++++++++++++++++++++++

>   target/arm/op_helper.c  | 93 -----------------------------------------

>   4 files changed, 92 insertions(+), 94 deletions(-)

> 

> diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h

> index a915c1247f..b1a5935f61 100644

> --- a/target/arm/helper-a64.h

> +++ b/target/arm/helper-a64.h

> @@ -90,6 +90,7 @@ DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)

>   DEF_HELPER_2(sqrt_f16, f16, f16, ptr)

>   

>   DEF_HELPER_2(exception_return, void, env, i64)

> +DEF_HELPER_2(dc_zva, void, env, i64)

>   

>   DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)

>   DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)

> diff --git a/target/arm/helper.h b/target/arm/helper.h

> index fcbf504121..72eb9e6a1a 100644

> --- a/target/arm/helper.h

> +++ b/target/arm/helper.h

> @@ -559,7 +559,6 @@ DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)

>   

>   DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)

>   DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)

> -DEF_HELPER_2(dc_zva, void, env, i64)

>   

>   DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,

>                      void, ptr, ptr, ptr, ptr, i32)

> diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c

> index 123ce50e7a..bc0649a44a 100644

> --- a/target/arm/helper-a64.c

> +++ b/target/arm/helper-a64.c

> @@ -18,6 +18,7 @@

>    */

>   

>   #include "qemu/osdep.h"

> +#include "qemu/units.h"

>   #include "cpu.h"

>   #include "exec/gdbstub.h"

>   #include "exec/helper-proto.h"

> @@ -1109,4 +1110,94 @@ uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)

>       return float16_sqrt(a, s);

>   }

>   

> +void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)

> +{

> +    /*

> +     * Implement DC ZVA, which zeroes a fixed-length block of memory.

> +     * Note that we do not implement the (architecturally mandated)

> +     * alignment fault for attempts to use this on Device memory

> +     * (which matches the usual QEMU behaviour of not implementing either

> +     * alignment faults or any memory attribute handling).

> +     */

>   

> +    ARMCPU *cpu = env_archcpu(env);

> +    uint64_t blocklen = 4 << cpu->dcz_blocksize;

> +    uint64_t vaddr = vaddr_in & ~(blocklen - 1);

> +

> +#ifndef CONFIG_USER_ONLY

> +    {

> +        /*

> +         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than

> +         * the block size so we might have to do more than one TLB lookup.

> +         * We know that in fact for any v8 CPU the page size is at least 4K

> +         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only

> +         * 1K as an artefact of legacy v5 subpage support being present in the

> +         * same QEMU executable. So in practice the hostaddr[] array has

> +         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.

> +         */

> +        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);

> +        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];

> +        int try, i;

> +        unsigned mmu_idx = cpu_mmu_index(env, false);

> +        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

> +

> +        assert(maxidx <= ARRAY_SIZE(hostaddr));

> +

> +        for (try = 0; try < 2; try++) {

> +

> +            for (i = 0; i < maxidx; i++) {

> +                hostaddr[i] = tlb_vaddr_to_host(env,

> +                                                vaddr + TARGET_PAGE_SIZE * i,

> +                                                1, mmu_idx);

> +                if (!hostaddr[i]) {

> +                    break;

> +                }

> +            }

> +            if (i == maxidx) {

> +                /*

> +                 * If it's all in the TLB it's fair game for just writing to;

> +                 * we know we don't need to update dirty status, etc.

> +                 */

> +                for (i = 0; i < maxidx - 1; i++) {

> +                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);

> +                }

> +                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));

> +                return;

> +            }

> +            /*

> +             * OK, try a store and see if we can populate the tlb. This

> +             * might cause an exception if the memory isn't writable,

> +             * in which case we will longjmp out of here. We must for

> +             * this purpose use the actual register value passed to us

> +             * so that we get the fault address right.

> +             */

> +            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());

> +            /* Now we can populate the other TLB entries, if any */

> +            for (i = 0; i < maxidx; i++) {

> +                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;

> +                if (va != (vaddr_in & TARGET_PAGE_MASK)) {

> +                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());

> +                }

> +            }

> +        }

> +

> +        /*

> +         * Slow path (probably attempt to do this to an I/O device or

> +         * similar, or clearing of a block of code we have translations

> +         * cached for). Just do a series of byte writes as the architecture

> +         * demands. It's not worth trying to use a cpu_physical_memory_map(),

> +         * memset(), unmap() sequence here because:

> +         *  + we'd need to account for the blocksize being larger than a page

> +         *  + the direct-RAM access case is almost always going to be dealt

> +         *    with in the fastpath code above, so there's no speed benefit

> +         *  + we would have to deal with the map returning NULL because the

> +         *    bounce buffer was in use

> +         */

> +        for (i = 0; i < blocklen; i++) {

> +            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());

> +        }

> +    }

> +#else

> +    memset(g2h(vaddr), 0, blocklen);

> +#endif

> +}

> diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c

> index af3020b78f..eb0de080f1 100644

> --- a/target/arm/op_helper.c

> +++ b/target/arm/op_helper.c

> @@ -17,7 +17,6 @@

>    * License along with this library; if not, see <http://www.gnu.org/licenses/>.

>    */

>   #include "qemu/osdep.h"

> -#include "qemu/units.h"

>   #include "qemu/log.h"

>   #include "qemu/main-loop.h"

>   #include "cpu.h"

> @@ -936,95 +935,3 @@ uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)

>           return ((uint32_t)x >> shift) | (x << (32 - shift));

>       }

>   }

> -

> -void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)

> -{

> -    /*

> -     * Implement DC ZVA, which zeroes a fixed-length block of memory.

> -     * Note that we do not implement the (architecturally mandated)

> -     * alignment fault for attempts to use this on Device memory

> -     * (which matches the usual QEMU behaviour of not implementing either

> -     * alignment faults or any memory attribute handling).

> -     */

> -

> -    ARMCPU *cpu = env_archcpu(env);

> -    uint64_t blocklen = 4 << cpu->dcz_blocksize;

> -    uint64_t vaddr = vaddr_in & ~(blocklen - 1);

> -

> -#ifndef CONFIG_USER_ONLY

> -    {

> -        /*

> -         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than

> -         * the block size so we might have to do more than one TLB lookup.

> -         * We know that in fact for any v8 CPU the page size is at least 4K

> -         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only

> -         * 1K as an artefact of legacy v5 subpage support being present in the

> -         * same QEMU executable. So in practice the hostaddr[] array has

> -         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.

> -         */

> -        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);

> -        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];

> -        int try, i;

> -        unsigned mmu_idx = cpu_mmu_index(env, false);

> -        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);

> -

> -        assert(maxidx <= ARRAY_SIZE(hostaddr));

> -

> -        for (try = 0; try < 2; try++) {

> -

> -            for (i = 0; i < maxidx; i++) {

> -                hostaddr[i] = tlb_vaddr_to_host(env,

> -                                                vaddr + TARGET_PAGE_SIZE * i,

> -                                                1, mmu_idx);

> -                if (!hostaddr[i]) {

> -                    break;

> -                }

> -            }

> -            if (i == maxidx) {

> -                /*

> -                 * If it's all in the TLB it's fair game for just writing to;

> -                 * we know we don't need to update dirty status, etc.

> -                 */

> -                for (i = 0; i < maxidx - 1; i++) {

> -                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);

> -                }

> -                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));

> -                return;

> -            }

> -            /*

> -             * OK, try a store and see if we can populate the tlb. This

> -             * might cause an exception if the memory isn't writable,

> -             * in which case we will longjmp out of here. We must for

> -             * this purpose use the actual register value passed to us

> -             * so that we get the fault address right.

> -             */

> -            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());

> -            /* Now we can populate the other TLB entries, if any */

> -            for (i = 0; i < maxidx; i++) {

> -                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;

> -                if (va != (vaddr_in & TARGET_PAGE_MASK)) {

> -                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());

> -                }

> -            }

> -        }

> -

> -        /*

> -         * Slow path (probably attempt to do this to an I/O device or

> -         * similar, or clearing of a block of code we have translations

> -         * cached for). Just do a series of byte writes as the architecture

> -         * demands. It's not worth trying to use a cpu_physical_memory_map(),

> -         * memset(), unmap() sequence here because:

> -         *  + we'd need to account for the blocksize being larger than a page

> -         *  + the direct-RAM access case is almost always going to be dealt

> -         *    with in the fastpath code above, so there's no speed benefit

> -         *  + we would have to deal with the map returning NULL because the

> -         *    bounce buffer was in use

> -         */

> -        for (i = 0; i < blocklen; i++) {

> -            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());

> -        }

> -    }

> -#else

> -    memset(g2h(vaddr), 0, blocklen);

> -#endif

> -}

> 


Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>

Patch

diff --git a/target/arm/helper-a64.h b/target/arm/helper-a64.h
index a915c1247f..b1a5935f61 100644
--- a/target/arm/helper-a64.h
+++ b/target/arm/helper-a64.h
@@ -90,6 +90,7 @@  DEF_HELPER_2(advsimd_f16touinth, i32, f16, ptr)
 DEF_HELPER_2(sqrt_f16, f16, f16, ptr)
 
 DEF_HELPER_2(exception_return, void, env, i64)
+DEF_HELPER_2(dc_zva, void, env, i64)
 
 DEF_HELPER_FLAGS_3(pacia, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_3(pacib, TCG_CALL_NO_WG, i64, env, i64, i64)
diff --git a/target/arm/helper.h b/target/arm/helper.h
index fcbf504121..72eb9e6a1a 100644
--- a/target/arm/helper.h
+++ b/target/arm/helper.h
@@ -559,7 +559,6 @@  DEF_HELPER_FLAGS_3(crypto_sm4ekey, TCG_CALL_NO_RWG, void, ptr, ptr, ptr)
 
 DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
 DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
-DEF_HELPER_2(dc_zva, void, env, i64)
 
 DEF_HELPER_FLAGS_5(gvec_qrdmlah_s16, TCG_CALL_NO_RWG,
                    void, ptr, ptr, ptr, ptr, i32)
diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c
index 123ce50e7a..bc0649a44a 100644
--- a/target/arm/helper-a64.c
+++ b/target/arm/helper-a64.c
@@ -18,6 +18,7 @@ 
  */
 
 #include "qemu/osdep.h"
+#include "qemu/units.h"
 #include "cpu.h"
 #include "exec/gdbstub.h"
 #include "exec/helper-proto.h"
@@ -1109,4 +1110,94 @@  uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
     return float16_sqrt(a, s);
 }
 
+void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
+{
+    /*
+     * Implement DC ZVA, which zeroes a fixed-length block of memory.
+     * Note that we do not implement the (architecturally mandated)
+     * alignment fault for attempts to use this on Device memory
+     * (which matches the usual QEMU behaviour of not implementing either
+     * alignment faults or any memory attribute handling).
+     */
 
+    ARMCPU *cpu = env_archcpu(env);
+    uint64_t blocklen = 4 << cpu->dcz_blocksize;
+    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
+
+#ifndef CONFIG_USER_ONLY
+    {
+        /*
+         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
+         * the block size so we might have to do more than one TLB lookup.
+         * We know that in fact for any v8 CPU the page size is at least 4K
+         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
+         * 1K as an artefact of legacy v5 subpage support being present in the
+         * same QEMU executable. So in practice the hostaddr[] array has
+         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
+         */
+        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
+        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
+        int try, i;
+        unsigned mmu_idx = cpu_mmu_index(env, false);
+        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
+
+        assert(maxidx <= ARRAY_SIZE(hostaddr));
+
+        for (try = 0; try < 2; try++) {
+
+            for (i = 0; i < maxidx; i++) {
+                hostaddr[i] = tlb_vaddr_to_host(env,
+                                                vaddr + TARGET_PAGE_SIZE * i,
+                                                1, mmu_idx);
+                if (!hostaddr[i]) {
+                    break;
+                }
+            }
+            if (i == maxidx) {
+                /*
+                 * If it's all in the TLB it's fair game for just writing to;
+                 * we know we don't need to update dirty status, etc.
+                 */
+                for (i = 0; i < maxidx - 1; i++) {
+                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
+                }
+                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
+                return;
+            }
+            /*
+             * OK, try a store and see if we can populate the tlb. This
+             * might cause an exception if the memory isn't writable,
+             * in which case we will longjmp out of here. We must for
+             * this purpose use the actual register value passed to us
+             * so that we get the fault address right.
+             */
+            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
+            /* Now we can populate the other TLB entries, if any */
+            for (i = 0; i < maxidx; i++) {
+                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
+                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
+                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
+                }
+            }
+        }
+
+        /*
+         * Slow path (probably attempt to do this to an I/O device or
+         * similar, or clearing of a block of code we have translations
+         * cached for). Just do a series of byte writes as the architecture
+         * demands. It's not worth trying to use a cpu_physical_memory_map(),
+         * memset(), unmap() sequence here because:
+         *  + we'd need to account for the blocksize being larger than a page
+         *  + the direct-RAM access case is almost always going to be dealt
+         *    with in the fastpath code above, so there's no speed benefit
+         *  + we would have to deal with the map returning NULL because the
+         *    bounce buffer was in use
+         */
+        for (i = 0; i < blocklen; i++) {
+            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
+        }
+    }
+#else
+    memset(g2h(vaddr), 0, blocklen);
+#endif
+}
diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c
index af3020b78f..eb0de080f1 100644
--- a/target/arm/op_helper.c
+++ b/target/arm/op_helper.c
@@ -17,7 +17,6 @@ 
  * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  */
 #include "qemu/osdep.h"
-#include "qemu/units.h"
 #include "qemu/log.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
@@ -936,95 +935,3 @@  uint32_t HELPER(ror_cc)(CPUARMState *env, uint32_t x, uint32_t i)
         return ((uint32_t)x >> shift) | (x << (32 - shift));
     }
 }
-
-void HELPER(dc_zva)(CPUARMState *env, uint64_t vaddr_in)
-{
-    /*
-     * Implement DC ZVA, which zeroes a fixed-length block of memory.
-     * Note that we do not implement the (architecturally mandated)
-     * alignment fault for attempts to use this on Device memory
-     * (which matches the usual QEMU behaviour of not implementing either
-     * alignment faults or any memory attribute handling).
-     */
-
-    ARMCPU *cpu = env_archcpu(env);
-    uint64_t blocklen = 4 << cpu->dcz_blocksize;
-    uint64_t vaddr = vaddr_in & ~(blocklen - 1);
-
-#ifndef CONFIG_USER_ONLY
-    {
-        /*
-         * Slightly awkwardly, QEMU's TARGET_PAGE_SIZE may be less than
-         * the block size so we might have to do more than one TLB lookup.
-         * We know that in fact for any v8 CPU the page size is at least 4K
-         * and the block size must be 2K or less, but TARGET_PAGE_SIZE is only
-         * 1K as an artefact of legacy v5 subpage support being present in the
-         * same QEMU executable. So in practice the hostaddr[] array has
-         * two entries, given the current setting of TARGET_PAGE_BITS_MIN.
-         */
-        int maxidx = DIV_ROUND_UP(blocklen, TARGET_PAGE_SIZE);
-        void *hostaddr[DIV_ROUND_UP(2 * KiB, 1 << TARGET_PAGE_BITS_MIN)];
-        int try, i;
-        unsigned mmu_idx = cpu_mmu_index(env, false);
-        TCGMemOpIdx oi = make_memop_idx(MO_UB, mmu_idx);
-
-        assert(maxidx <= ARRAY_SIZE(hostaddr));
-
-        for (try = 0; try < 2; try++) {
-
-            for (i = 0; i < maxidx; i++) {
-                hostaddr[i] = tlb_vaddr_to_host(env,
-                                                vaddr + TARGET_PAGE_SIZE * i,
-                                                1, mmu_idx);
-                if (!hostaddr[i]) {
-                    break;
-                }
-            }
-            if (i == maxidx) {
-                /*
-                 * If it's all in the TLB it's fair game for just writing to;
-                 * we know we don't need to update dirty status, etc.
-                 */
-                for (i = 0; i < maxidx - 1; i++) {
-                    memset(hostaddr[i], 0, TARGET_PAGE_SIZE);
-                }
-                memset(hostaddr[i], 0, blocklen - (i * TARGET_PAGE_SIZE));
-                return;
-            }
-            /*
-             * OK, try a store and see if we can populate the tlb. This
-             * might cause an exception if the memory isn't writable,
-             * in which case we will longjmp out of here. We must for
-             * this purpose use the actual register value passed to us
-             * so that we get the fault address right.
-             */
-            helper_ret_stb_mmu(env, vaddr_in, 0, oi, GETPC());
-            /* Now we can populate the other TLB entries, if any */
-            for (i = 0; i < maxidx; i++) {
-                uint64_t va = vaddr + TARGET_PAGE_SIZE * i;
-                if (va != (vaddr_in & TARGET_PAGE_MASK)) {
-                    helper_ret_stb_mmu(env, va, 0, oi, GETPC());
-                }
-            }
-        }
-
-        /*
-         * Slow path (probably attempt to do this to an I/O device or
-         * similar, or clearing of a block of code we have translations
-         * cached for). Just do a series of byte writes as the architecture
-         * demands. It's not worth trying to use a cpu_physical_memory_map(),
-         * memset(), unmap() sequence here because:
-         *  + we'd need to account for the blocksize being larger than a page
-         *  + the direct-RAM access case is almost always going to be dealt
-         *    with in the fastpath code above, so there's no speed benefit
-         *  + we would have to deal with the map returning NULL because the
-         *    bounce buffer was in use
-         */
-        for (i = 0; i < blocklen; i++) {
-            helper_ret_stb_mmu(env, vaddr + i, 0, oi, GETPC());
-        }
-    }
-#else
-    memset(g2h(vaddr), 0, blocklen);
-#endif
-}