/* armv8-sha512-asm
 *
 * Copyright (C) 2006-2020 wolfSSL Inc.
 *
 * This file is part of wolfSSL.
 *
 * wolfSSL is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * wolfSSL is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
 */

#ifdef HAVE_CONFIG_H
    #include <config.h>
#endif /* HAVE_CONFIG_H */
#include <wolfssl/wolfcrypt/settings.h>

/* Generated using (from wolfssl):
 *   cd ../scripts
 *   ruby ./sha2/sha512.rb arm64 ../wolfssl/wolfcrypt/src/port/arm/armv8-sha512-asm.c
 */
#ifdef WOLFSSL_ARMASM
#ifdef __aarch64__
#include <wolfssl/wolfcrypt/sha512.h>

/* SHA-512 round constants K[0..79], two per row in round order.
 * The transform reads this table through a post-incremented pointer,
 * consuming one 64-bit entry per round. */
static const uint64_t L_SHA512_transform_neon_len_k[] = {
    0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
    0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
    0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
    0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
    0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
    0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
    0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
    0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
    0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
    0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
    0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
    0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
    0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
    0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
    0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
    0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
    0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
    0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
    0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
    0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
    0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
    0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
    0xd192e819d6ef5218UL, 0xd69906245565a910UL,
    0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
    0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
    0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
    0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
    0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
    0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
    0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
    0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
    0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
    0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
    0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
    0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
    0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
    0x28db77f523047d84UL, 0x32caab7b40c72493UL,
    0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
    0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
    0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL,
};

/* Byte-index vector loaded into v11 and used with the NEON tbl instruction.
 * With little-endian storage the index bytes are 1,2,..,7,0 and 9,10,..,15,8,
 * which rotates each 64-bit lane right by 8 bits. */
static const uint64_t L_SHA512_transform_neon_len_ror8[] = {
    0x0007060504030201UL, 0x080f0e0d0c0b0a09UL,
};

/* Process one or more 128-byte message blocks and fold them into the
 * SHA-512 state, using interleaved scalar and NEON instructions.
 *
 * The scalar instructions run the 80 compression rounds on the working
 * variables held in x4..x11. During the first 64 rounds of each block the
 * NEON instructions compute, in v0..v7, the message schedule words needed
 * 16 rounds later.
 *
 * sha512  Hash object. The eight 64-bit words at byte offsets 0..63 are
 *         loaded as the working state and stored back once all blocks have
 *         been processed.
 * data    Message bytes. Read 128 bytes at a time; every 64-bit word is
 *         byte-reversed after loading.
 * len     Number of bytes to process. The counter is only tested after a
 *         block has been processed and the loop ends only when it reaches
 *         exactly zero, so len must be a non-zero multiple of 128.
 */
void Transform_Sha512_Len(wc_Sha512* sha512, const byte* data, word32 len)
{
    __asm__ __volatile__ (
        /* Save x29/x30 and use x29 as a frame pointer */
        "stp	x29, x30, [sp, #-16]!\n\t"
        "add	x29, sp, #0\n\t"
        /* x3 = pointer into the round constant table */
#ifndef __APPLE__
        "adr	x3, %[L_SHA512_transform_neon_len_k]\n\t"
#else
        "adrp	x3, %[L_SHA512_transform_neon_len_k]@PAGE\n\t"
        "add	x3, x3, %[L_SHA512_transform_neon_len_k]@PAGEOFF\n\t"
#endif /* __APPLE__ */
        /* v11 = tbl byte indices used to rotate 64-bit lanes right by 8 */
#ifndef __APPLE__
        "adr	x27, %[L_SHA512_transform_neon_len_ror8]\n\t"
#else
        "adrp	x27, %[L_SHA512_transform_neon_len_ror8]@PAGE\n\t"
        "add	x27, x27, %[L_SHA512_transform_neon_len_ror8]@PAGEOFF\n\t"
#endif /* __APPLE__ */
        "ld1	{v11.16b}, [x27]\n\t"
        /* Load digest into working vars */
        "ldp	x4, x5, [%x[sha512]]\n\t"
        "ldp	x6, x7, [%x[sha512], #16]\n\t"
        "ldp	x8, x9, [%x[sha512], #32]\n\t"
        "ldp	x10, x11, [%x[sha512], #48]\n\t"
        /* Start of loop processing a block */
        "\n"
    "L_sha512_len_neon_begin_%=: \n\t"
        /* Load W */
        /* Copy digest to add in at end */
        "ld1	{v0.2d, v1.2d, v2.2d, v3.2d}, [%x[data]], #0x40\n\t"
        "mov	x19, x4\n\t"
        "ld1	{v4.2d, v5.2d, v6.2d, v7.2d}, [%x[data]], #0x40\n\t"
        "mov	x20, x5\n\t"
        "rev64	v0.16b, v0.16b\n\t"
        "mov	x21, x6\n\t"
        "rev64	v1.16b, v1.16b\n\t"
        "mov	x22, x7\n\t"
        "rev64	v2.16b, v2.16b\n\t"
        "mov	x23, x8\n\t"
        "rev64	v3.16b, v3.16b\n\t"
        "mov	x24, x9\n\t"
        "rev64	v4.16b, v4.16b\n\t"
        "mov	x25, x10\n\t"
        "rev64	v5.16b, v5.16b\n\t"
        "mov	x26, x11\n\t"
        "rev64	v6.16b, v6.16b\n\t"
        "rev64	v7.16b, v7.16b\n\t"
        /* Pre-calc: b ^ c */
        "eor	x16, x5, x6\n\t"
        /* x27 counts 4 passes of 16 rounds that also update the schedule */
        "mov	x27, #4\n\t"
        /* Start of 16 rounds */
        "\n"
    "L_sha512_len_neon_start_%=: \n\t"
        /* Round 0 */
        "mov	x13, v0.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x8, #14\n\t"
        "ror	x14, x4, #28\n\t"
        "eor	x12, x12, x8, ror 18\n\t"
        "eor	x14, x14, x4, ror 34\n\t"
        "eor	x12, x12, x8, ror 41\n\t"
        "eor	x14, x14, x4, ror 39\n\t"
        "add	x11, x11, x12\n\t"
        "eor	x17, x4, x5\n\t"
        "eor	x12, x9, x10\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x8\n\t"
        "add	x11, x11, x13\n\t"
        "eor	x12, x12, x10\n\t"
        "add	x11, x11, x15\n\t"
        "eor	x16, x16, x5\n\t"
        "add	x11, x11, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x7, x7, x11\n\t"
        "add	x11, x11, x14\n\t"
        /* Round 1 */
        "mov	x13, v0.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v0.16b, v1.16b, #8\n\t"
        "ror	x12, x7, #14\n\t"
        "shl	v8.2d, v7.2d, #45\n\t"
        "ror	x14, x11, #28\n\t"
        "sri	v8.2d, v7.2d, #19\n\t"
        "eor	x12, x12, x7, ror 18\n\t"
        "shl	v9.2d, v7.2d, #3\n\t"
        "eor	x14, x14, x11, ror 34\n\t"
        "sri	v9.2d, v7.2d, #61\n\t"
        "eor	x12, x12, x7, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x11, ror 39\n\t"
        "ushr	v8.2d, v7.2d, #6\n\t"
        "add	x10, x10, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x11, x4\n\t"
        "add	v0.2d, v0.2d, v9.2d\n\t"
        "eor	x12, x8, x9\n\t"
        "ext	v9.16b, v4.16b, v5.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v0.2d, v0.2d, v9.2d\n\t"
        "and	x12, x12, x7\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x10, x10, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x9\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x10, x10, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x4\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x10, x10, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v0.2d, v0.2d, v9.2d\n\t"
        "add	x6, x6, x10\n\t"
        "add	x10, x10, x14\n\t"
        /* Round 2 */
        "mov	x13, v1.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x6, #14\n\t"
        "ror	x14, x10, #28\n\t"
        "eor	x12, x12, x6, ror 18\n\t"
        "eor	x14, x14, x10, ror 34\n\t"
        "eor	x12, x12, x6, ror 41\n\t"
        "eor	x14, x14, x10, ror 39\n\t"
        "add	x9, x9, x12\n\t"
        "eor	x17, x10, x11\n\t"
        "eor	x12, x7, x8\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x6\n\t"
        "add	x9, x9, x13\n\t"
        "eor	x12, x12, x8\n\t"
        "add	x9, x9, x15\n\t"
        "eor	x16, x16, x11\n\t"
        "add	x9, x9, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x5, x5, x9\n\t"
        "add	x9, x9, x14\n\t"
        /* Round 3 */
        "mov	x13, v1.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v1.16b, v2.16b, #8\n\t"
        "ror	x12, x5, #14\n\t"
        "shl	v8.2d, v0.2d, #45\n\t"
        "ror	x14, x9, #28\n\t"
        "sri	v8.2d, v0.2d, #19\n\t"
        "eor	x12, x12, x5, ror 18\n\t"
        "shl	v9.2d, v0.2d, #3\n\t"
        "eor	x14, x14, x9, ror 34\n\t"
        "sri	v9.2d, v0.2d, #61\n\t"
        "eor	x12, x12, x5, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x9, ror 39\n\t"
        "ushr	v8.2d, v0.2d, #6\n\t"
        "add	x8, x8, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x9, x10\n\t"
        "add	v1.2d, v1.2d, v9.2d\n\t"
        "eor	x12, x6, x7\n\t"
        "ext	v9.16b, v5.16b, v6.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v1.2d, v1.2d, v9.2d\n\t"
        "and	x12, x12, x5\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x8, x8, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x7\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x8, x8, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x10\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x8, x8, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v1.2d, v1.2d, v9.2d\n\t"
        "add	x4, x4, x8\n\t"
        "add	x8, x8, x14\n\t"
        /* Round 4 */
        "mov	x13, v2.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x4, #14\n\t"
        "ror	x14, x8, #28\n\t"
        "eor	x12, x12, x4, ror 18\n\t"
        "eor	x14, x14, x8, ror 34\n\t"
        "eor	x12, x12, x4, ror 41\n\t"
        "eor	x14, x14, x8, ror 39\n\t"
        "add	x7, x7, x12\n\t"
        "eor	x17, x8, x9\n\t"
        "eor	x12, x5, x6\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x4\n\t"
        "add	x7, x7, x13\n\t"
        "eor	x12, x12, x6\n\t"
        "add	x7, x7, x15\n\t"
        "eor	x16, x16, x9\n\t"
        "add	x7, x7, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x11, x11, x7\n\t"
        "add	x7, x7, x14\n\t"
        /* Round 5 */
        "mov	x13, v2.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v2.16b, v3.16b, #8\n\t"
        "ror	x12, x11, #14\n\t"
        "shl	v8.2d, v1.2d, #45\n\t"
        "ror	x14, x7, #28\n\t"
        "sri	v8.2d, v1.2d, #19\n\t"
        "eor	x12, x12, x11, ror 18\n\t"
        "shl	v9.2d, v1.2d, #3\n\t"
        "eor	x14, x14, x7, ror 34\n\t"
        "sri	v9.2d, v1.2d, #61\n\t"
        "eor	x12, x12, x11, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x7, ror 39\n\t"
        "ushr	v8.2d, v1.2d, #6\n\t"
        "add	x6, x6, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x7, x8\n\t"
        "add	v2.2d, v2.2d, v9.2d\n\t"
        "eor	x12, x4, x5\n\t"
        "ext	v9.16b, v6.16b, v7.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v2.2d, v2.2d, v9.2d\n\t"
        "and	x12, x12, x11\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x6, x6, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x5\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x6, x6, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x8\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x6, x6, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v2.2d, v2.2d, v9.2d\n\t"
        "add	x10, x10, x6\n\t"
        "add	x6, x6, x14\n\t"
        /* Round 6 */
        "mov	x13, v3.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x10, #14\n\t"
        "ror	x14, x6, #28\n\t"
        "eor	x12, x12, x10, ror 18\n\t"
        "eor	x14, x14, x6, ror 34\n\t"
        "eor	x12, x12, x10, ror 41\n\t"
        "eor	x14, x14, x6, ror 39\n\t"
        "add	x5, x5, x12\n\t"
        "eor	x17, x6, x7\n\t"
        "eor	x12, x11, x4\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x10\n\t"
        "add	x5, x5, x13\n\t"
        "eor	x12, x12, x4\n\t"
        "add	x5, x5, x15\n\t"
        "eor	x16, x16, x7\n\t"
        "add	x5, x5, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x9, x9, x5\n\t"
        "add	x5, x5, x14\n\t"
        /* Round 7 */
        "mov	x13, v3.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v3.16b, v4.16b, #8\n\t"
        "ror	x12, x9, #14\n\t"
        "shl	v8.2d, v2.2d, #45\n\t"
        "ror	x14, x5, #28\n\t"
        "sri	v8.2d, v2.2d, #19\n\t"
        "eor	x12, x12, x9, ror 18\n\t"
        "shl	v9.2d, v2.2d, #3\n\t"
        "eor	x14, x14, x5, ror 34\n\t"
        "sri	v9.2d, v2.2d, #61\n\t"
        "eor	x12, x12, x9, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x5, ror 39\n\t"
        "ushr	v8.2d, v2.2d, #6\n\t"
        "add	x4, x4, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x5, x6\n\t"
        "add	v3.2d, v3.2d, v9.2d\n\t"
        "eor	x12, x10, x11\n\t"
        "ext	v9.16b, v7.16b, v0.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v3.2d, v3.2d, v9.2d\n\t"
        "and	x12, x12, x9\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x4, x4, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x11\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x4, x4, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x6\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x4, x4, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v3.2d, v3.2d, v9.2d\n\t"
        "add	x8, x8, x4\n\t"
        "add	x4, x4, x14\n\t"
        /* Round 8 */
        "mov	x13, v4.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x8, #14\n\t"
        "ror	x14, x4, #28\n\t"
        "eor	x12, x12, x8, ror 18\n\t"
        "eor	x14, x14, x4, ror 34\n\t"
        "eor	x12, x12, x8, ror 41\n\t"
        "eor	x14, x14, x4, ror 39\n\t"
        "add	x11, x11, x12\n\t"
        "eor	x17, x4, x5\n\t"
        "eor	x12, x9, x10\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x8\n\t"
        "add	x11, x11, x13\n\t"
        "eor	x12, x12, x10\n\t"
        "add	x11, x11, x15\n\t"
        "eor	x16, x16, x5\n\t"
        "add	x11, x11, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x7, x7, x11\n\t"
        "add	x11, x11, x14\n\t"
        /* Round 9 */
        "mov	x13, v4.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v4.16b, v5.16b, #8\n\t"
        "ror	x12, x7, #14\n\t"
        "shl	v8.2d, v3.2d, #45\n\t"
        "ror	x14, x11, #28\n\t"
        "sri	v8.2d, v3.2d, #19\n\t"
        "eor	x12, x12, x7, ror 18\n\t"
        "shl	v9.2d, v3.2d, #3\n\t"
        "eor	x14, x14, x11, ror 34\n\t"
        "sri	v9.2d, v3.2d, #61\n\t"
        "eor	x12, x12, x7, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x11, ror 39\n\t"
        "ushr	v8.2d, v3.2d, #6\n\t"
        "add	x10, x10, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x11, x4\n\t"
        "add	v4.2d, v4.2d, v9.2d\n\t"
        "eor	x12, x8, x9\n\t"
        "ext	v9.16b, v0.16b, v1.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v4.2d, v4.2d, v9.2d\n\t"
        "and	x12, x12, x7\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x10, x10, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x9\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x10, x10, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x4\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x10, x10, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v4.2d, v4.2d, v9.2d\n\t"
        "add	x6, x6, x10\n\t"
        "add	x10, x10, x14\n\t"
        /* Round 10 */
        "mov	x13, v5.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x6, #14\n\t"
        "ror	x14, x10, #28\n\t"
        "eor	x12, x12, x6, ror 18\n\t"
        "eor	x14, x14, x10, ror 34\n\t"
        "eor	x12, x12, x6, ror 41\n\t"
        "eor	x14, x14, x10, ror 39\n\t"
        "add	x9, x9, x12\n\t"
        "eor	x17, x10, x11\n\t"
        "eor	x12, x7, x8\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x6\n\t"
        "add	x9, x9, x13\n\t"
        "eor	x12, x12, x8\n\t"
        "add	x9, x9, x15\n\t"
        "eor	x16, x16, x11\n\t"
        "add	x9, x9, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x5, x5, x9\n\t"
        "add	x9, x9, x14\n\t"
        /* Round 11 */
        "mov	x13, v5.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v5.16b, v6.16b, #8\n\t"
        "ror	x12, x5, #14\n\t"
        "shl	v8.2d, v4.2d, #45\n\t"
        "ror	x14, x9, #28\n\t"
        "sri	v8.2d, v4.2d, #19\n\t"
        "eor	x12, x12, x5, ror 18\n\t"
        "shl	v9.2d, v4.2d, #3\n\t"
        "eor	x14, x14, x9, ror 34\n\t"
        "sri	v9.2d, v4.2d, #61\n\t"
        "eor	x12, x12, x5, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x9, ror 39\n\t"
        "ushr	v8.2d, v4.2d, #6\n\t"
        "add	x8, x8, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x9, x10\n\t"
        "add	v5.2d, v5.2d, v9.2d\n\t"
        "eor	x12, x6, x7\n\t"
        "ext	v9.16b, v1.16b, v2.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v5.2d, v5.2d, v9.2d\n\t"
        "and	x12, x12, x5\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x8, x8, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x7\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x8, x8, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x10\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x8, x8, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v5.2d, v5.2d, v9.2d\n\t"
        "add	x4, x4, x8\n\t"
        "add	x8, x8, x14\n\t"
        /* Round 12 */
        "mov	x13, v6.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x4, #14\n\t"
        "ror	x14, x8, #28\n\t"
        "eor	x12, x12, x4, ror 18\n\t"
        "eor	x14, x14, x8, ror 34\n\t"
        "eor	x12, x12, x4, ror 41\n\t"
        "eor	x14, x14, x8, ror 39\n\t"
        "add	x7, x7, x12\n\t"
        "eor	x17, x8, x9\n\t"
        "eor	x12, x5, x6\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x4\n\t"
        "add	x7, x7, x13\n\t"
        "eor	x12, x12, x6\n\t"
        "add	x7, x7, x15\n\t"
        "eor	x16, x16, x9\n\t"
        "add	x7, x7, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x11, x11, x7\n\t"
        "add	x7, x7, x14\n\t"
        /* Round 13 */
        "mov	x13, v6.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v6.16b, v7.16b, #8\n\t"
        "ror	x12, x11, #14\n\t"
        "shl	v8.2d, v5.2d, #45\n\t"
        "ror	x14, x7, #28\n\t"
        "sri	v8.2d, v5.2d, #19\n\t"
        "eor	x12, x12, x11, ror 18\n\t"
        "shl	v9.2d, v5.2d, #3\n\t"
        "eor	x14, x14, x7, ror 34\n\t"
        "sri	v9.2d, v5.2d, #61\n\t"
        "eor	x12, x12, x11, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x7, ror 39\n\t"
        "ushr	v8.2d, v5.2d, #6\n\t"
        "add	x6, x6, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x7, x8\n\t"
        "add	v6.2d, v6.2d, v9.2d\n\t"
        "eor	x12, x4, x5\n\t"
        "ext	v9.16b, v2.16b, v3.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v6.2d, v6.2d, v9.2d\n\t"
        "and	x12, x12, x11\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x6, x6, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x5\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x6, x6, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x8\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x6, x6, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v6.2d, v6.2d, v9.2d\n\t"
        "add	x10, x10, x6\n\t"
        "add	x6, x6, x14\n\t"
        /* Round 14 */
        "mov	x13, v7.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x10, #14\n\t"
        "ror	x14, x6, #28\n\t"
        "eor	x12, x12, x10, ror 18\n\t"
        "eor	x14, x14, x6, ror 34\n\t"
        "eor	x12, x12, x10, ror 41\n\t"
        "eor	x14, x14, x6, ror 39\n\t"
        "add	x5, x5, x12\n\t"
        "eor	x17, x6, x7\n\t"
        "eor	x12, x11, x4\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x10\n\t"
        "add	x5, x5, x13\n\t"
        "eor	x12, x12, x4\n\t"
        "add	x5, x5, x15\n\t"
        "eor	x16, x16, x7\n\t"
        "add	x5, x5, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x9, x9, x5\n\t"
        "add	x5, x5, x14\n\t"
        /* Round 15 */
        "mov	x13, v7.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ext	v10.16b, v7.16b, v0.16b, #8\n\t"
        "ror	x12, x9, #14\n\t"
        "shl	v8.2d, v6.2d, #45\n\t"
        "ror	x14, x5, #28\n\t"
        "sri	v8.2d, v6.2d, #19\n\t"
        "eor	x12, x12, x9, ror 18\n\t"
        "shl	v9.2d, v6.2d, #3\n\t"
        "eor	x14, x14, x5, ror 34\n\t"
        "sri	v9.2d, v6.2d, #61\n\t"
        "eor	x12, x12, x9, ror 41\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x14, x14, x5, ror 39\n\t"
        "ushr	v8.2d, v6.2d, #6\n\t"
        "add	x4, x4, x12\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x16, x5, x6\n\t"
        "add	v7.2d, v7.2d, v9.2d\n\t"
        "eor	x12, x10, x11\n\t"
        "ext	v9.16b, v3.16b, v4.16b, #8\n\t"
        "and	x17, x16, x17\n\t"
        "add	v7.2d, v7.2d, v9.2d\n\t"
        "and	x12, x12, x9\n\t"
        "shl	v8.2d, v10.2d, #63\n\t"
        "add	x4, x4, x13\n\t"
        "sri	v8.2d, v10.2d, #1\n\t"
        "eor	x12, x12, x11\n\t"
        "tbl	v9.16b, {v10.16b}, v11.16b\n\t"
        "add	x4, x4, x15\n\t"
        "eor	v9.16b, v9.16b, v8.16b\n\t"
        "eor	x17, x17, x6\n\t"
        "ushr	v10.2d, v10.2d, #7\n\t"
        "add	x4, x4, x12\n\t"
        "eor	v9.16b, v9.16b, v10.16b\n\t"
        "add	x14, x14, x17\n\t"
        "add	v7.2d, v7.2d, v9.2d\n\t"
        "add	x8, x8, x4\n\t"
        "add	x4, x4, x14\n\t"
        "subs	x27, x27, #1\n\t"
        "bne	L_sha512_len_neon_start_%=\n\t"
        /* Final 16 rounds: scalar only, no further message scheduling */
        /* Round 0 */
        "mov	x13, v0.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x8, #14\n\t"
        "ror	x14, x4, #28\n\t"
        "eor	x12, x12, x8, ror 18\n\t"
        "eor	x14, x14, x4, ror 34\n\t"
        "eor	x12, x12, x8, ror 41\n\t"
        "eor	x14, x14, x4, ror 39\n\t"
        "add	x11, x11, x12\n\t"
        "eor	x17, x4, x5\n\t"
        "eor	x12, x9, x10\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x8\n\t"
        "add	x11, x11, x13\n\t"
        "eor	x12, x12, x10\n\t"
        "add	x11, x11, x15\n\t"
        "eor	x16, x16, x5\n\t"
        "add	x11, x11, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x7, x7, x11\n\t"
        "add	x11, x11, x14\n\t"
        /* Round 1 */
        "mov	x13, v0.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x7, #14\n\t"
        "ror	x14, x11, #28\n\t"
        "eor	x12, x12, x7, ror 18\n\t"
        "eor	x14, x14, x11, ror 34\n\t"
        "eor	x12, x12, x7, ror 41\n\t"
        "eor	x14, x14, x11, ror 39\n\t"
        "add	x10, x10, x12\n\t"
        "eor	x16, x11, x4\n\t"
        "eor	x12, x8, x9\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x7\n\t"
        "add	x10, x10, x13\n\t"
        "eor	x12, x12, x9\n\t"
        "add	x10, x10, x15\n\t"
        "eor	x17, x17, x4\n\t"
        "add	x10, x10, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x6, x6, x10\n\t"
        "add	x10, x10, x14\n\t"
        /* Round 2 */
        "mov	x13, v1.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x6, #14\n\t"
        "ror	x14, x10, #28\n\t"
        "eor	x12, x12, x6, ror 18\n\t"
        "eor	x14, x14, x10, ror 34\n\t"
        "eor	x12, x12, x6, ror 41\n\t"
        "eor	x14, x14, x10, ror 39\n\t"
        "add	x9, x9, x12\n\t"
        "eor	x17, x10, x11\n\t"
        "eor	x12, x7, x8\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x6\n\t"
        "add	x9, x9, x13\n\t"
        "eor	x12, x12, x8\n\t"
        "add	x9, x9, x15\n\t"
        "eor	x16, x16, x11\n\t"
        "add	x9, x9, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x5, x5, x9\n\t"
        "add	x9, x9, x14\n\t"
        /* Round 3 */
        "mov	x13, v1.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x5, #14\n\t"
        "ror	x14, x9, #28\n\t"
        "eor	x12, x12, x5, ror 18\n\t"
        "eor	x14, x14, x9, ror 34\n\t"
        "eor	x12, x12, x5, ror 41\n\t"
        "eor	x14, x14, x9, ror 39\n\t"
        "add	x8, x8, x12\n\t"
        "eor	x16, x9, x10\n\t"
        "eor	x12, x6, x7\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x5\n\t"
        "add	x8, x8, x13\n\t"
        "eor	x12, x12, x7\n\t"
        "add	x8, x8, x15\n\t"
        "eor	x17, x17, x10\n\t"
        "add	x8, x8, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x4, x4, x8\n\t"
        "add	x8, x8, x14\n\t"
        /* Round 4 */
        "mov	x13, v2.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x4, #14\n\t"
        "ror	x14, x8, #28\n\t"
        "eor	x12, x12, x4, ror 18\n\t"
        "eor	x14, x14, x8, ror 34\n\t"
        "eor	x12, x12, x4, ror 41\n\t"
        "eor	x14, x14, x8, ror 39\n\t"
        "add	x7, x7, x12\n\t"
        "eor	x17, x8, x9\n\t"
        "eor	x12, x5, x6\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x4\n\t"
        "add	x7, x7, x13\n\t"
        "eor	x12, x12, x6\n\t"
        "add	x7, x7, x15\n\t"
        "eor	x16, x16, x9\n\t"
        "add	x7, x7, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x11, x11, x7\n\t"
        "add	x7, x7, x14\n\t"
        /* Round 5 */
        "mov	x13, v2.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x11, #14\n\t"
        "ror	x14, x7, #28\n\t"
        "eor	x12, x12, x11, ror 18\n\t"
        "eor	x14, x14, x7, ror 34\n\t"
        "eor	x12, x12, x11, ror 41\n\t"
        "eor	x14, x14, x7, ror 39\n\t"
        "add	x6, x6, x12\n\t"
        "eor	x16, x7, x8\n\t"
        "eor	x12, x4, x5\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x11\n\t"
        "add	x6, x6, x13\n\t"
        "eor	x12, x12, x5\n\t"
        "add	x6, x6, x15\n\t"
        "eor	x17, x17, x8\n\t"
        "add	x6, x6, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x10, x10, x6\n\t"
        "add	x6, x6, x14\n\t"
        /* Round 6 */
        "mov	x13, v3.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x10, #14\n\t"
        "ror	x14, x6, #28\n\t"
        "eor	x12, x12, x10, ror 18\n\t"
        "eor	x14, x14, x6, ror 34\n\t"
        "eor	x12, x12, x10, ror 41\n\t"
        "eor	x14, x14, x6, ror 39\n\t"
        "add	x5, x5, x12\n\t"
        "eor	x17, x6, x7\n\t"
        "eor	x12, x11, x4\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x10\n\t"
        "add	x5, x5, x13\n\t"
        "eor	x12, x12, x4\n\t"
        "add	x5, x5, x15\n\t"
        "eor	x16, x16, x7\n\t"
        "add	x5, x5, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x9, x9, x5\n\t"
        "add	x5, x5, x14\n\t"
        /* Round 7 */
        "mov	x13, v3.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x9, #14\n\t"
        "ror	x14, x5, #28\n\t"
        "eor	x12, x12, x9, ror 18\n\t"
        "eor	x14, x14, x5, ror 34\n\t"
        "eor	x12, x12, x9, ror 41\n\t"
        "eor	x14, x14, x5, ror 39\n\t"
        "add	x4, x4, x12\n\t"
        "eor	x16, x5, x6\n\t"
        "eor	x12, x10, x11\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x9\n\t"
        "add	x4, x4, x13\n\t"
        "eor	x12, x12, x11\n\t"
        "add	x4, x4, x15\n\t"
        "eor	x17, x17, x6\n\t"
        "add	x4, x4, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x8, x8, x4\n\t"
        "add	x4, x4, x14\n\t"
        /* Round 8 */
        "mov	x13, v4.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x8, #14\n\t"
        "ror	x14, x4, #28\n\t"
        "eor	x12, x12, x8, ror 18\n\t"
        "eor	x14, x14, x4, ror 34\n\t"
        "eor	x12, x12, x8, ror 41\n\t"
        "eor	x14, x14, x4, ror 39\n\t"
        "add	x11, x11, x12\n\t"
        "eor	x17, x4, x5\n\t"
        "eor	x12, x9, x10\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x8\n\t"
        "add	x11, x11, x13\n\t"
        "eor	x12, x12, x10\n\t"
        "add	x11, x11, x15\n\t"
        "eor	x16, x16, x5\n\t"
        "add	x11, x11, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x7, x7, x11\n\t"
        "add	x11, x11, x14\n\t"
        /* Round 9 */
        "mov	x13, v4.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x7, #14\n\t"
        "ror	x14, x11, #28\n\t"
        "eor	x12, x12, x7, ror 18\n\t"
        "eor	x14, x14, x11, ror 34\n\t"
        "eor	x12, x12, x7, ror 41\n\t"
        "eor	x14, x14, x11, ror 39\n\t"
        "add	x10, x10, x12\n\t"
        "eor	x16, x11, x4\n\t"
        "eor	x12, x8, x9\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x7\n\t"
        "add	x10, x10, x13\n\t"
        "eor	x12, x12, x9\n\t"
        "add	x10, x10, x15\n\t"
        "eor	x17, x17, x4\n\t"
        "add	x10, x10, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x6, x6, x10\n\t"
        "add	x10, x10, x14\n\t"
        /* Round 10 */
        "mov	x13, v5.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x6, #14\n\t"
        "ror	x14, x10, #28\n\t"
        "eor	x12, x12, x6, ror 18\n\t"
        "eor	x14, x14, x10, ror 34\n\t"
        "eor	x12, x12, x6, ror 41\n\t"
        "eor	x14, x14, x10, ror 39\n\t"
        "add	x9, x9, x12\n\t"
        "eor	x17, x10, x11\n\t"
        "eor	x12, x7, x8\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x6\n\t"
        "add	x9, x9, x13\n\t"
        "eor	x12, x12, x8\n\t"
        "add	x9, x9, x15\n\t"
        "eor	x16, x16, x11\n\t"
        "add	x9, x9, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x5, x5, x9\n\t"
        "add	x9, x9, x14\n\t"
        /* Round 11 */
        "mov	x13, v5.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x5, #14\n\t"
        "ror	x14, x9, #28\n\t"
        "eor	x12, x12, x5, ror 18\n\t"
        "eor	x14, x14, x9, ror 34\n\t"
        "eor	x12, x12, x5, ror 41\n\t"
        "eor	x14, x14, x9, ror 39\n\t"
        "add	x8, x8, x12\n\t"
        "eor	x16, x9, x10\n\t"
        "eor	x12, x6, x7\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x5\n\t"
        "add	x8, x8, x13\n\t"
        "eor	x12, x12, x7\n\t"
        "add	x8, x8, x15\n\t"
        "eor	x17, x17, x10\n\t"
        "add	x8, x8, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x4, x4, x8\n\t"
        "add	x8, x8, x14\n\t"
        /* Round 12 */
        "mov	x13, v6.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x4, #14\n\t"
        "ror	x14, x8, #28\n\t"
        "eor	x12, x12, x4, ror 18\n\t"
        "eor	x14, x14, x8, ror 34\n\t"
        "eor	x12, x12, x4, ror 41\n\t"
        "eor	x14, x14, x8, ror 39\n\t"
        "add	x7, x7, x12\n\t"
        "eor	x17, x8, x9\n\t"
        "eor	x12, x5, x6\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x4\n\t"
        "add	x7, x7, x13\n\t"
        "eor	x12, x12, x6\n\t"
        "add	x7, x7, x15\n\t"
        "eor	x16, x16, x9\n\t"
        "add	x7, x7, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x11, x11, x7\n\t"
        "add	x7, x7, x14\n\t"
        /* Round 13 */
        "mov	x13, v6.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x11, #14\n\t"
        "ror	x14, x7, #28\n\t"
        "eor	x12, x12, x11, ror 18\n\t"
        "eor	x14, x14, x7, ror 34\n\t"
        "eor	x12, x12, x11, ror 41\n\t"
        "eor	x14, x14, x7, ror 39\n\t"
        "add	x6, x6, x12\n\t"
        "eor	x16, x7, x8\n\t"
        "eor	x12, x4, x5\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x11\n\t"
        "add	x6, x6, x13\n\t"
        "eor	x12, x12, x5\n\t"
        "add	x6, x6, x15\n\t"
        "eor	x17, x17, x8\n\t"
        "add	x6, x6, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x10, x10, x6\n\t"
        "add	x6, x6, x14\n\t"
        /* Round 14 */
        "mov	x13, v7.d[0]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x10, #14\n\t"
        "ror	x14, x6, #28\n\t"
        "eor	x12, x12, x10, ror 18\n\t"
        "eor	x14, x14, x6, ror 34\n\t"
        "eor	x12, x12, x10, ror 41\n\t"
        "eor	x14, x14, x6, ror 39\n\t"
        "add	x5, x5, x12\n\t"
        "eor	x17, x6, x7\n\t"
        "eor	x12, x11, x4\n\t"
        "and	x16, x17, x16\n\t"
        "and	x12, x12, x10\n\t"
        "add	x5, x5, x13\n\t"
        "eor	x12, x12, x4\n\t"
        "add	x5, x5, x15\n\t"
        "eor	x16, x16, x7\n\t"
        "add	x5, x5, x12\n\t"
        "add	x14, x14, x16\n\t"
        "add	x9, x9, x5\n\t"
        "add	x5, x5, x14\n\t"
        /* Round 15 */
        "mov	x13, v7.d[1]\n\t"
        "ldr	x15, [x3], #8\n\t"
        "ror	x12, x9, #14\n\t"
        "ror	x14, x5, #28\n\t"
        "eor	x12, x12, x9, ror 18\n\t"
        "eor	x14, x14, x5, ror 34\n\t"
        "eor	x12, x12, x9, ror 41\n\t"
        "eor	x14, x14, x5, ror 39\n\t"
        "add	x4, x4, x12\n\t"
        "eor	x16, x5, x6\n\t"
        "eor	x12, x10, x11\n\t"
        "and	x17, x16, x17\n\t"
        "and	x12, x12, x9\n\t"
        "add	x4, x4, x13\n\t"
        "eor	x12, x12, x11\n\t"
        "add	x4, x4, x15\n\t"
        "eor	x17, x17, x6\n\t"
        "add	x4, x4, x12\n\t"
        "add	x14, x14, x17\n\t"
        "add	x8, x8, x4\n\t"
        "add	x4, x4, x14\n\t"
        /* Add the digest copy saved at the start of the block */
        "add	x11, x11, x26\n\t"
        "add	x10, x10, x25\n\t"
        "add	x9, x9, x24\n\t"
        "add	x8, x8, x23\n\t"
        "add	x7, x7, x22\n\t"
        "add	x6, x6, x21\n\t"
        "add	x5, x5, x20\n\t"
        "add	x4, x4, x19\n\t"
        /* Rewind the round constant pointer for the next block */
#ifndef __APPLE__
        "adr	x3, %[L_SHA512_transform_neon_len_k]\n\t"
#else
        "adrp	x3, %[L_SHA512_transform_neon_len_k]@PAGE\n\t"
        "add	x3, x3, %[L_SHA512_transform_neon_len_k]@PAGEOFF\n\t"
#endif /* __APPLE__ */
        /* One 128-byte block consumed; loop until len reaches zero */
        "subs	%w[len], %w[len], #0x80\n\t"
        "bne	L_sha512_len_neon_begin_%=\n\t"
        /* Store the updated digest */
        "stp	x4, x5, [%x[sha512]]\n\t"
        "stp	x6, x7, [%x[sha512], #16]\n\t"
        "stp	x8, x9, [%x[sha512], #32]\n\t"
        "stp	x10, x11, [%x[sha512], #48]\n\t"
        "ldp	x29, x30, [sp], #16\n\t"
        : [sha512] "+r" (sha512), [data] "+r" (data), [len] "+r" (len)
        : [L_SHA512_transform_neon_len_k] "S" (L_SHA512_transform_neon_len_k), [L_SHA512_transform_neon_len_ror8] "S" (L_SHA512_transform_neon_len_ror8)
        /* "cc" is required: both loop counters are updated with subs, which
         * overwrites the condition flags. */
        : "memory", "cc", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11"
    );
}

#endif /* __aarch64__ */
#endif /* WOLFSSL_ARMASM */
