arch/arm64/crypto/ghash-ce-core.S

   1 /*
   2  * Accelerated GHASH implementation with ARMv8 PMULL instructions.
   3  *
   4  * Copyright (C) 2014 Linaro Ltd. <ard.biesheuvel@linaro.org>
   5  *
   6  * This program is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 as published
   8  * by the Free Software Foundation.
   9  */
  10
  11 #include <linux/linkage.h>
  12 #include <asm/assembler.h>
  13
  14         SHASH   .req    v0      // 16 bytes loaded from k (x3); a1:a0 in the multiply comments
  15         SHASH2  .req    v1      // both lanes hold SHASH.d[0] ^ SHASH.d[1], i.e. a1 + a0
  16         T1      .req    v2      // scratch; receives each incoming block
  17         T2      .req    v3      // scratch
  18         MASK    .req    v4      // 0xc200000000000000 in each lane; multiplier for the reduction
  19         XL      .req    v5      // running digest (b1:b0 once the block is mixed in), then a0 * b0
  20         XM      .req    v6      // (a1 + a0)(b1 + b0), then the middle part of the product
  21         XH      .req    v7      // a1 * b1
  22         IN1     .req    v7      // half-swapped input; aliases XH (v7), last read before XH is written
  23
  24         .text
  25         .arch           armv8-a+crypto  // the 64-bit pmull/pmull2 forms used below need +crypto
  26
  27         /*
  28          * void pmull_ghash_update(int blocks, u64 dg[], const char *src,
  29          *                         struct ghash_key const *k, const char *head)
               *
               * Folds the optional head block and then the blocks read from src into
               * the 16-byte digest at dg. For every 16-byte block the digest is XORed
               * with the (byte-order adjusted) block and the result is multiplied by
               * the key in GF(2^128).
               *
               * In:   w0 = blocks  number of blocks to read from src (head not counted)
               *       x1 = dg      digest, loaded on entry and stored back on exit
               *       x2 = src     input, consumed 16 bytes at a time (post-incremented)
               *       x3 = k       16 bytes are loaded from this address as the key
               *       x4 = head    extra block processed before src, skipped when NULL
               * Clobbers: w0, x2, v0-v7
               *
               * NOTE(review): the block count is decremented before it is tested, so
               * blocks == 0 together with head == NULL makes w0 wrap around and the
               * loop keeps reading past src. Callers presumably guarantee at least
               * one block in total - confirm against the glue code.
  30          */
  31 ENTRY(pmull_ghash_update)
  32         ld1             {SHASH.16b}, [x3]               // SHASH = 16 bytes at k
  33         ld1             {XL.16b}, [x1]                  // XL = current digest from dg
  34         movi            MASK.16b, #0xe1                 // every byte of MASK = 0xe1
  35         ext             SHASH2.16b, SHASH.16b, SHASH.16b, #8  // SHASH2 = SHASH with its halves swapped
  36         shl             MASK.2d, MASK.2d, #57           // each lane = 0xc200000000000000
  37         eor             SHASH2.16b, SHASH2.16b, SHASH.16b  // both lanes of SHASH2 = a1 ^ a0
  38
  39         /* do the head block first, if supplied */
  40         cbz             x4, 0f                          // no head: start with the first src block
  41         ld1             {T1.2d}, [x4]                   // T1 = head block
  42         b               1f                              // skips the decrement: head is not counted in w0
  43
  44 0:      ld1             {T1.2d}, [x2], #16              // T1 = next src block, src += 16
  45         sub             w0, w0, #1                      // one block fewer left in src
  46
  47 1:      /* multiply XL by SHASH in GF(2^128) */
  48 CPU_LE( rev64           T1.16b, T1.16b  )               // byte-reverse each 64-bit half; CPU_LE comes from asm/assembler.h, presumably little-endian builds only
  49
  50         ext             T2.16b, XL.16b, XL.16b, #8      // T2 = digest with halves swapped
  51         ext             IN1.16b, T1.16b, T1.16b, #8     // IN1 = input with halves swapped
  52         eor             T1.16b, T1.16b, T2.16b          // T1 = input ^ swapped digest
  53         eor             XL.16b, XL.16b, IN1.16b         // XL = digest ^ swapped input = b1:b0; T1 is XL half-swapped
  54
  55         pmull2          XH.1q, SHASH.2d, XL.2d          // a1 * b1
  56         eor             T1.16b, T1.16b, XL.16b          // both lanes of T1 = b1 ^ b0
  57         pmull           XL.1q, SHASH.1d, XL.1d          // a0 * b0
  58         pmull           XM.1q, SHASH2.1d, T1.1d         // (a1 + a0)(b1 + b0)
  59
  60         ext             T1.16b, XL.16b, XH.16b, #8      // T1.d[0] = XL.d[1], T1.d[1] = XH.d[0]
  61         eor             T2.16b, XL.16b, XH.16b          // T2 = a0*b0 ^ a1*b1
  62         eor             XM.16b, XM.16b, T1.16b          // Karatsuba recombination, part 1
  63         eor             XM.16b, XM.16b, T2.16b          // XM = middle 128 bits of the 256-bit product
  64         pmull           T2.1q, XL.1d, MASK.1d           // first reduction multiply: lowest lane * MASK
  65
  66         mov             XH.d[0], XM.d[1]                // XH = upper 128 bits of the product
  67         mov             XM.d[1], XL.d[0]                // XM = lower 128 bits, halves swapped
  68
  69         eor             XL.16b, XM.16b, T2.16b          // fold the first reduction term in
  70         ext             T2.16b, XL.16b, XL.16b, #8      // T2 = that value with halves swapped
  71         pmull           XL.1q, XL.1d, MASK.1d           // second reduction multiply
  72         eor             T2.16b, T2.16b, XH.16b          // add the upper 128 bits
  73         eor             XL.16b, XL.16b, T2.16b          // XL = reduced product = new digest
  74
  75         cbnz            w0, 0b                          // loop while src blocks remain
  76
  77         st1             {XL.16b}, [x1]                  // write the digest back to dg
  78         ret
  79 ENDPROC(pmull_ghash_update)