crypto: x86/twofish-avx - use optimized XTS code
author Jussi Kivilinna <jussi.kivilinna@iki.fi>
Mon, 8 Apr 2013 18:51:00 +0000 (21:51 +0300)
committer Herbert Xu <herbert@gondor.apana.org.au>
Thu, 25 Apr 2013 13:01:51 +0000 (21:01 +0800)
Change twofish-avx to use the new XTS code, for smaller stack usage and a small
boost to performance.

tcrypt results, with Intel i5-2450M:
        enc     dec
16B     1.03x   1.02x
64B     0.91x   0.91x
256B    1.10x   1.09x
1024B   1.12x   1.11x
8192B   1.12x   1.11x

Since XTS is practically always used with data blocks of size 512 bytes or
more, I chose to not make use of twofish-3way for block sizes smaller than
128 bytes. This causes a slower result in tcrypt for 64 bytes.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/x86/crypto/twofish-avx-x86_64-asm_64.S
arch/x86/crypto/twofish_avx_glue.c

index 8d3e113b2c95cc7ded467bbea21ca6180e78a307..05058134c443176ec717cbdec6eb50235530fb03 100644 (file)
@@ -4,7 +4,7 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
- * Copyright © 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -33,6 +33,8 @@
 
 .Lbswap128_mask:
        .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lxts_gf128mul_and_shl1_mask:
+       .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
 
 .text
 
@@ -408,3 +410,47 @@ ENTRY(twofish_ctr_8way)
 
        ret;
 ENDPROC(twofish_ctr_8way)
+
+ENTRY(twofish_xts_enc_8way)
+       /* XTS encryption entry point for the 8-way code path.
+        *
+        * input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (copied to %r11 below; the final store_xts_8way
+        *            writes through %r11, not %rsi)
+        *      %rdx: src
+        *      %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+        */
+
+       movq %rsi, %r11;
+
+       /* regs <= src, dst <= IVs, regs <= regs xor IVs
+        *
+        * i.e. the source blocks are loaded into the RA1..RD2 registers,
+        * the per-block IVs (tweaks) are parked in the dst buffer, and each
+        * loaded block is xored with its IV before the cipher call.
+        */
+       load_xts_8way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2,
+                     RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
+
+       call __twofish_enc_blk8;
+
+       /* dst <= regs xor IVs(in dst)
+        *
+        * The registers are listed as RC, RD, RA, RB here, whereas the load
+        * above used RA, RB, RC, RD; presumably this follows the register
+        * order in which __twofish_enc_blk8 returns its output -- TODO
+        * confirm against that routine's header comment.
+        */
+       store_xts_8way(%r11, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2);
+
+       ret;
+ENDPROC(twofish_xts_enc_8way)
+
+ENTRY(twofish_xts_dec_8way)
+       /* XTS decryption entry point for the 8-way code path.
+        *
+        * input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (copied to %r11 below; the final store_xts_8way
+        *            writes through %r11, not %rsi)
+        *      %rdx: src
+        *      %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
+        */
+
+       movq %rsi, %r11;
+
+       /* regs <= src, dst <= IVs, regs <= regs xor IVs
+        *
+        * Same sequence as in twofish_xts_enc_8way, but the blocks are
+        * loaded in RC, RD, RA, RB order; presumably this is the input
+        * register layout __twofish_dec_blk8 expects -- TODO confirm.
+        */
+       load_xts_8way(%rcx, %rdx, %rsi, RC1, RD1, RA1, RB1, RC2, RD2, RA2, RB2,
+                     RX0, RX1, RY0, .Lxts_gf128mul_and_shl1_mask);
+
+       call __twofish_dec_blk8;
+
+       /* dst <= regs xor IVs(in dst)
+        *
+        * The results are stored from the registers in RA, RB, RC, RD
+        * order and xored with the IVs that the load step left in dst.
+        */
+       store_xts_8way(%r11, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
+
+       ret;
+ENDPROC(twofish_xts_dec_8way)
index 94ac91d26e47e3b4ef415df06115d1c490b9f6a9..a62ba541884ef1a15da1082d9d2ca48296c563ec 100644 (file)
@@ -4,6 +4,8 @@
  * Copyright (C) 2012 Johannes Goetzfried
  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
  *
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -56,12 +58,29 @@ asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
 asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
                                 const u8 *src, le128 *iv);
 
+asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
+                                    const u8 *src, le128 *iv);
+asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
+                                    const u8 *src, le128 *iv); /* both: ENTRY() routines in the AVX assembly file */
+
 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
                                        const u8 *src)
 {
        __twofish_enc_blk_3way(ctx, dst, src, false);
 }
 
+static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+{      /* 1-block XTS encrypt entry of twofish_enc_xts: runs the shared helper with twofish_enc_blk */
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+                                 GLUE_FUNC_CAST(twofish_enc_blk));
+}
+
+static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
+{      /* 1-block XTS decrypt entry of twofish_dec_xts: runs the shared helper with twofish_dec_blk */
+       glue_xts_crypt_128bit_one(ctx, dst, src, iv,
+                                 GLUE_FUNC_CAST(twofish_dec_blk));
+}
+
 
 static const struct common_glue_ctx twofish_enc = {
        .num_funcs = 3,
@@ -95,6 +114,19 @@ static const struct common_glue_ctx twofish_ctr = {
        } }
 };
 
+static const struct common_glue_ctx twofish_enc_xts = {
+       .num_funcs = 2, /* 8way routine + single-block fallback; deliberately no 3-way entry */
+       .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS, /* assembly path */
+               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
+       }, {
+               .num_blocks = 1, /* one block at a time via twofish_xts_enc() */
+               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
+       } }
+};
+
 static const struct common_glue_ctx twofish_dec = {
        .num_funcs = 3,
        .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
@@ -127,6 +159,19 @@ static const struct common_glue_ctx twofish_dec_cbc = {
        } }
 };
 
+static const struct common_glue_ctx twofish_dec_xts = {
+       .num_funcs = 2, /* 8way routine + single-block fallback; deliberately no 3-way entry */
+       .fpu_blocks_limit = TWOFISH_PARALLEL_BLOCKS,
+
+       .funcs = { {
+               .num_blocks = TWOFISH_PARALLEL_BLOCKS, /* assembly path */
+               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
+       }, {
+               .num_blocks = 1, /* one block at a time via twofish_xts_dec() */
+               .fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
+       } }
+};
+
 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
 {
@@ -275,54 +320,20 @@ static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                       struct scatterlist *src, unsigned int nbytes)
 {
        struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[TWOFISH_PARALLEL_BLOCKS];
-       struct crypt_priv crypt_ctx = {
-               .ctx = &ctx->crypt_ctx,
-               .fpu_enabled = false,
-       };
-       struct xts_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .tweak_ctx = &ctx->tweak_ctx,
-               .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-               .crypt_ctx = &crypt_ctx,
-               .crypt_fn = encrypt_callback,
-       };
-       int ret;
 
-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       ret = xts_crypt(desc, dst, src, nbytes, &req);
-       twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-       return ret;
+       return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
+                                    XTS_TWEAK_CAST(twofish_enc_blk),
+                                    &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
 static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
                        struct scatterlist *src, unsigned int nbytes)
 {
        struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-       be128 buf[TWOFISH_PARALLEL_BLOCKS];
-       struct crypt_priv crypt_ctx = {
-               .ctx = &ctx->crypt_ctx,
-               .fpu_enabled = false,
-       };
-       struct xts_crypt_req req = {
-               .tbuf = buf,
-               .tbuflen = sizeof(buf),
-
-               .tweak_ctx = &ctx->tweak_ctx,
-               .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk),
-               .crypt_ctx = &crypt_ctx,
-               .crypt_fn = decrypt_callback,
-       };
-       int ret;

-       desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-       ret = xts_crypt(desc, dst, src, nbytes, &req);
-       twofish_fpu_end(crypt_ctx.fpu_enabled);
-
-       return ret;
+       return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
+                                    XTS_TWEAK_CAST(twofish_enc_blk), /* tweak stays on enc_blk, as in the removed code */
+                                    &ctx->tweak_ctx, &ctx->crypt_ctx);
 }
 
 static struct crypto_alg twofish_algs[10] = { {