/// -*- mode: asm; asm-comment-char: ?/ -*-
///
/// GCM acceleration for x86 processors
///
/// (c) 2018 Straylight/Edgeware
///

///----- Licensing notice ---------------------------------------------------
///
/// This file is part of Catacomb.
///
/// Catacomb is free software: you can redistribute it and/or modify it
/// under the terms of the GNU Library General Public License as published
/// by the Free Software Foundation; either version 2 of the License, or
/// (at your option) any later version.
///
/// Catacomb is distributed in the hope that it will be useful, but
/// WITHOUT ANY WARRANTY; without even the implied warranty of
/// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
/// Library General Public License for more details.
///
/// You should have received a copy of the GNU Library General Public
/// License along with Catacomb.  If not, write to the Free Software
/// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
/// USA.

///--------------------------------------------------------------------------
/// Preliminaries.

#include "config.h"
#include "asm-common.h"

	// Let the assembler accept the carry-less-multiply instruction
	// extension used by the code in this file.
	.arch	.pclmul

	.text

///--------------------------------------------------------------------------
/// Common register allocation.

	// `A' and `K' are per-target aliases for two general-purpose
	// registers.  On AMD64 they are the first and second integer
	// argument registers of the selected calling convention (System V:
	// rdi, rsi; Windows: rcx, rdx).  On 32-bit x86 they are eax and edx.
	//
	// NOTE(review): the roles of `A' and `K' are not visible in this
	// part of the file -- presumably pointers to an accumulator and to
	// key material; confirm against the code which uses them.  No alias
	// is defined if none of the conditions below holds.

#if CPUFAM_X86
#  define A eax
#  define K edx
#elif CPUFAM_AMD64 && ABI_SYSV
#  define A rdi
#  define K rsi
#elif CPUFAM_AMD64 && ABI_WIN
#  define A rcx
#  define K rdx
#endif

///--------------------------------------------------------------------------
/// Multiplication macros.

	// The good news is that we have a fancy instruction to do the
	// multiplications.  The bad news is that it's not particularly well-
	// suited to the job.
	//
	// For one thing, it only does a 64-bit multiplication, so in general
	// we'll need to synthesize the full-width multiply by hand.  For
	// another thing, it doesn't help with the reduction, so we have to
	// do that by hand too.  And, finally, GCM has crazy bit ordering,
	// and the instruction does nothing useful for that at all.
	//
	// Focusing on that last problem first: the bits aren't in monotonic
	// significance order unless we permute them.  If we reverse the byte
	// order, then we'll have the bits in monotonic order, but backwards,
	// so the degree-0 coefficient will be in the most-significant bit.
	//
	// This is less of a difficulty than it seems at first, because
	// algebra.  Suppose we are given u = SUM_{0<=i