/*************************************************
* Lowest Level MPI Algorithms Header File        *
* (C) 1999-2006 The Botan Project                *
*************************************************/

#ifndef BOTAN_MP_ASM_INTERNAL_H__
#define BOTAN_MP_ASM_INTERNAL_H__

#include <botan/mp_asm.h>

namespace Botan {

extern "C" {

/*************************************************
* Word Addition                                  *
*************************************************/
/*
* Adds two 64-bit words together with an incoming carry.
*
* x:     first addend
* y:     second addend (may be a register or a memory operand)
* carry: in/out. Only the low bit of *carry is consumed as the incoming
*        carry; on return *carry is 1 if the addition carried out of the
*        word and 0 otherwise.
*
* Returns the low 64 bits of x + y + (incoming carry).
*/
inline word word_add(word x, word y, word* carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of *carry
        "adcq %[y],%[x]                \n\t" // x += y + CF
        "sbbq %[carry],%[carry]        \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        : [x]"=r"(x),
          // Early clobber: the carry register is rotated before y is read,
          // so it must never be shared with the y operand.
          [carry]"=&r"(*carry)
        : "0"(x),                // x is input and output in the same register
          [y]"rm"(y),            // y is a register or memory
          "1"(*carry)            // *carry is input and output in the same register
        : "cc");
   return x;
   }

/*************************************************
* Eight Word Block Addition, Two Argument        *
*************************************************/
/*
* In-place addition of two eight-word blocks: x[i] += y[i] for i = 0..7,
* with the carry propagated from each word into the next.
*
* x:     destination and first addend (eight words, modified in place)
* y:     second addend (eight words, only read)
* carry: incoming carry; only its low bit is consumed
*
* Returns 1 if the addition of the last word carried out, 0 otherwise.
*/
inline word word8_add2(word x[8], const word y[8], word carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of carry
        // From here on %[carry] is just a scratch register. movq does not
        // touch the flags, so CF chains from one adcq to the next.
        "movq 0(%[y]), %[carry]        \n\t"
        "adcq %[carry], 0(%[x])        \n\t" // x[0] += y[0] + CF
        "movq 8(%[y]), %[carry]        \n\t"
        "adcq %[carry], 8(%[x])        \n\t" // x[1] += y[1] + CF
        "movq 16(%[y]), %[carry]       \n\t"
        "adcq %[carry], 16(%[x])       \n\t" // x[2] += y[2] + CF
        "movq 24(%[y]), %[carry]       \n\t"
        "adcq %[carry], 24(%[x])       \n\t" // x[3] += y[3] + CF
        "movq 32(%[y]), %[carry]       \n\t"
        "adcq %[carry], 32(%[x])       \n\t" // x[4] += y[4] + CF
        "movq 40(%[y]), %[carry]       \n\t"
        "adcq %[carry], 40(%[x])       \n\t" // x[5] += y[5] + CF
        "movq 48(%[y]), %[carry]       \n\t"
        "adcq %[carry], 48(%[x])       \n\t" // x[6] += y[6] + CF
        "movq 56(%[y]), %[carry]       \n\t"
        "adcq %[carry], 56(%[x])       \n\t" // x[7] += y[7] + CF
        "sbbq %[carry], %[carry]       \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        // Early clobber: carry is overwritten while x and y are still needed
        : [carry]"=&r"(carry)
        : [x]"r"(x),                 // pointer to x in a register
          [y]"r"(y),                 // pointer to y in a register
          "0"(carry)                 // carry is input and output
        : "cc", "memory");
   return carry;
   }

/*************************************************
* Eight Word Block Addition, Three Argument      *
*************************************************/
/*
* Addition of two eight-word blocks into a third:
* z[i] = x[i] + y[i] for i = 0..7, with the carry propagated from each
* word into the next.
*
* z:     destination (eight words, written)
* x:     first addend (eight words, only read)
* y:     second addend (eight words, only read)
* carry: incoming carry; only its low bit is consumed
*
* Returns 1 if the addition of the last word carried out, 0 otherwise.
*/
inline word word8_add3(word z[8], const word x[8], const word y[8], word carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of carry
        // From here on %[carry] is just a scratch register. movq does not
        // touch the flags, so CF chains from one adcq to the next.
        "movq 0(%[y]), %[carry]        \n\t"
        "adcq 0(%[x]), %[carry]        \n\t"
        "movq %[carry], 0(%[z])        \n\t" // z[0] = x[0] + y[0] + CF
        "movq 8(%[y]), %[carry]        \n\t"
        "adcq 8(%[x]), %[carry]        \n\t"
        "movq %[carry], 8(%[z])        \n\t" // z[1] = x[1] + y[1] + CF
        "movq 16(%[y]), %[carry]       \n\t"
        "adcq 16(%[x]), %[carry]       \n\t"
        "movq %[carry], 16(%[z])       \n\t" // z[2] = x[2] + y[2] + CF
        "movq 24(%[y]), %[carry]       \n\t"
        "adcq 24(%[x]), %[carry]       \n\t"
        "movq %[carry], 24(%[z])       \n\t" // z[3] = x[3] + y[3] + CF
        "movq 32(%[y]), %[carry]       \n\t"
        "adcq 32(%[x]), %[carry]       \n\t"
        "movq %[carry], 32(%[z])       \n\t" // z[4] = x[4] + y[4] + CF
        "movq 40(%[y]), %[carry]       \n\t"
        "adcq 40(%[x]), %[carry]       \n\t"
        "movq %[carry], 40(%[z])       \n\t" // z[5] = x[5] + y[5] + CF
        "movq 48(%[y]), %[carry]       \n\t"
        "adcq 48(%[x]), %[carry]       \n\t"
        "movq %[carry], 48(%[z])       \n\t" // z[6] = x[6] + y[6] + CF
        "movq 56(%[y]), %[carry]       \n\t"
        "adcq 56(%[x]), %[carry]       \n\t"
        "movq %[carry], 56(%[z])       \n\t" // z[7] = x[7] + y[7] + CF
        "sbbq %[carry], %[carry]       \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        // Early clobber: carry is overwritten while x, y and z are still needed
        : [carry]"=&r"(carry)
        : [x]"r"(x),                 // pointer to x in a register
          [y]"r"(y),                 // pointer to y in a register
          [z]"r"(z),                 // pointer to z in a register
          "0"(carry)                 // carry is input and output
        : "cc", "memory");
   return carry;
   }

/*************************************************
* Word Subtraction                               *
*************************************************/
/*
* Subtracts one 64-bit word from another together with an incoming borrow.
*
* x:     minuend
* y:     subtrahend (may be a register or a memory operand)
* carry: in/out. Only the low bit of *carry is consumed as the incoming
*        borrow; on return *carry is 1 if the subtraction borrowed and
*        0 otherwise.
*
* Returns the low 64 bits of x - y - (incoming borrow).
*/
inline word word_sub(word x, word y, word* carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of *carry
        "sbbq %[y], %[x]               \n\t" // x -= y + CF
        "sbbq %[carry], %[carry]       \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        : [x]"=r"(x),
          // Early clobber: the carry register is rotated before y is read,
          // so it must never be shared with the y operand.
          [carry]"=&r"(*carry)
        : "0"(x),                 // x is input and output in the same register
          [y]"rm"(y),             // y is a register or memory
          "1"(*carry)             // *carry is input and output in the same register
        : "cc");
   return x;
   }

/*************************************************
* Eight Word Block Subtraction, Two Argument     *
*************************************************/
/*
* In-place subtraction of two eight-word blocks: x[i] -= y[i] for
* i = 0..7, with the borrow propagated from each word into the next.
*
* x:     destination and minuend (eight words, modified in place)
* y:     subtrahend (eight words, only read)
* carry: incoming borrow; only its low bit is consumed
*
* Returns 1 if the subtraction of the last word borrowed, 0 otherwise.
*/
inline word word8_sub2(word x[8], const word y[8], word carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of carry
        // From here on %[carry] is just a scratch register. movq does not
        // touch the flags, so CF chains from one sbbq to the next.
        "movq 0(%[y]), %[carry]        \n\t"
        "sbbq %[carry], 0(%[x])        \n\t" // x[0] -= y[0] + CF
        "movq 8(%[y]), %[carry]        \n\t"
        "sbbq %[carry], 8(%[x])        \n\t" // x[1] -= y[1] + CF
        "movq 16(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 16(%[x])       \n\t" // x[2] -= y[2] + CF
        "movq 24(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 24(%[x])       \n\t" // x[3] -= y[3] + CF
        "movq 32(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 32(%[x])       \n\t" // x[4] -= y[4] + CF
        "movq 40(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 40(%[x])       \n\t" // x[5] -= y[5] + CF
        "movq 48(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 48(%[x])       \n\t" // x[6] -= y[6] + CF
        "movq 56(%[y]), %[carry]       \n\t"
        "sbbq %[carry], 56(%[x])       \n\t" // x[7] -= y[7] + CF
        "sbbq %[carry], %[carry]       \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        // Early clobber: carry is overwritten while x and y are still needed
        : [carry]"=&r"(carry)
        : [x]"r"(x),                 // pointer to x in a register
          [y]"r"(y),                 // pointer to y in a register
          "0"(carry)                 // carry is input and output
        : "cc", "memory");
   return carry;
   }

/*************************************************
* Eight Word Block Subtraction, Three Argument   *
*************************************************/
/*
* Subtraction of two eight-word blocks into a third:
* z[i] = x[i] - y[i] for i = 0..7, with the borrow propagated from each
* word into the next.
*
* z:     destination (eight words, written)
* x:     minuend (eight words, only read)
* y:     subtrahend (eight words, only read)
* carry: incoming borrow; only its low bit is consumed
*
* Returns 1 if the subtraction of the last word borrowed, 0 otherwise.
*/
inline word word8_sub3(word z[8], const word x[8],const word y[8], word carry)
   {
   asm(
        "rorq %[carry]                 \n\t" // CF = low bit of carry
        // From here on %[carry] is just a scratch register. movq does not
        // touch the flags, so CF chains from one sbbq to the next.
        "movq 0(%[x]), %[carry]        \n\t"
        "sbbq 0(%[y]), %[carry]        \n\t"
        "movq %[carry], 0(%[z])        \n\t" // z[0] = x[0] - y[0] - CF
        "movq 8(%[x]), %[carry]        \n\t"
        "sbbq 8(%[y]), %[carry]        \n\t"
        "movq %[carry], 8(%[z])        \n\t" // z[1] = x[1] - y[1] - CF
        "movq 16(%[x]), %[carry]       \n\t"
        "sbbq 16(%[y]), %[carry]       \n\t"
        "movq %[carry], 16(%[z])       \n\t" // z[2] = x[2] - y[2] - CF
        "movq 24(%[x]), %[carry]       \n\t"
        "sbbq 24(%[y]), %[carry]       \n\t"
        "movq %[carry], 24(%[z])       \n\t" // z[3] = x[3] - y[3] - CF
        "movq 32(%[x]), %[carry]       \n\t"
        "sbbq 32(%[y]), %[carry]       \n\t"
        "movq %[carry], 32(%[z])       \n\t" // z[4] = x[4] - y[4] - CF
        "movq 40(%[x]), %[carry]       \n\t"
        "sbbq 40(%[y]), %[carry]       \n\t"
        "movq %[carry], 40(%[z])       \n\t" // z[5] = x[5] - y[5] - CF
        "movq 48(%[x]), %[carry]       \n\t"
        "sbbq 48(%[y]), %[carry]       \n\t"
        "movq %[carry], 48(%[z])       \n\t" // z[6] = x[6] - y[6] - CF
        "movq 56(%[x]), %[carry]       \n\t"
        "sbbq 56(%[y]), %[carry]       \n\t"
        "movq %[carry], 56(%[z])       \n\t" // z[7] = x[7] - y[7] - CF
        "sbbq %[carry], %[carry]       \n\t" // carry = CF ? -1 : 0
        "negq %[carry]                 \n\t" // carry = CF ?  1 : 0
        // Early clobber: carry is overwritten while x, y and z are still needed
        : [carry]"=&r"(carry)
        : [x]"r"(x),                 // pointer to x in a register
          [y]"r"(y),                 // pointer to y in a register
          [z]"r"(z),                 // pointer to z in a register
          "0"(carry)                 // carry is input and output
        : "cc", "memory");
   return carry;
   }

/*************************************************
* Eight Word Block Linear Multiplication         *
*************************************************/
/*
* In-place multiplication of an eight-word block by a single word:
* for i = 0..7 the 128-bit value x[i] * y + carry is formed, its low
* word is stored back to x[i] and its high word becomes the carry for
* the next word.
*
* x:     multiplicand and destination (eight words, modified in place)
* y:     single-word multiplier (passed as a memory operand)
* carry: word added into the lowest product
*
* Returns the high word left over after the last multiplication.
*/
inline word word8_linmul2(word x[8], word y, word carry)
   {
   asm(
        "movq 0(%[x]), %%rax      \n\t" // rax = x[0]
        "mulq %[y]                \n\t" // rdx:rax = x[0] * y
        "addq %[carry], %%rax     \n\t" // rdx:rax += carry (low word)
        "adcq $0, %%rdx           \n\t" // rdx:rax += carry (propagate)
        "movq %%rdx, %[carry]     \n\t" // carry = high word
        "movq %%rax, 0(%[x])      \n\t" // x[0] = low word

        "movq 8(%[x]), %%rax      \n\t" // same steps for x[1]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 8(%[x])      \n\t"

        "movq 16(%[x]), %%rax     \n\t" // x[2]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 16(%[x])     \n\t"

        "movq 24(%[x]), %%rax     \n\t" // x[3]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 24(%[x])     \n\t"

        "movq 32(%[x]), %%rax     \n\t" // x[4]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 32(%[x])     \n\t"

        "movq 40(%[x]), %%rax     \n\t" // x[5]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 40(%[x])     \n\t"

        "movq 48(%[x]), %%rax     \n\t" // x[6]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 48(%[x])     \n\t"

        "movq 56(%[x]), %%rax     \n\t" // x[7]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 56(%[x])     \n\t"
        // Early clobber: carry is rewritten while x and y are still needed
        : [carry]"=&r"(carry)
        : [x]"r"(x), [y]"m"(y), "0"(carry)
        // "memory": x[] is read and written through the pointer, which the
        // compiler cannot see from the operand list alone.
        : "cc", "memory", "%rax", "%rdx");
   return carry;
   }

/*************************************************
* Eight Word Block Linear Multiplication          *
*************************************************/
/*
* Multiplication of an eight-word block by a single word into a separate
* destination: for i = 0..7 the 128-bit value x[i] * y + carry is formed,
* its low word is stored to z[i] and its high word becomes the carry for
* the next word.
*
* z:     destination (eight words, written)
* x:     multiplicand (eight words, only read)
* y:     single-word multiplier (passed as a memory operand)
* carry: word added into the lowest product
*
* Returns the high word left over after the last multiplication.
*/
inline word word8_linmul3(word z[8], const word x[8], word y, word carry)
   {
   asm(
        "movq 0(%[x]), %%rax      \n\t" // rax = x[0]
        "mulq %[y]                \n\t" // rdx:rax = x[0] * y
        "addq %[carry], %%rax     \n\t" // rdx:rax += carry (low word)
        "adcq $0, %%rdx           \n\t" // rdx:rax += carry (propagate)
        "movq %%rdx, %[carry]     \n\t" // carry = high word
        "movq %%rax, 0(%[z])      \n\t" // z[0] = low word

        "movq 8(%[x]), %%rax      \n\t" // same steps for x[1] -> z[1]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 8(%[z])      \n\t"

        "movq 16(%[x]), %%rax     \n\t" // x[2] -> z[2]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 16(%[z])     \n\t"

        "movq 24(%[x]), %%rax     \n\t" // x[3] -> z[3]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 24(%[z])     \n\t"

        "movq 32(%[x]), %%rax     \n\t" // x[4] -> z[4]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 32(%[z])     \n\t"

        "movq 40(%[x]), %%rax     \n\t" // x[5] -> z[5]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 40(%[z])     \n\t"

        "movq 48(%[x]), %%rax     \n\t" // x[6] -> z[6]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 48(%[z])     \n\t"

        "movq 56(%[x]), %%rax     \n\t" // x[7] -> z[7]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 56(%[z])     \n\t"
        // Early clobber: carry is rewritten while x, y and z are still needed
        : [carry]"=&r"(carry)
        : [z]"r"(z), [x]"r"(x), [y]"m"(y), "0"(carry)
        // "memory": x[] is read and z[] is written through pointers, which
        // the compiler cannot see from the operand list alone.
        : "cc", "memory", "%rax", "%rdx");
   return carry;
   }

/*************************************************
* Eight Word Block Multiply/Add                  *
*************************************************/
/*
* Multiply-accumulate of an eight-word block by a single word:
* for i = 0..7 the 128-bit value x[i] * y + carry + z[i] is formed, its
* low word is stored back to z[i] and its high word becomes the carry
* for the next word. With 64-bit operands that sum is at most 2^128 - 1,
* so the high word cannot overflow.
*
* z:     accumulator (eight words, read and written)
* x:     multiplicand (eight words, only read)
* y:     single-word multiplier
* carry: word added into the lowest product
*
* Returns the high word left over after the last multiply-accumulate.
*/
inline word word8_madd3(word z[8], const word x[8], word y, word carry)
   {
   asm(
        "movq 0(%[x]), %%rax      \n\t" // rax = x[0]
        "mulq %[y]                \n\t" // rdx:rax = x[0] * y
        "addq %[carry], %%rax     \n\t" // rdx:rax += carry (low word)
        "adcq $0, %%rdx           \n\t" // rdx:rax += carry (propagate)
        "addq 0(%[z]), %%rax      \n\t" // rdx:rax += z[0] (low word)
        "adcq $0, %%rdx           \n\t" // rdx:rax += z[0] (propagate)
        "movq %%rdx, %[carry]     \n\t" // carry = high word
        "movq %%rax, 0(%[z])      \n\t" // z[0] = low word

        "movq 8(%[x]), %%rax      \n\t" // same steps for x[1], z[1]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 8(%[z]), %%rax      \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 8(%[z])      \n\t"

        "movq 16(%[x]), %%rax     \n\t" // x[2], z[2]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 16(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 16(%[z])     \n\t"

        "movq 24(%[x]), %%rax     \n\t" // x[3], z[3]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 24(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 24(%[z])     \n\t"

        "movq 32(%[x]), %%rax     \n\t" // x[4], z[4]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 32(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 32(%[z])     \n\t"

        "movq 40(%[x]), %%rax     \n\t" // x[5], z[5]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 40(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 40(%[z])     \n\t"

        "movq 48(%[x]), %%rax     \n\t" // x[6], z[6]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 48(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 48(%[z])     \n\t"

        "movq 56(%[x]), %%rax     \n\t" // x[7], z[7]
        "mulq %[y]                \n\t"
        "addq %[carry], %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "addq 56(%[z]), %%rax     \n\t"
        "adcq $0, %%rdx           \n\t"
        "movq %%rdx, %[carry]     \n\t"
        "movq %%rax, 56(%[z])     \n\t"
        // Early clobber: carry is rewritten after the first word while y
        // (a register input) is still needed for the remaining seven, so
        // the two must never share a register.
        : [carry]"=&r"(carry)
        : [z]"r"(z), [x]"r"(x), [y]"r"(y), "0"(carry)
        // "memory": x[] is read and z[] is read and written through
        // pointers, which the compiler cannot see from the operand list.
        : "cc", "memory", "%rax", "%rdx");
   return carry;
   }

}

}

#endif
