12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880 |
- /* FFdecsa -- fast decsa algorithm
- *
- * Copyright (C) 2003-2004 fatih89r
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-
- #include <sys/types.h>
- #include <string.h>
- #include <stdio.h>
- #include <stdlib.h>
-
- #include "FFdecsa.h"
-
- #ifndef NULL
- #define NULL 0
- #endif
-
- //#define DEBUG
- #ifdef DEBUG
- #define DBG(a) a
- #else
- #define DBG(a)
- #endif
-
- //// parallelization stuff, large speed differences are possible
- // possible choices
- #define PARALLEL_32_4CHAR 320
- #define PARALLEL_32_4CHARA 321
- #define PARALLEL_32_INT 322
- #define PARALLEL_64_8CHAR 640
- #define PARALLEL_64_8CHARA 641
- #define PARALLEL_64_2INT 642
- #define PARALLEL_64_LONG 643
- #define PARALLEL_64_MMX 644
- #define PARALLEL_128_16CHAR 1280
- #define PARALLEL_128_16CHARA 1281
- #define PARALLEL_128_4INT 1282
- #define PARALLEL_128_2LONG 1283
- #define PARALLEL_128_2MMX 1284
- #define PARALLEL_128_SSE 1285
- #define PARALLEL_128_SSE2 1286
-
- //////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
- #ifndef PARALLEL_MODE
- #define PARALLEL_MODE PARALLEL_32_INT
- #endif
- //////// our choice //////////////// our choice //////////////// our choice //////////////// our choice ////////
-
- #include "parallel_generic.h"
- //// conditionals
- #if PARALLEL_MODE==PARALLEL_32_4CHAR
- #include "parallel_032_4char.h"
- #elif PARALLEL_MODE==PARALLEL_32_4CHARA
- #include "parallel_032_4charA.h"
- #elif PARALLEL_MODE==PARALLEL_32_INT
- #include "parallel_032_int.h"
- #elif PARALLEL_MODE==PARALLEL_64_8CHAR
- #include "parallel_064_8char.h"
- #elif PARALLEL_MODE==PARALLEL_64_8CHARA
- #include "parallel_064_8charA.h"
- #elif PARALLEL_MODE==PARALLEL_64_2INT
- #include "parallel_064_2int.h"
- #elif PARALLEL_MODE==PARALLEL_64_LONG
- #include "parallel_064_long.h"
- #elif PARALLEL_MODE==PARALLEL_64_MMX
- #include "parallel_064_mmx.h"
- #elif PARALLEL_MODE==PARALLEL_128_16CHAR
- #include "parallel_128_16char.h"
- #elif PARALLEL_MODE==PARALLEL_128_16CHARA
- #include "parallel_128_16charA.h"
- #elif PARALLEL_MODE==PARALLEL_128_4INT
- #include "parallel_128_4int.h"
- #elif PARALLEL_MODE==PARALLEL_128_2LONG
- #include "parallel_128_2long.h"
- #elif PARALLEL_MODE==PARALLEL_128_2MMX
- #include "parallel_128_2mmx.h"
- #elif PARALLEL_MODE==PARALLEL_128_SSE
- #include "parallel_128_sse.h"
- #elif PARALLEL_MODE==PARALLEL_128_SSE2
- #include "parallel_128_sse2.h"
- #else
- #error "unknown/undefined parallel mode"
- #endif
-
- // stuff depending on conditionals
-
- #define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
- #define BYPG BYTES_PER_GROUP
- #define BITS_PER_GROUP GROUP_PARALLELISM
- #define BIPG BITS_PER_GROUP
-
- #ifndef MALLOC
- #define MALLOC(X) malloc(X)
- #endif
- #ifndef FREE
- #define FREE(X) free(X)
- #endif
- #ifndef MEMALIGN
- #define MEMALIGN
- #endif
-
- //// debug tool
-
- #ifdef DEBUG
/*
 * Debug helper: hex-dump `len` bytes starting at `p` to stderr.
 *
 * string   prefix printed at the start of every output line
 * p        first byte to dump
 * len      number of bytes to dump
 * linelen  number of bytes per output line
 *
 * Every line starts with `string` and the offset of its first byte
 * (8 hex digits), followed by the bytes as two-digit hex.  Inside a line an
 * extra blank is inserted before every 4th byte and one more before every
 * 8th byte.  A terminating newline is written only when `len` is a multiple
 * of `linelen` (this includes len==0); a partial last line is left open.
 *
 * A non-positive `linelen` would make the `i%linelen` tests a modulo by
 * zero (this happens when an empty residue length is passed as both `len`
 * and `linelen`), so it is replaced by a one-line layout.
 */
static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
  int i;

  if(linelen<=0){
    linelen=(len>0)?len:1;
  }
  for(i=0;i<len;i++){
    if(i%linelen==0&&i) fprintf(stderr,"\n");
    if(i%linelen==0) fprintf(stderr,"%s %08x:",string,i);
    else{
      if(i%8==0) fprintf(stderr," ");
      if(i%4==0) fprintf(stderr," ");
    }
    fprintf(stderr," %02x",p[i]);
  }
  if(i%linelen==0) fprintf(stderr,"\n");
}
- #endif
-
- //////////////////////////////////////////////////////////////////////////////////
-
- struct csa_key_t{
- unsigned char ck[8];
- // used by stream
- int iA[8]; // iA[0] is for A1, iA[7] is for A8
- int iB[8]; // iB[0] is for B1, iB[7] is for B8
- // used by stream (group)
- MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
- MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
- MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
- // used by block
- unsigned char kk[56];
- // used by block (group)
- MEMALIGN batch kkmulti[56]; // many times the same byte in every batch
- };
-
- struct csa_keys_t{
- struct csa_key_t even;
- struct csa_key_t odd;
- };
-
- //-----stream cypher
-
- //-----key schedule for stream decypher
/*
 * Split the control word into nibbles for the stream cipher.
 *
 * Each key byte yields two entries, high nibble first: ck[0..3] fill
 * iA[0..7] and ck[4..7] fill iB[0..7].
 */
static void key_schedule_stream(
  unsigned char *ck,    // [In]  ck[0]-ck[7]   8 bytes   | Key.
  int *iA,              // [Out] iA[0]-iA[7]   8 nibbles | Key schedule.
  int *iB)              // [Out] iB[0]-iB[7]   8 nibbles | Key schedule.
{
  int pair;

  for(pair=0;pair<4;pair++){
    const int a_byte=ck[pair];
    const int b_byte=ck[pair+4];

    iA[2*pair]  =(a_byte>>4)&0xf;
    iA[2*pair+1]= a_byte    &0xf;
    iB[2*pair]  =(b_byte>>4)&0xf;
    iB[2*pair+1]= b_byte    &0xf;
  }
}
-
- //----- stream main function
-
- #define STREAM_INIT
- #include "stream.c"
- #undef STREAM_INIT
-
- #define STREAM_NORMAL
- #include "stream.c"
- #undef STREAM_NORMAL
-
-
- //-----block decypher
-
- //-----key schedule for block decypher
-
- static void key_schedule_block(
- unsigned char *ck, // [In] ck[0]-ck[7] 8 bytes | Key.
- unsigned char *kk) // [Out] kk[0]-kk[55] 56 bytes | Key schedule.
- {
- static const unsigned char key_perm[0x40] = {
- 0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
- 0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
- 0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
- 0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
- };
-
- int i,j,k;
- int bit[64];
- int newbit[64];
- int kb[7][8];
-
- // 56 steps
- // 56 key bytes kk(55)..kk(0) by key schedule from ck
-
- // kb(6,0) .. kb(6,7) = ck(0) .. ck(7)
- kb[6][0] = ck[0];
- kb[6][1] = ck[1];
- kb[6][2] = ck[2];
- kb[6][3] = ck[3];
- kb[6][4] = ck[4];
- kb[6][5] = ck[5];
- kb[6][6] = ck[6];
- kb[6][7] = ck[7];
-
- // calculate kb[5] .. kb[0]
- for(i=5; i>=0; i--){
- // 64 bit perm on kb
- for(j=0; j<8; j++){
- for(k=0; k<8; k++){
- bit[j*8+k] = (kb[i+1][j] >> (7-k)) & 1;
- newbit[key_perm[j*8+k]-1] = bit[j*8+k];
- }
- }
- for(j=0; j<8; j++){
- kb[i][j] = 0;
- for(k=0; k<8; k++){
- kb[i][j] |= newbit[j*8+k] << (7-k);
- }
- }
- }
-
- // xor to give kk
- for(i=0; i<7; i++){
- for(j=0; j<8; j++){
- kk[i*8+j] = kb[i][j] ^ i;
- }
- }
-
- }
-
- //-----block utils
-
- static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
- int *ri=(int *)in;
- int *ibi=(int *)out;
- int j,i,k,g;
- // copy and first step
- for(g=0;g<count;g++){
- ri[g]=ibi[2*g];
- ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
- }
- //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 01230123
- #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
- for(j=0;j<8;j+=4){
- for(i=0;i<2;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+2)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
- ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
- }
- }
- }
- //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 01010101
- for(j=0;j<8;j+=2){
- for(i=0;i<1;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+1)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- }
- //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 00000000
- }
-
- static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
- int *ri=(int *)in;
- int *bdi=(int *)out;
- int j,i,k,g;
- #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
- //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 00000000
- for(j=0;j<8;j+=2){
- for(i=0;i<1;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+1)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
- ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
- }
- }
- }
- //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 01010101
- for(j=0;j<8;j+=4){
- for(i=0;i<2;i++){
- for(k=0;k<INTS_PER_ROW;k++){
- unsigned int t,b;
- t=ri[INTS_PER_ROW*(j+i)+k];
- b=ri[INTS_PER_ROW*(j+i+2)+k];
- ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
- ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
- }
- }
- }
- //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
- // now 01230123
- for(g=0;g<count;g++){
- bdi[2*g]=ri[g];
- bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
- }
- }
-
- //-----block main function
-
- // block group
- static void block_decypher_group(
- batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
- unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
- unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
- int count)
- {
- // int is faster than unsigned char. apparently not
- static const unsigned char block_sbox[0x100] = {
- 0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
- 0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
- 0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
- 0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
- 0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
- 0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
- 0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
- 0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
-
- 0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
- 0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
- 0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
- 0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
- 0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
- 0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
- 0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
- 0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
- };
- MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
- MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
- int roff;
- int i,g,count_all=GROUP_PARALLELISM;
-
- roff=GROUP_PARALLELISM*56;
-
- #define FASTTRASP1
- #ifndef FASTTRASP1
- for(g=0;g<count;g++){
- // Init registers
- int j;
- for(j=0;j<8;j++){
- r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
- }
- }
- #else
- trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
- #endif
- //dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
-
- // loop over kk[55]..kk[0]
- for(i=55;i>=0;i--){
- {
- MEMALIGN batch tkkmulti=kkmulti[i];
- batch *si=(batch *)sbox_in;
- batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
- for(g=0;g<count_all/BYTES_PER_BATCH;g++){
- si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
- }
- }
-
- // table lookup, this works on only one byte at a time
- // most difficult part of all
- // - can't be parallelized
- // - can't be synthetized through boolean terms (8 input bits are too many)
- for(g=0;g<count_all;g++){
- sbox_out[g]=block_sbox[sbox_in[g]];
- }
-
- // bit permutation
- {
- unsigned char *po=(unsigned char *)perm_out;
- unsigned char *so=(unsigned char *)sbox_out;
- //dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
- for(g=0;g<count_all;g+=BYTES_PER_BATCH){
- MEMALIGN batch in,out;
- in=*(batch *)&so[g];
-
- out=B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFOR(
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
- B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
- B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
-
- *(batch *)&po[g]=out;
- }
- //dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
- }
-
- roff-=GROUP_PARALLELISM; /* virtual shift of registers */
-
- #if 0
- /* one by one */
- for(g=0;g<count_all;g++){
- r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
- r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
- r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
- r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
- r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
- }
- #else
- for(g=0;g<count_all;g+=BEST_SPAN){
- XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
- XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
- }
- #endif
- }
-
- #define FASTTRASP2
- #ifndef FASTTRASP2
- for(g=0;g<count;g++){
- // Copy results
- int j;
- for(j=0;j<8;j++){
- bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
- }
- }
- #else
- trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
- #endif
- }
-
- //-----------------------------------EXTERNAL INTERFACE
-
- //-----get internal parallelism
-
- int get_internal_parallelism(void){
- return GROUP_PARALLELISM;
- }
-
- //-----get suggested cluster size
-
- int get_suggested_cluster_size(void){
- int r;
- r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
- if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
- return r;
- }
-
- //-----key structure
-
- void *get_key_struct(void){
- struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
- if(keys) {
- static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
- set_control_words(keys,pk,pk);
- }
- return keys;
- }
-
/*
 * Release a key structure obtained from get_key_struct().
 *
 * `keys` is passed straight to FREE().  The previous `return FREE(keys);`
 * returned an expression from a void function, which ISO C does not allow.
 */
void free_key_struct(void *keys){
  FREE(keys);
}
-
- //-----set control words
-
- static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
- // could be made faster, but is not run often
- int bi,by;
- int i,j;
- // key
- memcpy(key->ck,pk,8);
- // precalculations for stream
- key_schedule_stream(key->ck,key->iA,key->iB);
- for(by=0;by<8;by++){
- for(bi=0;bi<8;bi++){
- key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
- }
- }
- for(by=0;by<8;by++){
- for(bi=0;bi<4;bi++){
- key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
- key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
- }
- }
- // precalculations for block
- key_schedule_block(key->ck,key->kk);
- for(i=0;i<56;i++){
- for(j=0;j<BYTES_PER_BATCH;j++){
- *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
- }
- }
- }
-
- void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){
- schedule_key(&((struct csa_keys_t *)keys)->even,ev);
- schedule_key(&((struct csa_keys_t *)keys)->odd,od);
- }
-
- void set_even_control_word(void *keys, const unsigned char *pk){
- schedule_key(&((struct csa_keys_t *)keys)->even,pk);
- }
-
- void set_odd_control_word(void *keys, const unsigned char *pk){
- schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
- }
-
- //-----get control words
-
- void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
- memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8);
- memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8);
- }
-
- //----- decrypt
-
- int decrypt_packets(void *keys, unsigned char **cluster){
- // statistics, currently unused
- int stat_no_scramble=0;
- int stat_reserved=0;
- int stat_decrypted[2]={0,0};
- int stat_decrypted_mini=0;
- unsigned char **clst;
- unsigned char **clst2;
- int grouped;
- int group_ev_od;
- int advanced;
- int can_advance;
- unsigned char *g_pkt[GROUP_PARALLELISM];
- int g_len[GROUP_PARALLELISM];
- int g_offset[GROUP_PARALLELISM];
- int g_n[GROUP_PARALLELISM];
- int g_residue[GROUP_PARALLELISM];
- unsigned char *pkt;
- int xc0,ev_od,len,offset,n,residue;
- struct csa_key_t* k;
- int i,j,iter,g;
- int t23,tsmall;
- int alive[24];
- //icc craziness int pad1=0; //////////align! FIXME
- unsigned char *encp[GROUP_PARALLELISM];
- MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
- MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
- struct stream_regs regs;
-
- //icc craziness i=(int)&pad1;//////////align!!! FIXME
-
- // build a list of packets to be processed
- clst=cluster;
- grouped=0;
- advanced=0;
- can_advance=1;
- group_ev_od=-1; // silence incorrect compiler warning
- pkt=*clst;
- do{ // find a new packet
- if(grouped==GROUP_PARALLELISM){
- // full
- break;
- }
- if(pkt==NULL){
- // no more ranges
- break;
- }
- if(pkt>=*(clst+1)){
- // out of this range, try next
- clst++;clst++;
- pkt=*clst;
- continue;
- }
-
- do{ // handle this packet
- xc0=pkt[3]&0xc0;
- DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
- if(xc0==0x00){
- DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
- advanced+=can_advance;
- stat_no_scramble++;
- break;
- }
- if(xc0==0x40){
- DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
- advanced+=can_advance;
- stat_reserved++;
- break;
- }
- if(xc0==0x80||xc0==0xc0){ // encrypted
- ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
- if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
- if(group_ev_od==ev_od){ // could be added to group
- pkt[3]&=0x3f; // consider it decrypted now
- if(pkt[3]&0x20){ // incomplete packet
- offset=4+pkt[4]+1;
- len=188-offset;
- n=len>>3;
- residue=len-(n<<3);
- if(n==0){ // decrypted==encrypted!
- DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
- advanced+=can_advance;
- stat_decrypted_mini++;
- break; // this doesn't need more processing
- }
- }else{
- len=184;
- offset=4;
- n=23;
- residue=0;
- }
- g_pkt[grouped]=pkt;
- g_len[grouped]=len;
- g_offset[grouped]=offset;
- g_n[grouped]=n;
- g_residue[grouped]=residue;
- DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
- grouped++;
- advanced+=can_advance;
- stat_decrypted[ev_od]++;
- }
- else{
- can_advance=0;
- DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
- break; // skip and go on
- }
- }
- } while(0);
-
- if(can_advance){
- // move range start forward
- *clst+=188;
- }
- // next packet, if there is one
- pkt+=188;
- } while(1);
- DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
-
- // delete empty ranges and compact list
- clst2=cluster;
- for(clst=cluster;*clst!=NULL;clst+=2){
- // if not empty
- if(*clst<*(clst+1)){
- // it will remain
- *clst2=*clst;
- *(clst2+1)=*(clst+1);
- clst2+=2;
- }
- }
- *clst2=NULL;
-
- if(grouped==0){
- // no processing needed
- return advanced;
- }
-
- // sort them, longest payload first
- // we expect many n=23 packets and a few n<23
- DBG(fprintf(stderr,"PRESORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
- // grouped is always <= GROUP_PARALLELISM
-
- #define g_swap(a,b) \
- pkt=g_pkt[a]; \
- g_pkt[a]=g_pkt[b]; \
- g_pkt[b]=pkt; \
- \
- len=g_len[a]; \
- g_len[a]=g_len[b]; \
- g_len[b]=len; \
- \
- offset=g_offset[a]; \
- g_offset[a]=g_offset[b]; \
- g_offset[b]=offset; \
- \
- n=g_n[a]; \
- g_n[a]=g_n[b]; \
- g_n[b]=n; \
- \
- residue=g_residue[a]; \
- g_residue[a]=g_residue[b]; \
- g_residue[b]=residue;
-
- // step 1: move n=23 packets before small packets
- t23=0;
- tsmall=grouped-1;
- for(;;){
- for(;t23<grouped;t23++){
- if(g_n[t23]!=23) break;
- }
- DBG(fprintf(stderr,"t23 after for =%i\n",t23));
-
- for(;tsmall>=0;tsmall--){
- if(g_n[tsmall]==23) break;
- }
- DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
-
- if(tsmall-t23<1) break;
-
- DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
-
- g_swap(t23,tsmall);
-
- t23++;
- tsmall--;
- DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
- }
- DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
- DBG(fprintf(stderr,"MIDSORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
-
- // step 2: sort small packets in decreasing order of n (bubble sort is enough)
- for(i=t23;i<grouped;i++){
- for(j=i+1;j<grouped;j++){
- if(g_n[j]>g_n[i]){
- g_swap(i,j);
- }
- }
- }
- DBG(fprintf(stderr,"POSTSORTING\n"));
- for(i=0;i<grouped;i++){
- DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
- }
-
- // we need to know how many packets need 23 iterations, how many 22...
- for(i=0;i<=23;i++){
- alive[i]=0;
- }
- // count
- alive[23-1]=t23;
- for(i=t23;i<grouped;i++){
- alive[g_n[i]-1]++;
- }
- // integrate
- for(i=22;i>=0;i--){
- alive[i]+=alive[i+1];
- }
- DBG(fprintf(stderr,"ALIVE\n"));
- for(i=0;i<=23;i++){
- DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
- }
-
- // choose key
- if(group_ev_od==0){
- k=&((struct csa_keys_t *)keys)->even;
- }
- else{
- k=&((struct csa_keys_t *)keys)->odd;
- }
-
- //INIT
- //#define INITIALIZE_UNUSED_INPUT
- #ifdef INITIALIZE_UNUSED_INPUT
- // unnecessary zeroing.
- // without this, we operate on uninitialized memory
- // when grouped<GROUP_PARALLELISM, but it's not a problem,
- // as final results will be discarded.
- // random data makes debugging sessions difficult.
- for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
- DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
- #else
- DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
- #endif
-
- for(g=0;g<grouped;g++){
- encp[g]=g_pkt[g];
- DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
- encp[g]+=g_offset[g]; // skip header
- FFTABLEIN(stream_in,g,encp[g]);
- }
- //dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
-
-
- // ITER 0
- DBG(fprintf(stderr,">>>>>ITER 0\n"));
- iter=0;
- stream_cypher_group_init(®s,k->iA_g,k->iB_g,stream_in);
- // fill first ib
- for(g=0;g<alive[iter];g++){
- COPY_8_BY(ib+8*g,encp[g]);
- }
- DBG(dump_mem("IB ",ib,8*alive[iter],8));
- // ITER 1..N-1
- for (iter=1;iter<23&&alive[iter-1]>0;iter++){
- DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
- // alive and just dead packets: calc block
- block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
- DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
- // all packets (dead too): calc stream
- stream_cypher_group_normal(®s,stream_out);
- //dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
-
- // alive packets: calc ib
- for(g=0;g<alive[iter];g++){
- FFTABLEOUT(ib+8*g,stream_out,g);
- DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
- // XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
- #if 1
- XOREQ_4_BY(ib+8*g,encp[g]+8);
- XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
- #else
- XOREQ_8_BY(ib+8*g,encp[g]+8);
- #endif
- DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
- }
- // alive packets: decrypt data
- for(g=0;g<alive[iter];g++){
- DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
- XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
- DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
- }
- // just dead packets: write decrypted data
- for(g=alive[iter];g<alive[iter-1];g++){
- DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
- COPY_8_BY(encp[g],block_out+8*g);
- DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
- }
- // just dead packets: decrypt residue
- for(g=alive[iter];g<alive[iter-1];g++){
- DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
- FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
- DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
- }
- // alive packets: pointers++
- for(g=0;g<alive[iter];g++) encp[g]+=8;
- };
- // ITER N
- DBG(fprintf(stderr,">>>>>ITER 23\n"));
- iter=23;
- // calc block
- block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
- DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
- // just dead packets: write decrypted data
- for(g=alive[iter];g<alive[iter-1];g++){
- DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
- COPY_8_BY(encp[g],block_out+8*g);
- DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
- }
- // no residue possible
- // so do nothing
-
- DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
-
- M_EMPTY(); // restore CPU multimedia state
-
- return advanced;
- }
|