tsdecrypt reads and decrypts CSA encrypted incoming mpeg transport stream over UDP/RTP using code words obtained from OSCAM or similar CAM server. tsdecrypt communicates with CAM server using cs378x (camd35 over tcp) protocol or newcamd protocol. https://georgi.unixsol.org/programs/tsdecrypt/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

FFdecsa.c 26KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880
  1. /* FFdecsa -- fast decsa algorithm
  2. *
  3. * Copyright (C) 2003-2004 fatih89r
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  19. #include <sys/types.h>
  20. #include <string.h>
  21. #include <stdio.h>
  22. #include <stdlib.h>
  23. #include "FFdecsa.h"
  // Fallback definition of NULL, used only if none of the headers included above provided one.
  #ifndef NULL
  #define NULL 0
  #endif
  // Uncomment the next line to compile in the debug tracing used throughout this file.
  //#define DEBUG
  // DBG(a) expands to its argument when DEBUG is defined and to nothing otherwise,
  // so statements wrapped in DBG(...) disappear completely from non-debug builds.
  #ifdef DEBUG
  #define DBG(a) a
  #else
  #define DBG(a)
  #endif
  //// Parallelization back-ends; large speed differences between them are possible.
  // Each constant below is a possible value of PARALLEL_MODE. The #if chain that
  // follows maps every value to exactly one "parallel_*.h" header and rejects any
  // other value with #error.
  #define PARALLEL_32_4CHAR 320
  #define PARALLEL_32_4CHARA 321
  #define PARALLEL_32_INT 322
  #define PARALLEL_64_8CHAR 640
  #define PARALLEL_64_8CHARA 641
  #define PARALLEL_64_2INT 642
  #define PARALLEL_64_LONG 643
  #define PARALLEL_64_MMX 644
  #define PARALLEL_128_16CHAR 1280
  #define PARALLEL_128_16CHARA 1281
  #define PARALLEL_128_4INT 1282
  #define PARALLEL_128_2LONG 1283
  #define PARALLEL_128_2MMX 1284
  #define PARALLEL_128_SSE 1285
  #define PARALLEL_128_SSE2 1286
  // Selected back-end. PARALLEL_MODE may be predefined (e.g. on the compiler
  // command line); PARALLEL_32_INT is used when it is not.
  #ifndef PARALLEL_MODE
  #define PARALLEL_MODE PARALLEL_32_INT
  #endif
  // End of back-end selection.
  55. #include "parallel_generic.h"
  56. //// conditionals
  57. #if PARALLEL_MODE==PARALLEL_32_4CHAR
  58. #include "parallel_032_4char.h"
  59. #elif PARALLEL_MODE==PARALLEL_32_4CHARA
  60. #include "parallel_032_4charA.h"
  61. #elif PARALLEL_MODE==PARALLEL_32_INT
  62. #include "parallel_032_int.h"
  63. #elif PARALLEL_MODE==PARALLEL_64_8CHAR
  64. #include "parallel_064_8char.h"
  65. #elif PARALLEL_MODE==PARALLEL_64_8CHARA
  66. #include "parallel_064_8charA.h"
  67. #elif PARALLEL_MODE==PARALLEL_64_2INT
  68. #include "parallel_064_2int.h"
  69. #elif PARALLEL_MODE==PARALLEL_64_LONG
  70. #include "parallel_064_long.h"
  71. #elif PARALLEL_MODE==PARALLEL_64_MMX
  72. #include "parallel_064_mmx.h"
  73. #elif PARALLEL_MODE==PARALLEL_128_16CHAR
  74. #include "parallel_128_16char.h"
  75. #elif PARALLEL_MODE==PARALLEL_128_16CHARA
  76. #include "parallel_128_16charA.h"
  77. #elif PARALLEL_MODE==PARALLEL_128_4INT
  78. #include "parallel_128_4int.h"
  79. #elif PARALLEL_MODE==PARALLEL_128_2LONG
  80. #include "parallel_128_2long.h"
  81. #elif PARALLEL_MODE==PARALLEL_128_2MMX
  82. #include "parallel_128_2mmx.h"
  83. #elif PARALLEL_MODE==PARALLEL_128_SSE
  84. #include "parallel_128_sse.h"
  85. #elif PARALLEL_MODE==PARALLEL_128_SSE2
  86. #include "parallel_128_sse2.h"
  87. #else
  88. #error "unknown/undefined parallel mode"
  89. #endif
  // Macros derived from the selected back-end.
  // GROUP_PARALLELISM is not defined in this file; presumably it comes from the
  // parallel_*.h header chosen above -- confirm there.
  #define BYTES_PER_GROUP (GROUP_PARALLELISM/8)
  #define BYPG BYTES_PER_GROUP
  #define BITS_PER_GROUP GROUP_PARALLELISM
  #define BIPG BITS_PER_GROUP
  // Allocation hooks used by get_key_struct()/free_key_struct(); they default to
  // malloc/free unless already defined before this point.
  #ifndef MALLOC
  #define MALLOC(X) malloc(X)
  #endif
  #ifndef FREE
  #define FREE(X) free(X)
  #endif
  // MEMALIGN is written in front of declarations of group/batch data and local
  // buffers; it expands to nothing unless already defined before this point.
  #ifndef MEMALIGN
  #define MEMALIGN
  #endif
  104. //// debug tool
  105. #ifdef DEBUG
  /*
   * dump_mem -- debug helper: hex-dump a memory region to stderr.
   *
   *   string   label printed at the start of every output line
   *   p        first byte to dump
   *   len      number of bytes to dump (nothing is dumped if len <= 0)
   *   linelen  bytes per output line; a value <= 0 means "everything on one line"
   *
   * Every line starts with "<string> <offset as 8 hex digits>:". Inside a line an
   * extra space is printed before every 4th byte and one more before every 8th.
   * A trailing newline is written only when the byte count is a multiple of
   * linelen (this includes len == 0).
   *
   * Callers in this file pass a residue length that may be 0 as linelen, so a
   * non-positive linelen must not reach the modulo operations below.
   */
  static void dump_mem(const char *string, const unsigned char *p, int len, int linelen){
    int i;
    /* guard against division by zero in i%linelen */
    if(linelen<=0) linelen=(len>0)?len:1;
    for(i=0;i<len;i++){
      if(i%linelen==0){
        /* start of a new output line: terminate the previous one, print the prefix */
        if(i) fprintf(stderr,"\n");
        fprintf(stderr,"%s %08x:",string,(unsigned int)i);
      }
      else{
        if(i%8==0) fprintf(stderr," ");
        if(i%4==0) fprintf(stderr," ");
      }
      fprintf(stderr," %02x",(unsigned int)p[i]);
    }
    if(i%linelen==0) fprintf(stderr,"\n");
  }
  119. #endif
  120. //////////////////////////////////////////////////////////////////////////////////
  // One control word together with everything schedule_key() precomputes from it.
  struct csa_key_t{
    unsigned char ck[8];          // the 8-byte control word as supplied by the caller
    // used by stream
    int iA[8]; // iA[0] is for A1, iA[7] is for A8 (nibbles of ck[0..3], see key_schedule_stream)
    int iB[8]; // iB[0] is for B1, iB[7] is for B8 (nibbles of ck[4..7], see key_schedule_stream)
    // used by stream (group): each entry is FF1() or FF0() depending on one key bit
    MEMALIGN group ck_g[8][8]; // [byte][bit:0=LSB,7=MSB]
    MEMALIGN group iA_g[8][4]; // [0 for A1][0 for LSB]
    MEMALIGN group iB_g[8][4]; // [0 for B1][0 for LSB]
    // used by block: 56-byte schedule produced by key_schedule_block()
    unsigned char kk[56];
    // used by block (group)
    MEMALIGN batch kkmulti[56]; // many times the same byte in every batch (kk[i] replicated)
  };
  // Key pair behind the opaque void* handle of the external interface:
  // decrypt_packets() uses `even` for packets whose scrambling bits are 0x80
  // and `odd` for packets whose scrambling bits are 0xc0.
  struct csa_keys_t{
    struct csa_key_t even;
    struct csa_key_t odd;
  };
  139. //-----stream cypher
  140. //-----key schedule for stream decypher
  /*
   * Stream-cipher key schedule: split the control word into 16 nibbles.
   *
   *   ck  [In]  8 key bytes
   *   iA  [Out] 8 nibbles taken from ck[0..3], high nibble of each byte first
   *   iB  [Out] 8 nibbles taken from ck[4..7], high nibble of each byte first
   */
  static void key_schedule_stream(
    unsigned char *ck,
    int *iA,
    int *iB)
  {
    int nib;

    /* even output index <- high nibble, odd output index <- low nibble */
    for(nib=0; nib<8; nib++){
      const int shift = (nib&1) ? 0 : 4;
      iA[nib] = (ck[nib>>1] >> shift) & 0xf;
    }
    for(nib=0; nib<8; nib++){
      const int shift = (nib&1) ? 0 : 4;
      iB[nib] = (ck[4+(nib>>1)] >> shift) & 0xf;
    }
  }
  //----- stream main function
  // stream.c is textually included twice, once with STREAM_INIT defined and once
  // with STREAM_NORMAL defined, so the same source yields two variants.
  // NOTE(review): presumably these are stream_cypher_group_init() and
  // stream_cypher_group_normal(), both called by decrypt_packets() -- confirm in stream.c.
  #define STREAM_INIT
  #include "stream.c"
  #undef STREAM_INIT
  #define STREAM_NORMAL
  #include "stream.c"
  #undef STREAM_NORMAL
  170. //-----block decypher
  171. //-----key schedule for block decypher
  /*
   * Block-cipher key schedule.
   *
   *   ck  [In]  8 key bytes
   *   kk  [Out] 56 schedule bytes, written as 7 rows of 8 bytes
   *
   * Row 6 is the key itself. Each lower row is the row above it with its 64 bits
   * scattered through key_perm (bit 0 = MSB of byte 0; table entries are 1-based
   * destination positions). Finally every byte of row i is XORed with i.
   */
  static void key_schedule_block(
    unsigned char *ck,
    unsigned char *kk)
  {
    static const unsigned char key_perm[0x40] = {
      0x12,0x24,0x09,0x07,0x2A,0x31,0x1D,0x15, 0x1C,0x36,0x3E,0x32,0x13,0x21,0x3B,0x40,
      0x18,0x14,0x25,0x27,0x02,0x35,0x1B,0x01, 0x22,0x04,0x0D,0x0E,0x39,0x28,0x1A,0x29,
      0x33,0x23,0x34,0x0C,0x16,0x30,0x1E,0x3A, 0x2D,0x1F,0x08,0x19,0x17,0x2F,0x3D,0x11,
      0x3C,0x05,0x38,0x2B,0x0B,0x06,0x0A,0x2C, 0x20,0x3F,0x2E,0x0F,0x03,0x26,0x10,0x37,
    };
    unsigned char rows[7][8];
    int row, src, col;

    /* top row: the key itself */
    memcpy(rows[6], ck, 8);

    /* derive rows 5..0, each from the row above */
    for(row=5; row>=0; row--){
      const unsigned char *above = rows[row+1];
      unsigned char *cur = rows[row];

      memset(cur, 0, 8);
      for(src=0; src<64; src++){
        if((above[src>>3] >> (7-(src&7))) & 1){
          const int dst = key_perm[src]-1;
          cur[dst>>3] |= (unsigned char)(0x80 >> (dst&7));
        }
      }
    }

    /* xor every byte with its row number to give kk */
    for(row=0; row<7; row++){
      for(col=0; col<8; col++){
        kk[row*8+col] = (unsigned char)(rows[row][col] ^ row);
      }
    }
  }
  220. //-----block utils
  /*
   * trasp_N_8 -- load `count` 8-byte blocks into the row-organised work buffer.
   *
   * NOTE: the parameter names are the reverse of the data flow. `in` is the
   * buffer that is WRITTEN and then rearranged in place; `out` is only READ.
   * The caller in this file passes (&r[roff], ib, count).
   *
   * Both buffers are accessed through int pointers, so they have to be suitably
   * aligned for int access. NOTE(review): the index arithmetic (INTS_PER_ROW,
   * ri[GROUP_PARALLELISM+g]) looks like it assumes 4-byte ints and rows of
   * GROUP_PARALLELISM bytes -- confirm for the targeted platforms.
   *
   * The two in-place passes always run over the full 8 rows, regardless of `count`.
   */
  static inline __attribute__((always_inline)) void trasp_N_8 (unsigned char *in,unsigned char* out,int count){
    int *ri=(int *)in;
    int *ibi=(int *)out;
    int j,i,k,g;
    // copy and first step: the first int of source block g goes to ri[g],
    // the second int goes to ri[GROUP_PARALLELISM+g]
    for(g=0;g<count;g++){
      ri[g]=ibi[2*g];
      ri[GROUP_PARALLELISM+g]=ibi[2*g+1];
    }
    //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 01230123
  #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
    // exchange 16-bit halves between row j+i and row j+i+2:
    // the lower row keeps its low half and takes the other row's low half as its high half,
    // the upper row takes the lower row's high half as its low half and keeps its own high half
    for(j=0;j<8;j+=4){
      for(i=0;i<2;i++){
        for(k=0;k<INTS_PER_ROW;k++){
          unsigned int t,b;
          t=ri[INTS_PER_ROW*(j+i)+k];
          b=ri[INTS_PER_ROW*(j+i+2)+k];
          ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
          ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
        }
      }
    }
    //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 01010101
    // exchange alternate bytes between row j and row j+1 (the i loop runs once)
    for(j=0;j<8;j+=2){
      for(i=0;i<1;i++){
        for(k=0;k<INTS_PER_ROW;k++){
          unsigned int t,b;
          t=ri[INTS_PER_ROW*(j+i)+k];
          b=ri[INTS_PER_ROW*(j+i+1)+k];
          ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
          ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
        }
      }
    }
    //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 00000000
  }
  /*
   * trasp_8_N -- store the row-organised work buffer back as `count` 8-byte blocks.
   *
   * Applies the same two exchange passes as trasp_N_8 in the opposite order
   * (bytes first, then 16-bit halves), then copies int pairs out.
   *
   *   in     work buffer; it is MODIFIED in place by the two passes before being read out
   *   out    destination, receives 2*count ints
   *   count  number of 8-byte blocks to write
   *
   * Both buffers are accessed through int pointers, so they have to be suitably
   * aligned for int access. The caller in this file passes (&r[roff], bd, count).
   */
  static inline __attribute__((always_inline)) void trasp_8_N (unsigned char *in,unsigned char* out,int count){
    int *ri=(int *)in;
    int *bdi=(int *)out;
    int j,i,k,g;
    // identical redefinition of the macro introduced in trasp_N_8
  #define INTS_PER_ROW (GROUP_PARALLELISM/8*2)
    //dump_mem("NE1 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 00000000
    // exchange alternate bytes between row j and row j+1 (the i loop runs once)
    for(j=0;j<8;j+=2){
      for(i=0;i<1;i++){
        for(k=0;k<INTS_PER_ROW;k++){
          unsigned int t,b;
          t=ri[INTS_PER_ROW*(j+i)+k];
          b=ri[INTS_PER_ROW*(j+i+1)+k];
          ri[INTS_PER_ROW*(j+i)+k]= (t&0x00ff00ff) | ((b&0x00ff00ff)<<8);
          ri[INTS_PER_ROW*(j+i+1)+k]= ((t&0xff00ff00)>>8) | (b&0xff00ff00);
        }
      }
    }
    //dump_mem("NE2 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 01010101
    // exchange 16-bit halves between row j+i and row j+i+2
    for(j=0;j<8;j+=4){
      for(i=0;i<2;i++){
        for(k=0;k<INTS_PER_ROW;k++){
          unsigned int t,b;
          t=ri[INTS_PER_ROW*(j+i)+k];
          b=ri[INTS_PER_ROW*(j+i+2)+k];
          ri[INTS_PER_ROW*(j+i)+k]= (t&0x0000ffff) | ((b )<<16);
          ri[INTS_PER_ROW*(j+i+2)+k]= ((t )>>16) | (b&0xffff0000) ;
        }
      }
    }
    //dump_mem("NE3 r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // now 01230123
    // copy out: block g receives ri[g] followed by ri[GROUP_PARALLELISM+g]
    for(g=0;g<count;g++){
      bdi[2*g]=ri[g];
      bdi[2*g+1]=ri[GROUP_PARALLELISM+g];
    }
  }
  298. //-----block main function
  299. // block group
  /*
   * block_decypher_group -- run the 56-step block decipher on a group of 8-byte blocks at once.
   *
   * The blocks are first transposed into `r` so that byte j of every block forms
   * one row of GROUP_PARALLELISM bytes. Each of the 56 steps (kk[55] down to kk[0])
   * then works on whole rows:
   *   1. sbox_in  = kkmulti[i] XOR row 6
   *   2. sbox_out = block_sbox[sbox_in]           (byte-by-byte table lookup)
   *   3. perm_out = bit permutation of sbox_out
   *   4. roff moves back by one row, so every old row j becomes row j+1; then
   *      row 0 = row 8 (the old row 7) XOR sbox_out, row 6 ^= perm_out, and
   *      rows 4, 3 and 2 are each XORed with the new row 0.
   * Finally the rows are transposed back into `bd`.
   *
   * `count` only limits how many blocks the two transposes move in and out; the
   * 56 steps always process all GROUP_PARALLELISM lanes (count_all), so lanes at
   * or beyond `count` are computed from whatever `r` happens to contain.
   *
   * The B_FF*, XOR_BEST_BY, XOREQ_BEST_BY, BEST_SPAN and BYTES_PER_BATCH names are
   * not defined in this file; presumably they come from the selected parallel_*.h header.
   */
  static void block_decypher_group(
    batch *kkmulti, // [In] kkmulti[0]-kkmulti[55] 56 batches | Key schedule (each batch has repeated equal bytes).
    unsigned char *ib, // [In] (ib0,ib1,...ib7)...x32 32*8 bytes | Initialization vector.
    unsigned char *bd, // [Out] (bd0,bd1,...bd7)...x32 32*8 bytes | Block decipher.
    int count)
  {
    // int is faster than unsigned char. apparently not
    static const unsigned char block_sbox[0x100] = {
      0x3A,0xEA,0x68,0xFE,0x33,0xE9,0x88,0x1A, 0x83,0xCF,0xE1,0x7F,0xBA,0xE2,0x38,0x12,
      0xE8,0x27,0x61,0x95,0x0C,0x36,0xE5,0x70, 0xA2,0x06,0x82,0x7C,0x17,0xA3,0x26,0x49,
      0xBE,0x7A,0x6D,0x47,0xC1,0x51,0x8F,0xF3, 0xCC,0x5B,0x67,0xBD,0xCD,0x18,0x08,0xC9,
      0xFF,0x69,0xEF,0x03,0x4E,0x48,0x4A,0x84, 0x3F,0xB4,0x10,0x04,0xDC,0xF5,0x5C,0xC6,
      0x16,0xAB,0xAC,0x4C,0xF1,0x6A,0x2F,0x3C, 0x3B,0xD4,0xD5,0x94,0xD0,0xC4,0x63,0x62,
      0x71,0xA1,0xF9,0x4F,0x2E,0xAA,0xC5,0x56, 0xE3,0x39,0x93,0xCE,0x65,0x64,0xE4,0x58,
      0x6C,0x19,0x42,0x79,0xDD,0xEE,0x96,0xF6, 0x8A,0xEC,0x1E,0x85,0x53,0x45,0xDE,0xBB,
      0x7E,0x0A,0x9A,0x13,0x2A,0x9D,0xC2,0x5E, 0x5A,0x1F,0x32,0x35,0x9C,0xA8,0x73,0x30,
      0x29,0x3D,0xE7,0x92,0x87,0x1B,0x2B,0x4B, 0xA5,0x57,0x97,0x40,0x15,0xE6,0xBC,0x0E,
      0xEB,0xC3,0x34,0x2D,0xB8,0x44,0x25,0xA4, 0x1C,0xC7,0x23,0xED,0x90,0x6E,0x50,0x00,
      0x99,0x9E,0x4D,0xD9,0xDA,0x8D,0x6F,0x5F, 0x3E,0xD7,0x21,0x74,0x86,0xDF,0x6B,0x05,
      0x8E,0x5D,0x37,0x11,0xD2,0x28,0x75,0xD6, 0xA7,0x77,0x24,0xBF,0xF0,0xB0,0x02,0xB7,
      0xF8,0xFC,0x81,0x09,0xB1,0x01,0x76,0x91, 0x7D,0x0F,0xC8,0xA0,0xF2,0xCB,0x78,0x60,
      0xD1,0xF7,0xE0,0xB5,0x98,0x22,0xB3,0x20, 0x1D,0xA6,0xDB,0x7B,0x59,0x9F,0xAE,0x31,
      0xFB,0xD3,0xB6,0xCA,0x43,0x72,0x07,0xF4, 0xD8,0x41,0x14,0x55,0x0D,0x54,0x8B,0xB9,
      0xAD,0x46,0x0B,0xAF,0x80,0x52,0x2C,0xFA, 0x8C,0x89,0x66,0xFD,0xB2,0xA9,0x9B,0xC0,
    };
    // 8 live rows plus 56 rows of headroom: the live window starts at the end of
    // the buffer and slides one row towards the start on every step
    MEMALIGN unsigned char r[GROUP_PARALLELISM*(8+56)]; /* 56 because we will move back in memory while looping */
    MEMALIGN unsigned char sbox_in[GROUP_PARALLELISM],sbox_out[GROUP_PARALLELISM],perm_out[GROUP_PARALLELISM];
    int roff; // byte offset of row 0 of the live window inside r
    int i,g,count_all=GROUP_PARALLELISM;
    roff=GROUP_PARALLELISM*56;
    // FASTTRASP1 is defined right here, so the trasp_N_8 branch below is the one compiled;
    // the plain byte-by-byte loop is kept as the reference implementation
  #define FASTTRASP1
  #ifndef FASTTRASP1
    for(g=0;g<count;g++){
      // Init registers
      int j;
      for(j=0;j<8;j++){
        r[roff+GROUP_PARALLELISM*j+g]=ib[8*g+j];
      }
    }
  #else
    trasp_N_8((unsigned char *)&r[roff],(unsigned char *)ib,count);
  #endif
    //dump_mem("OLD r[roff]",&r[roff],GROUP_PARALLELISM*8,GROUP_PARALLELISM);
    // loop over kk[55]..kk[0]
    for(i=55;i>=0;i--){
      {
        // sbox_in = key byte of this step XOR row 6, one batch at a time
        MEMALIGN batch tkkmulti=kkmulti[i];
        batch *si=(batch *)sbox_in;
        batch *r6_N=(batch *)(r+roff+GROUP_PARALLELISM*6);
        for(g=0;g<count_all/BYTES_PER_BATCH;g++){
          si[g]=B_FFXOR(tkkmulti,r6_N[g]); //FIXME: introduce FASTBATCH?
        }
      }
      // table lookup, this works on only one byte at a time
      // most difficult part of all
      // - can't be parallelized
      // - can't be synthetized through boolean terms (8 input bits are too many)
      for(g=0;g<count_all;g++){
        sbox_out[g]=block_sbox[sbox_in[g]];
      }
      // bit permutation
      {
        unsigned char *po=(unsigned char *)perm_out;
        unsigned char *so=(unsigned char *)sbox_out;
        //dump_mem("pre perm ",(unsigned char *)so,GROUP_PARALLELISM,GROUP_PARALLELISM);
        for(g=0;g<count_all;g+=BYTES_PER_BATCH){
          MEMALIGN batch in,out;
          in=*(batch *)&so[g];
          // OR together six masked-and-shifted copies of the input batch.
          // NOTE(review): judging by their names, B_FFN_ALL_xx() is the byte mask
          // 0xXX replicated across the batch and B_FFSH8L/B_FFSH8R shift every
          // byte left/right -- confirm in the parallel_*.h header.
          out=B_FFOR(
            B_FFOR(
            B_FFOR(
            B_FFOR(
            B_FFOR(
            B_FFSH8L(B_FFAND(in,B_FFN_ALL_29()),1),
            B_FFSH8L(B_FFAND(in,B_FFN_ALL_02()),6)),
            B_FFSH8L(B_FFAND(in,B_FFN_ALL_04()),3)),
            B_FFSH8R(B_FFAND(in,B_FFN_ALL_10()),2)),
            B_FFSH8R(B_FFAND(in,B_FFN_ALL_40()),6)),
            B_FFSH8R(B_FFAND(in,B_FFN_ALL_80()),4));
          *(batch *)&po[g]=out;
        }
        //dump_mem("post perm",(unsigned char *)po,GROUP_PARALLELISM,GROUP_PARALLELISM);
      }
      // moving the window back one row renumbers every old row j as row j+1
      // without copying any data
      roff-=GROUP_PARALLELISM; /* virtual shift of registers */
  #if 0
      /* one by one */
      for(g=0;g<count_all;g++){
        r[roff+GROUP_PARALLELISM*0+g]=r[roff+GROUP_PARALLELISM*8+g]^sbox_out[g];
        r[roff+GROUP_PARALLELISM*6+g]^=perm_out[g];
        r[roff+GROUP_PARALLELISM*4+g]^=r[roff+GROUP_PARALLELISM*0+g];
        r[roff+GROUP_PARALLELISM*3+g]^=r[roff+GROUP_PARALLELISM*0+g];
        r[roff+GROUP_PARALLELISM*2+g]^=r[roff+GROUP_PARALLELISM*0+g];
      }
  #else
      // same update as the disabled reference loop above, BEST_SPAN bytes at a time
      for(g=0;g<count_all;g+=BEST_SPAN){
        XOR_BEST_BY(&r[roff+GROUP_PARALLELISM*0+g],&r[roff+GROUP_PARALLELISM*8+g],&sbox_out[g]);
        XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*6+g],&perm_out[g]);
        XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*4+g],&r[roff+GROUP_PARALLELISM*0+g]);
        XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*3+g],&r[roff+GROUP_PARALLELISM*0+g]);
        XOREQ_BEST_BY(&r[roff+GROUP_PARALLELISM*2+g],&r[roff+GROUP_PARALLELISM*0+g]);
      }
  #endif
    }
    // FASTTRASP2 is defined right here, so the trasp_8_N branch below is the one compiled
  #define FASTTRASP2
  #ifndef FASTTRASP2
    for(g=0;g<count;g++){
      // Copy results
      int j;
      for(j=0;j<8;j++){
        bd[8*g+j]=r[roff+GROUP_PARALLELISM*j+g];
      }
    }
  #else
    trasp_8_N((unsigned char *)&r[roff],(unsigned char *)bd,count);
  #endif
  }
  416. //-----------------------------------EXTERNAL INTERFACE
  417. //-----get internal parallelism
  418. int get_internal_parallelism(void){
  419. return GROUP_PARALLELISM;
  420. }
  421. //-----get suggested cluster size
  422. int get_suggested_cluster_size(void){
  423. int r;
  424. r=GROUP_PARALLELISM+GROUP_PARALLELISM/10;
  425. if(r<GROUP_PARALLELISM+5) r=GROUP_PARALLELISM+5;
  426. return r;
  427. }
  428. //-----key structure
  429. void *get_key_struct(void){
  430. struct csa_keys_t *keys=(struct csa_keys_t *)MALLOC(sizeof(struct csa_keys_t));
  431. if(keys) {
  432. static const unsigned char pk[8] = { 0,0,0,0,0,0,0,0 };
  433. set_control_words(keys,pk,pk);
  434. }
  435. return keys;
  436. }
  /*
   * Release a key pair obtained from get_key_struct().
   *
   * The original body was `return FREE(keys);`. ISO C forbids a return statement
   * with an expression in a function returning void, so FREE is now invoked as a
   * plain statement.
   */
  void free_key_struct(void *keys){
    FREE(keys);
  }
  440. //-----set control words
  441. static void schedule_key(struct csa_key_t *key, const unsigned char *pk){
  442. // could be made faster, but is not run often
  443. int bi,by;
  444. int i,j;
  445. // key
  446. memcpy(key->ck,pk,8);
  447. // precalculations for stream
  448. key_schedule_stream(key->ck,key->iA,key->iB);
  449. for(by=0;by<8;by++){
  450. for(bi=0;bi<8;bi++){
  451. key->ck_g[by][bi]=(key->ck[by]&(1<<bi))?FF1():FF0();
  452. }
  453. }
  454. for(by=0;by<8;by++){
  455. for(bi=0;bi<4;bi++){
  456. key->iA_g[by][bi]=(key->iA[by]&(1<<bi))?FF1():FF0();
  457. key->iB_g[by][bi]=(key->iB[by]&(1<<bi))?FF1():FF0();
  458. }
  459. }
  460. // precalculations for block
  461. key_schedule_block(key->ck,key->kk);
  462. for(i=0;i<56;i++){
  463. for(j=0;j<BYTES_PER_BATCH;j++){
  464. *(((unsigned char *)&key->kkmulti[i])+j)=key->kk[i];
  465. }
  466. }
  467. }
  468. void set_control_words(void *keys, const unsigned char *ev, const unsigned char *od){
  469. schedule_key(&((struct csa_keys_t *)keys)->even,ev);
  470. schedule_key(&((struct csa_keys_t *)keys)->odd,od);
  471. }
  472. void set_even_control_word(void *keys, const unsigned char *pk){
  473. schedule_key(&((struct csa_keys_t *)keys)->even,pk);
  474. }
  475. void set_odd_control_word(void *keys, const unsigned char *pk){
  476. schedule_key(&((struct csa_keys_t *)keys)->odd,pk);
  477. }
  478. //-----get control words
  479. void get_control_words(void *keys, unsigned char *even, unsigned char *odd){
  480. memcpy(even,&((struct csa_keys_t *)keys)->even.ck,8);
  481. memcpy(odd,&((struct csa_keys_t *)keys)->odd.ck,8);
  482. }
  483. //----- decrypt
  484. int decrypt_packets(void *keys, unsigned char **cluster){
  485. // statistics, currently unused
  486. int stat_no_scramble=0;
  487. int stat_reserved=0;
  488. int stat_decrypted[2]={0,0};
  489. int stat_decrypted_mini=0;
  490. unsigned char **clst;
  491. unsigned char **clst2;
  492. int grouped;
  493. int group_ev_od;
  494. int advanced;
  495. int can_advance;
  496. unsigned char *g_pkt[GROUP_PARALLELISM];
  497. int g_len[GROUP_PARALLELISM];
  498. int g_offset[GROUP_PARALLELISM];
  499. int g_n[GROUP_PARALLELISM];
  500. int g_residue[GROUP_PARALLELISM];
  501. unsigned char *pkt;
  502. int xc0,ev_od,len,offset,n,residue;
  503. struct csa_key_t* k;
  504. int i,j,iter,g;
  505. int t23,tsmall;
  506. int alive[24];
  507. //icc craziness int pad1=0; //////////align! FIXME
  508. unsigned char *encp[GROUP_PARALLELISM];
  509. MEMALIGN unsigned char stream_in[GROUP_PARALLELISM*8];
  510. MEMALIGN unsigned char stream_out[GROUP_PARALLELISM*8];
  511. MEMALIGN unsigned char ib[GROUP_PARALLELISM*8];
  512. MEMALIGN unsigned char block_out[GROUP_PARALLELISM*8];
  513. struct stream_regs regs;
  514. //icc craziness i=(int)&pad1;//////////align!!! FIXME
  515. // build a list of packets to be processed
  516. clst=cluster;
  517. grouped=0;
  518. advanced=0;
  519. can_advance=1;
  520. group_ev_od=-1; // silence incorrect compiler warning
  521. pkt=*clst;
  522. do{ // find a new packet
  523. if(grouped==GROUP_PARALLELISM){
  524. // full
  525. break;
  526. }
  527. if(pkt==NULL){
  528. // no more ranges
  529. break;
  530. }
  531. if(pkt>=*(clst+1)){
  532. // out of this range, try next
  533. clst++;clst++;
  534. pkt=*clst;
  535. continue;
  536. }
  537. do{ // handle this packet
  538. xc0=pkt[3]&0xc0;
  539. DBG(fprintf(stderr," exam pkt=%p, xc0=%02x, can_adv=%i\n",pkt,xc0,can_advance));
  540. if(xc0==0x00){
  541. DBG(fprintf(stderr,"skip clear pkt %p (can_advance is %i)\n",pkt,can_advance));
  542. advanced+=can_advance;
  543. stat_no_scramble++;
  544. break;
  545. }
  546. if(xc0==0x40){
  547. DBG(fprintf(stderr,"skip reserved pkt %p (can_advance is %i)\n",pkt,can_advance));
  548. advanced+=can_advance;
  549. stat_reserved++;
  550. break;
  551. }
  552. if(xc0==0x80||xc0==0xc0){ // encrypted
  553. ev_od=(xc0&0x40)>>6; // 0 even, 1 odd
  554. if(grouped==0) group_ev_od=ev_od; // this group will be all even (or odd)
  555. if(group_ev_od==ev_od){ // could be added to group
  556. pkt[3]&=0x3f; // consider it decrypted now
  557. if(pkt[3]&0x20){ // incomplete packet
  558. offset=4+pkt[4]+1;
  559. len=188-offset;
  560. n=len>>3;
  561. residue=len-(n<<3);
  562. if(n==0){ // decrypted==encrypted!
  563. DBG(fprintf(stderr,"DECRYPTED MINI! (can_advance is %i)\n",can_advance));
  564. advanced+=can_advance;
  565. stat_decrypted_mini++;
  566. break; // this doesn't need more processing
  567. }
  568. }else{
  569. len=184;
  570. offset=4;
  571. n=23;
  572. residue=0;
  573. }
  574. g_pkt[grouped]=pkt;
  575. g_len[grouped]=len;
  576. g_offset[grouped]=offset;
  577. g_n[grouped]=n;
  578. g_residue[grouped]=residue;
  579. DBG(fprintf(stderr,"%2i: eo=%i pkt=%p len=%03i n=%2i residue=%i\n",grouped,ev_od,pkt,len,n,residue));
  580. grouped++;
  581. advanced+=can_advance;
  582. stat_decrypted[ev_od]++;
  583. }
  584. else{
  585. can_advance=0;
  586. DBG(fprintf(stderr,"skip pkt %p and can_advance set to 0\n",pkt));
  587. break; // skip and go on
  588. }
  589. }
  590. } while(0);
  591. if(can_advance){
  592. // move range start forward
  593. *clst+=188;
  594. }
  595. // next packet, if there is one
  596. pkt+=188;
  597. } while(1);
  598. DBG(fprintf(stderr,"-- result: grouped %i pkts, advanced %i pkts\n",grouped,advanced));
  599. // delete empty ranges and compact list
  600. clst2=cluster;
  601. for(clst=cluster;*clst!=NULL;clst+=2){
  602. // if not empty
  603. if(*clst<*(clst+1)){
  604. // it will remain
  605. *clst2=*clst;
  606. *(clst2+1)=*(clst+1);
  607. clst2+=2;
  608. }
  609. }
  610. *clst2=NULL;
  611. if(grouped==0){
  612. // no processing needed
  613. return advanced;
  614. }
  615. // sort them, longest payload first
  616. // we expect many n=23 packets and a few n<23
  617. DBG(fprintf(stderr,"PRESORTING\n"));
  618. for(i=0;i<grouped;i++){
  619. DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
  620. }
  621. // grouped is always <= GROUP_PARALLELISM
  622. #define g_swap(a,b) \
  623. pkt=g_pkt[a]; \
  624. g_pkt[a]=g_pkt[b]; \
  625. g_pkt[b]=pkt; \
  626. \
  627. len=g_len[a]; \
  628. g_len[a]=g_len[b]; \
  629. g_len[b]=len; \
  630. \
  631. offset=g_offset[a]; \
  632. g_offset[a]=g_offset[b]; \
  633. g_offset[b]=offset; \
  634. \
  635. n=g_n[a]; \
  636. g_n[a]=g_n[b]; \
  637. g_n[b]=n; \
  638. \
  639. residue=g_residue[a]; \
  640. g_residue[a]=g_residue[b]; \
  641. g_residue[b]=residue;
  642. // step 1: move n=23 packets before small packets
  643. t23=0;
  644. tsmall=grouped-1;
  645. for(;;){
  646. for(;t23<grouped;t23++){
  647. if(g_n[t23]!=23) break;
  648. }
  649. DBG(fprintf(stderr,"t23 after for =%i\n",t23));
  650. for(;tsmall>=0;tsmall--){
  651. if(g_n[tsmall]==23) break;
  652. }
  653. DBG(fprintf(stderr,"tsmall after for =%i\n",tsmall));
  654. if(tsmall-t23<1) break;
  655. DBG(fprintf(stderr,"swap t23=%i,tsmall=%i\n",t23,tsmall));
  656. g_swap(t23,tsmall);
  657. t23++;
  658. tsmall--;
  659. DBG(fprintf(stderr,"new t23=%i,tsmall=%i\n\n",t23,tsmall));
  660. }
  661. DBG(fprintf(stderr,"packets with n=23, t23=%i grouped=%i\n",t23,grouped));
  662. DBG(fprintf(stderr,"MIDSORTING\n"));
  663. for(i=0;i<grouped;i++){
  664. DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
  665. }
  666. // step 2: sort small packets in decreasing order of n (bubble sort is enough)
  667. for(i=t23;i<grouped;i++){
  668. for(j=i+1;j<grouped;j++){
  669. if(g_n[j]>g_n[i]){
  670. g_swap(i,j);
  671. }
  672. }
  673. }
  674. DBG(fprintf(stderr,"POSTSORTING\n"));
  675. for(i=0;i<grouped;i++){
  676. DBG(fprintf(stderr,"%2i of %2i: pkt=%p len=%03i n=%2i residue=%i\n",i,grouped,g_pkt[i],g_len[i],g_n[i],g_residue[i]));
  677. }
  678. // we need to know how many packets need 23 iterations, how many 22...
  679. for(i=0;i<=23;i++){
  680. alive[i]=0;
  681. }
  682. // count
  683. alive[23-1]=t23;
  684. for(i=t23;i<grouped;i++){
  685. alive[g_n[i]-1]++;
  686. }
  687. // integrate
  688. for(i=22;i>=0;i--){
  689. alive[i]+=alive[i+1];
  690. }
  691. DBG(fprintf(stderr,"ALIVE\n"));
  692. for(i=0;i<=23;i++){
  693. DBG(fprintf(stderr,"alive%2i=%i\n",i,alive[i]));
  694. }
  695. // choose key
  696. if(group_ev_od==0){
  697. k=&((struct csa_keys_t *)keys)->even;
  698. }
  699. else{
  700. k=&((struct csa_keys_t *)keys)->odd;
  701. }
  702. //INIT
  703. //#define INITIALIZE_UNUSED_INPUT
  704. #ifdef INITIALIZE_UNUSED_INPUT
  705. // unnecessary zeroing.
  706. // without this, we operate on uninitialized memory
  707. // when grouped<GROUP_PARALLELISM, but it's not a problem,
  708. // as final results will be discarded.
  709. // random data makes debugging sessions difficult.
  710. for(j=0;j<GROUP_PARALLELISM*8;j++) stream_in[j]=0;
  711. DBG(fprintf(stderr,"--- WARNING: you could gain speed by not initializing unused memory ---\n"));
  712. #else
  713. DBG(fprintf(stderr,"--- WARNING: DEBUGGING IS MORE DIFFICULT WHEN PROCESSING RANDOM DATA CHANGING AT EVERY RUN! ---\n"));
  714. #endif
  715. for(g=0;g<grouped;g++){
  716. encp[g]=g_pkt[g];
  717. DBG(fprintf(stderr,"header[%i]=%p (%02x)\n",g,encp[g],*(encp[g])));
  718. encp[g]+=g_offset[g]; // skip header
  719. FFTABLEIN(stream_in,g,encp[g]);
  720. }
  721. //dump_mem("stream_in",stream_in,GROUP_PARALLELISM*8,BYPG);
  722. // ITER 0
  723. DBG(fprintf(stderr,">>>>>ITER 0\n"));
  724. iter=0;
  725. stream_cypher_group_init(&regs,k->iA_g,k->iB_g,stream_in);
  726. // fill first ib
  727. for(g=0;g<alive[iter];g++){
  728. COPY_8_BY(ib+8*g,encp[g]);
  729. }
  730. DBG(dump_mem("IB ",ib,8*alive[iter],8));
  731. // ITER 1..N-1
  732. for (iter=1;iter<23&&alive[iter-1]>0;iter++){
  733. DBG(fprintf(stderr,">>>>>ITER %i\n",iter));
  734. // alive and just dead packets: calc block
  735. block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
  736. DBG(dump_mem("BLO_ib ",block_out,8*alive[iter-1],8));
  737. // all packets (dead too): calc stream
  738. stream_cypher_group_normal(&regs,stream_out);
  739. //dump_mem("stream_out",stream_out,GROUP_PARALLELISM*8,BYPG);
  740. // alive packets: calc ib
  741. for(g=0;g<alive[iter];g++){
  742. FFTABLEOUT(ib+8*g,stream_out,g);
  743. DBG(dump_mem("stream_out_ib ",ib+8*g,8,8));
  744. // XOREQ8BY gcc bug? 2x4 ok, 8 ko UPDATE: result ok but speed 1-2% slower (!!!???)
  745. #if 1
  746. XOREQ_4_BY(ib+8*g,encp[g]+8);
  747. XOREQ_4_BY(ib+8*g+4,encp[g]+8+4);
  748. #else
  749. XOREQ_8_BY(ib+8*g,encp[g]+8);
  750. #endif
  751. DBG(dump_mem("after_stream_xor_ib ",ib+8*g,8,8));
  752. }
  753. // alive packets: decrypt data
  754. for(g=0;g<alive[iter];g++){
  755. DBG(dump_mem("before_ib_decrypt_data ",encp[g],8,8));
  756. XOR_8_BY(encp[g],ib+8*g,block_out+8*g);
  757. DBG(dump_mem("after_ib_decrypt_data ",encp[g],8,8));
  758. }
  759. // just dead packets: write decrypted data
  760. for(g=alive[iter];g<alive[iter-1];g++){
  761. DBG(dump_mem("jd_before_ib_decrypt_data ",encp[g],8,8));
  762. COPY_8_BY(encp[g],block_out+8*g);
  763. DBG(dump_mem("jd_after_ib_decrypt_data ",encp[g],8,8));
  764. }
  765. // just dead packets: decrypt residue
  766. for(g=alive[iter];g<alive[iter-1];g++){
  767. DBG(dump_mem("jd_before_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
  768. FFTABLEOUTXORNBY(g_residue[g],encp[g]+8,stream_out,g);
  769. DBG(dump_mem("jd_after_decrypt_residue ",encp[g]+8,g_residue[g],g_residue[g]));
  770. }
  771. // alive packets: pointers++
  772. for(g=0;g<alive[iter];g++) encp[g]+=8;
  773. };
  774. // ITER N
  775. DBG(fprintf(stderr,">>>>>ITER 23\n"));
  776. iter=23;
  777. // calc block
  778. block_decypher_group(k->kkmulti,ib,block_out,alive[iter-1]);
  779. DBG(dump_mem("23BLO_ib ",block_out,8*alive[iter-1],8));
  780. // just dead packets: write decrypted data
  781. for(g=alive[iter];g<alive[iter-1];g++){
  782. DBG(dump_mem("23jd_before_ib_decrypt_data ",encp[g],8,8));
  783. COPY_8_BY(encp[g],block_out+8*g);
  784. DBG(dump_mem("23jd_after_ib_decrypt_data ",encp[g],8,8));
  785. }
  786. // no residue possible
  787. // so do nothing
  788. DBG(fprintf(stderr,"returning advanced=%i\n",advanced));
  789. M_EMPTY(); // restore CPU multimedia state
  790. return advanced;
  791. }