tsdecrypt reads an incoming CSA-encrypted MPEG transport stream over UDP/RTP and decrypts it using code words obtained from OSCAM or a similar CAM server. tsdecrypt communicates with the CAM server using the cs378x (camd35 over TCP) protocol or the newcamd protocol. https://georgi.unixsol.org/programs/tsdecrypt/
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

stream.c 33KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906
  1. /* FFdecsa -- fast decsa algorithm
  2. *
  3. * Copyright (C) 2003-2004 fatih89r
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with this program; if not, write to the Free Software
  17. * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  18. */
  #include <stdint.h>
  #include <string.h>
  // define statics only once, when STREAM_INIT
  20. #ifdef STREAM_INIT
  21. struct stream_regs {
  22. group A[32+10][4]; // 32 because we will move back (virtual shift register)
  23. group B[32+10][4]; // 32 because we will move back (virtual shift register)
  24. group X[4];
  25. group Y[4];
  26. group Z[4];
  27. group D[4];
  28. group E[4];
  29. group F[4];
  30. group p;
  31. group q;
  32. group r;
  33. };
  /**
   * trasp64_32_88ccw - in-place bit shuffle of a 64-row by 32-bit matrix.
   * @data: 256 bytes, treated as 64 consecutive native-endian 32-bit words.
   *        Word r is row r; bit c of that word is column c.  No alignment
   *        is required.
   *
   * Original description: 64 rows of 32 bits transposition
   * (bytes transp. - 8x8 rotate counterclockwise).
   *
   * Rows 0..31 and 32..63 are handled as two independent blocks.  Five
   * butterfly passes pair each row with the row `half` below it, for
   * half = 16, 8, 4, 2, 1; the shift distance always equals `half`:
   *   - half 16, 8: the high columns of the upper row trade places with the
   *     low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position instead,
   *     which yields the counterclockwise variant of the shuffle.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 32 bits.
   */
  static inline void trasp64_32_88ccw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint32_t swap_lo[2] = { 0x0000ffffu, 0x00ff00ffu };
    static const uint32_t turn_lo[3] = { 0x0f0f0f0fu, 0x33333333u, 0x55555555u };
    uint32_t row[64];
    int pass, half, base, k;

    memcpy(row, data, sizeof row);

    /* Passes with half = 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 16; pass < 2; pass++, half >>= 1) {
      const uint32_t lo = swap_lo[pass];
      const uint32_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint32_t t = row[base + k];
          const uint32_t b = row[base + half + k];
          row[base + k]        = (t & lo) | ((b & lo) << half);
          row[base + half + k] = ((t & hi) >> half) | (b & hi);
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (counterclockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint32_t lo = turn_lo[pass];
      const uint32_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint32_t t = row[base + k];
          const uint32_t b = row[base + half + k];
          row[base + k]        = ((t & lo) << half) | (b & lo);
          row[base + half + k] = (t & hi) | ((b & hi) >> half);
        }
      }
    }

    memcpy(data, row, sizeof row);
  }
  /**
   * trasp64_32_88cw - in-place bit shuffle of a 64-row by 32-bit matrix.
   * @data: 256 bytes, treated as 64 consecutive native-endian 32-bit words.
   *        Word r is row r; bit c of that word is column c.  No alignment
   *        is required.
   *
   * Original description: 64 rows of 32 bits transposition
   * (bytes transp. - 8x8 rotate clockwise).
   *
   * Rows 0..31 and 32..63 are handled as two independent blocks.  Five
   * butterfly passes pair each row with the row `half` below it, for
   * half = 16, 8, 4, 2, 1; the shift distance always equals `half`:
   *   - half 16, 8: the high columns of the upper row trade places with the
   *     low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position in the
   *     opposite direction to the "ccw" variant, i.e. clockwise.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 32 bits.
   */
  static inline void trasp64_32_88cw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint32_t swap_lo[2] = { 0x0000ffffu, 0x00ff00ffu };
    static const uint32_t turn_lo[3] = { 0x0f0f0f0fu, 0x33333333u, 0x55555555u };
    uint32_t row[64];
    int pass, half, base, k;

    memcpy(row, data, sizeof row);

    /* Passes with half = 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 16; pass < 2; pass++, half >>= 1) {
      const uint32_t lo = swap_lo[pass];
      const uint32_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint32_t t = row[base + k];
          const uint32_t b = row[base + half + k];
          row[base + k]        = (t & lo) | ((b & lo) << half);
          row[base + half + k] = ((t & hi) >> half) | (b & hi);
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (clockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint32_t lo = turn_lo[pass];
      const uint32_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint32_t t = row[base + k];
          const uint32_t b = row[base + half + k];
          row[base + k]        = ((t & hi) >> half) | (b & hi);
          row[base + half + k] = (t & lo) | ((b & lo) << half);
        }
      }
    }

    memcpy(data, row, sizeof row);
  }
  136. //64-64----------------------------------------------------------
  /**
   * trasp64_64_88ccw - in-place bit shuffle of a 64-row by 64-bit matrix.
   * @data: 512 bytes, treated as 64 consecutive native-endian 64-bit words.
   *        Word r is row r; bit c of that word is column c.  No alignment
   *        is required.
   *
   * Original description: 64 rows of 64 bits transposition
   * (bytes transp. - 8x8 rotate counterclockwise).
   *
   * Six butterfly passes pair each row with the row `half` below it, for
   * half = 32, 16, 8, 4, 2, 1; the shift distance always equals `half`:
   *   - half 32, 16, 8: the high columns of the upper row trade places with
   *     the low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position instead,
   *     which yields the counterclockwise variant of the shuffle.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 64 bits.
   */
  static inline void trasp64_64_88ccw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint64_t swap_lo[3] = {
      0x00000000ffffffffULL, 0x0000ffff0000ffffULL, 0x00ff00ff00ff00ffULL
    };
    static const uint64_t turn_lo[3] = {
      0x0f0f0f0f0f0f0f0fULL, 0x3333333333333333ULL, 0x5555555555555555ULL
    };
    uint64_t row[64];
    int pass, half, base, k;

    memcpy(row, data, sizeof row);

    /* Passes with half = 32, 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 32; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = swap_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint64_t t = row[base + k];
          const uint64_t b = row[base + half + k];
          row[base + k]        = (t & lo) | ((b & lo) << half);
          row[base + half + k] = ((t & hi) >> half) | (b & hi);
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (counterclockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = turn_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint64_t t = row[base + k];
          const uint64_t b = row[base + half + k];
          row[base + k]        = ((t & lo) << half) | (b & lo);
          row[base + half + k] = (t & hi) | ((b & hi) >> half);
        }
      }
    }

    memcpy(data, row, sizeof row);
  }
  /**
   * trasp64_64_88cw - in-place bit shuffle of a 64-row by 64-bit matrix.
   * @data: 512 bytes, treated as 64 consecutive native-endian 64-bit words.
   *        Word r is row r; bit c of that word is column c.  No alignment
   *        is required.
   *
   * Original description: 64 rows of 64 bits transposition
   * (bytes transp. - 8x8 rotate clockwise).
   *
   * Six butterfly passes pair each row with the row `half` below it, for
   * half = 32, 16, 8, 4, 2, 1; the shift distance always equals `half`:
   *   - half 32, 16, 8: the high columns of the upper row trade places with
   *     the low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position in the
   *     opposite direction to the "ccw" variant, i.e. clockwise.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 64 bits.
   */
  static inline void trasp64_64_88cw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint64_t swap_lo[3] = {
      0x00000000ffffffffULL, 0x0000ffff0000ffffULL, 0x00ff00ff00ff00ffULL
    };
    static const uint64_t turn_lo[3] = {
      0x0f0f0f0f0f0f0f0fULL, 0x3333333333333333ULL, 0x5555555555555555ULL
    };
    uint64_t row[64];
    int pass, half, base, k;

    memcpy(row, data, sizeof row);

    /* Passes with half = 32, 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 32; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = swap_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint64_t t = row[base + k];
          const uint64_t b = row[base + half + k];
          row[base + k]        = (t & lo) | ((b & lo) << half);
          row[base + half + k] = ((t & hi) >> half) | (b & hi);
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (clockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = turn_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          const uint64_t t = row[base + k];
          const uint64_t b = row[base + half + k];
          row[base + k]        = ((t & hi) >> half) | (b & hi);
          row[base + half + k] = (t & lo) | ((b & lo) << half);
        }
      }
    }

    memcpy(data, row, sizeof row);
  }
  257. //64-128----------------------------------------------------------
  /**
   * trasp64_128_88ccw - in-place bit shuffle of a 64-row by 128-bit matrix.
   * @data: 1024 bytes, treated as 128 consecutive native-endian 64-bit words.
   *        Row r occupies words 2*r and 2*r+1.  No alignment is required.
   *
   * Original description: 64 rows of 128 bits transposition
   * (bytes transp. - 8x8 rotate counterclockwise).
   *
   * The two 64-bit halves of a row never mix: each half column (all even
   * words, all odd words) goes through exactly the passes of the 64-bit
   * variant.  Six butterfly passes pair each row with the row `half` below
   * it, for half = 32, 16, 8, 4, 2, 1; the shift distance equals `half`:
   *   - half 32, 16, 8: the high columns of the upper row trade places with
   *     the low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position instead,
   *     which yields the counterclockwise variant of the shuffle.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 64 bits.
   */
  static inline void trasp64_128_88ccw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint64_t swap_lo[3] = {
      0x00000000ffffffffULL, 0x0000ffff0000ffffULL, 0x00ff00ff00ff00ffULL
    };
    static const uint64_t turn_lo[3] = {
      0x0f0f0f0f0f0f0f0fULL, 0x3333333333333333ULL, 0x5555555555555555ULL
    };
    uint64_t halfrow[128];
    int pass, half, base, k, part;

    memcpy(halfrow, data, sizeof halfrow);

    /* Passes with half = 32, 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 32; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = swap_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          for (part = 0; part < 2; part++) {
            uint64_t *top = &halfrow[2 * (base + k) + part];
            uint64_t *bot = &halfrow[2 * (base + half + k) + part];
            const uint64_t t = *top;
            const uint64_t b = *bot;
            *top = (t & lo) | ((b & lo) << half);
            *bot = ((t & hi) >> half) | (b & hi);
          }
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (counterclockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = turn_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          for (part = 0; part < 2; part++) {
            uint64_t *top = &halfrow[2 * (base + k) + part];
            uint64_t *bot = &halfrow[2 * (base + half + k) + part];
            const uint64_t t = *top;
            const uint64_t b = *bot;
            *top = ((t & lo) << half) | (b & lo);
            *bot = (t & hi) | ((b & hi) >> half);
          }
        }
      }
    }

    memcpy(data, halfrow, sizeof halfrow);
  }
  /**
   * trasp64_128_88cw - in-place bit shuffle of a 64-row by 128-bit matrix.
   * @data: 1024 bytes, treated as 128 consecutive native-endian 64-bit words.
   *        Row r occupies words 2*r and 2*r+1.  No alignment is required.
   *
   * Original description: 64 rows of 128 bits transposition
   * (bytes transp. - 8x8 rotate clockwise).
   *
   * The two 64-bit halves of a row never mix: each half column (all even
   * words, all odd words) goes through exactly the passes of the 64-bit
   * variant.  Six butterfly passes pair each row with the row `half` below
   * it, for half = 32, 16, 8, 4, 2, 1; the shift distance equals `half`:
   *   - half 32, 16, 8: the high columns of the upper row trade places with
   *     the low columns of the lower row (plain transpose step);
   *   - half 4, 2, 1: the four quadrants are cycled one position in the
   *     opposite direction to the "ccw" variant, i.e. clockwise.
   *
   * The words are staged in a local array with memcpy() rather than reading
   * the byte buffer through a casted pointer: this avoids the strict-aliasing
   * and alignment problems of the cast and pins the word size to 64 bits.
   */
  static inline void trasp64_128_88cw(unsigned char *data){
    /* Masks selecting the low columns of each pair, indexed by pass. */
    static const uint64_t swap_lo[3] = {
      0x00000000ffffffffULL, 0x0000ffff0000ffffULL, 0x00ff00ff00ff00ffULL
    };
    static const uint64_t turn_lo[3] = {
      0x0f0f0f0f0f0f0f0fULL, 0x3333333333333333ULL, 0x5555555555555555ULL
    };
    uint64_t halfrow[128];
    int pass, half, base, k, part;

    memcpy(halfrow, data, sizeof halfrow);

    /* Passes with half = 32, 16, 8: swap the off-diagonal quadrants. */
    for (pass = 0, half = 32; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = swap_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          for (part = 0; part < 2; part++) {
            uint64_t *top = &halfrow[2 * (base + k) + part];
            uint64_t *bot = &halfrow[2 * (base + half + k) + part];
            const uint64_t t = *top;
            const uint64_t b = *bot;
            *top = (t & lo) | ((b & lo) << half);
            *bot = ((t & hi) >> half) | (b & hi);
          }
        }
      }
    }

    /* Passes with half = 4, 2, 1: cycle the quadrants (clockwise). */
    for (pass = 0, half = 4; pass < 3; pass++, half >>= 1) {
      const uint64_t lo = turn_lo[pass];
      const uint64_t hi = ~lo;
      for (base = 0; base < 64; base += 2 * half) {
        for (k = 0; k < half; k++) {
          for (part = 0; part < 2; part++) {
            uint64_t *top = &halfrow[2 * (base + k) + part];
            uint64_t *bot = &halfrow[2 * (base + half + k) + part];
            const uint64_t t = *top;
            const uint64_t b = *bot;
            *top = ((t & hi) >> half) | (b & hi);
            *bot = (t & lo) | ((b & lo) << half);
          }
        }
      }
    }

    memcpy(data, halfrow, sizeof halfrow);
  }
  426. #endif
  427. #ifdef STREAM_INIT
  428. void stream_cypher_group_init(
  429. struct stream_regs *regs,
  430. group iA[8][4], // [In] iA00,iA01,...iA73 32 groups | Derived from key.
  431. group iB[8][4], // [In] iB00,iB01,...iB73 32 groups | Derived from key.
  432. unsigned char *sb) // [In] (SB0,SB1,...SB7)...x32 32*8 bytes | Extra input.
  433. #endif
  434. #ifdef STREAM_NORMAL
  435. void stream_cypher_group_normal(
  436. struct stream_regs *regs,
  437. unsigned char *cb) // [Out] (CB0,CB1,...CB7)...x32 32*8 bytes | Output.
  438. #endif
  439. {
  440. #ifdef STREAM_INIT
  441. group in1[4];
  442. group in2[4];
  443. #endif
  444. group extra_B[4];
  445. group fa,fb,fc,fd,fe;
  446. group s1a,s1b,s2a,s2b,s3a,s3b,s4a,s4b,s5a,s5b,s6a,s6b,s7a,s7b;
  447. group next_E[4];
  448. group tmp0,tmp1,tmp2,tmp3,tmp4;
  449. #ifdef STREAM_INIT
  450. group *sb_g=(group *)sb;
  451. #endif
  452. #ifdef STREAM_NORMAL
  453. group *cb_g=(group *)cb;
  454. #endif
  455. int aboff;
  456. int i,j,k,b;
  457. int dbg;
  458. #ifdef STREAM_INIT
  459. DBG(fprintf(stderr,":::::::::: BEGIN STREAM INIT\n"));
  460. #endif
  461. #ifdef STREAM_NORMAL
  462. DBG(fprintf(stderr,":::::::::: BEGIN STREAM NORMAL\n"));
  463. #endif
  464. #ifdef STREAM_INIT
  465. for(j=0;j<64;j++){
  466. DBG(fprintf(stderr,"precall prerot stream_in[%2i]=",j));
  467. DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
  468. }
  469. DBG(dump_mem("stream_prerot ",sb,GROUP_PARALLELISM*8,BYPG));
  470. #if GROUP_PARALLELISM==32
  471. trasp64_32_88ccw(sb);
  472. #endif
  473. #if GROUP_PARALLELISM==64
  474. trasp64_64_88ccw(sb);
  475. #endif
  476. #if GROUP_PARALLELISM==128
  477. trasp64_128_88ccw(sb);
  478. #endif
  479. DBG(dump_mem("stream_postrot",sb,GROUP_PARALLELISM*8,BYPG));
  480. for(j=0;j<64;j++){
  481. DBG(fprintf(stderr,"precall stream_in[%2i]=",j));
  482. DBG(dump_mem("",sb+BYPG*j,BYPG,BYPG));
  483. }
  484. #endif
  485. aboff=32;
  486. #ifdef STREAM_INIT
  487. // load first 32 bits of ck into A[aboff+0]..A[aboff+7]
  488. // load last 32 bits of ck into B[aboff+0]..B[aboff+7]
  489. // all other regs = 0
  490. for(i=0;i<8;i++){
  491. for(b=0;b<4;b++){
  492. DBG(fprintf(stderr,"dbg from iA A[%i][%i]=",i,b));
  493. DBG(dump_mem("",(unsigned char *)&iA[i][b],BYPG,BYPG));
  494. DBG(fprintf(stderr," dbg from iB B[%i][%i]=",i,b));
  495. DBG(dump_mem("",(unsigned char *)&iB[i][b],BYPG,BYPG));
  496. regs->A[aboff+i][b]=iA[i][b];
  497. regs->B[aboff+i][b]=iB[i][b];
  498. }
  499. }
  500. for(b=0;b<4;b++){
  501. regs->A[aboff+8][b]=FF0();
  502. regs->A[aboff+9][b]=FF0();
  503. regs->B[aboff+8][b]=FF0();
  504. regs->B[aboff+9][b]=FF0();
  505. }
  506. for(b=0;b<4;b++){
  507. regs->X[b]=FF0();
  508. regs->Y[b]=FF0();
  509. regs->Z[b]=FF0();
  510. regs->D[b]=FF0();
  511. regs->E[b]=FF0();
  512. regs->F[b]=FF0();
  513. }
  514. regs->p=FF0();
  515. regs->q=FF0();
  516. regs->r=FF0();
  517. #endif
  518. for(dbg=0;dbg<4;dbg++){
  519. DBG(fprintf(stderr,"dbg A0[%i]=",dbg));
  520. DBG(dump_mem("",(unsigned char *)&regs->A[aboff+0][dbg],BYPG,BYPG));
  521. DBG(fprintf(stderr,"dbg B0[%i]=",dbg));
  522. DBG(dump_mem("",(unsigned char *)&regs->B[aboff+0][dbg],BYPG,BYPG));
  523. }
  524. ////////////////////////////////////////////////////////////////////////////////
  525. // EXTERNAL LOOP - 8 bytes per operation
  526. for(i=0;i<8;i++){
  527. DBG(fprintf(stderr,"--BEGIN EXTERNAL LOOP %i\n",i));
  528. #ifdef STREAM_INIT
  529. for(b=0;b<4;b++){
  530. in1[b]=sb_g[8*i+4+b];
  531. in2[b]=sb_g[8*i+b];
  532. }
  533. #endif
  534. // INTERNAL LOOP - 2 bits per iteration
  535. for(j=0; j<4; j++){
  536. DBG(fprintf(stderr,"---BEGIN INTERNAL LOOP %i (EXT %i, INT %i)\n",j,i,j));
  537. // from A0..A9, 35 bits are selected as inputs to 7 s-boxes
  538. // 5 bits input per s-box, 2 bits output per s-box
  539. // we can select bits with zero masking and shifting operations
  540. // and synthetize s-boxes with optimized boolean functions.
  541. // this is the actual reason we do all the crazy transposition
  542. // stuff to switch between normal and bit slice representations.
  543. // this code really flies.
  544. fe=regs->A[aboff+3][0];fa=regs->A[aboff+0][2];fb=regs->A[aboff+5][1];fc=regs->A[aboff+6][3];fd=regs->A[aboff+8][0];
  545. /* 1000 1110 1110 0001 : lev 7: */ //tmp0=( fa^( fb^( ( ( ( fa|fb )^fc )|( fc^fd ) )^ALL_ONES ) ) );
  546. /* 1110 0010 0011 0011 : lev 6: */ //tmp1=( ( fa|fb )^( ( fc&( fa|( fb^fd ) ) )^ALL_ONES ) );
  547. /* 0011 0110 1000 1101 : lev 5: */ //tmp2=( fa^( ( fb&fd )^( ( fa&fd )|fc ) ) );
  548. /* 0101 0101 1001 0011 : lev 5: */ //tmp3=( ( fa&fc )^( fa^( ( fa&fb )|fd ) ) );
  549. /* 1000 1110 1110 0001 : lev 7: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFOR(FFXOR(FFOR(fa,fb),fc),FFXOR(fc,fd)),FF1())));
  550. /* 1110 0010 0011 0011 : lev 6: */ tmp1=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fa,FFXOR(fb,fd))),FF1()));
  551. /* 0011 0110 1000 1101 : lev 5: */ tmp2=FFXOR(fa,FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),fc)));
  552. /* 0101 0101 1001 0011 : lev 5: */ tmp3=FFXOR(FFAND(fa,fc),FFXOR(fa,FFOR(FFAND(fa,fb),fd)));
  553. s1a=FFXOR(tmp0,FFAND(fe,tmp1));
  554. s1b=FFXOR(tmp2,FFAND(fe,tmp3));
  555. //dump_mem("s1as1b-fe",&fe,BYPG,BYPG);
  556. //dump_mem("s1as1b-fa",&fa,BYPG,BYPG);
  557. //dump_mem("s1as1b-fb",&fb,BYPG,BYPG);
  558. //dump_mem("s1as1b-fc",&fc,BYPG,BYPG);
  559. //dump_mem("s1as1b-fd",&fd,BYPG,BYPG);
  560. fe=regs->A[aboff+1][1];fa=regs->A[aboff+2][2];fb=regs->A[aboff+5][3];fc=regs->A[aboff+6][0];fd=regs->A[aboff+8][1];
  561. /* 1001 1110 0110 0001 : lev 6: */ //tmp0=( fa^( ( fb&( fc|fd ) )^( fc^( fd^ALL_ONES ) ) ) );
  562. /* 0000 0011 0111 1011 : lev 5: */ //tmp1=( ( fa&( fb^fd ) )|( ( fa|fb )&fc ) );
  563. /* 1100 0110 1101 0010 : lev 6: */ //tmp2=( ( fb&fd )^( ( fa&fd )|( fb^( fc^ALL_ONES ) ) ) );
  564. /* 0001 1110 1111 0101 : lev 5: */ //tmp3=( ( fa&fd )|( fa^( fb^( fc&fd ) ) ) );
  565. /* 1001 1110 0110 0001 : lev 6: */ tmp0=FFXOR(fa,FFXOR(FFAND(fb,FFOR(fc,fd)),FFXOR(fc,FFXOR(fd,FF1()))));
  566. /* 0000 0011 0111 1011 : lev 5: */ tmp1=FFOR(FFAND(fa,FFXOR(fb,fd)),FFAND(FFOR(fa,fb),fc));
  567. /* 1100 0110 1101 0010 : lev 6: */ tmp2=FFXOR(FFAND(fb,fd),FFOR(FFAND(fa,fd),FFXOR(fb,FFXOR(fc,FF1()))));
  568. /* 0001 1110 1111 0101 : lev 5: */ tmp3=FFOR(FFAND(fa,fd),FFXOR(fa,FFXOR(fb,FFAND(fc,fd))));
  569. s2a=FFXOR(tmp0,FFAND(fe,tmp1));
  570. s2b=FFXOR(tmp2,FFAND(fe,tmp3));
  571. fe=regs->A[aboff+0][3];fa=regs->A[aboff+1][0];fb=regs->A[aboff+4][1];fc=regs->A[aboff+4][3];fd=regs->A[aboff+5][2];
  572. /* 0100 1011 1001 0110 : lev 5: */ //tmp0=( fa^( fb^( ( fc&( fa|fd ) )^fd ) ) );
  573. /* 1101 0101 1000 1100 : lev 7: */ //tmp1=( ( fa&fc )^( ( fa^fd )|( ( fb|fc )^( fd^ALL_ONES ) ) ) );
  574. /* 0010 0111 1101 1000 : lev 4: */ //tmp2=( fa^( ( ( fb^fc )&fd )^fc ) );
  575. /* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
  576. /* 0100 1011 1001 0110 : lev 5: */ tmp0=FFXOR(fa,FFXOR(fb,FFXOR(FFAND(fc,FFOR(fa,fd)),fd)));
  577. /* 1101 0101 1000 1100 : lev 7: */ tmp1=FFXOR(FFAND(fa,fc),FFOR(FFXOR(fa,fd),FFXOR(FFOR(fb,fc),FFXOR(fd,FF1()))));
  578. /* 0010 0111 1101 1000 : lev 4: */ tmp2=FFXOR(fa,FFXOR(FFAND(FFXOR(fb,fc),fd),fc));
  579. /* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
  580. s3a=FFXOR(tmp0,FFAND(FFNOT(fe),tmp1));
  581. s3b=FFXOR(tmp2,FFAND(fe,tmp3));
  582. fe=regs->A[aboff+2][3];fa=regs->A[aboff+0][1];fb=regs->A[aboff+1][3];fc=regs->A[aboff+3][2];fd=regs->A[aboff+7][0];
  583. /* 1011 0101 0100 1001 : lev 7: */ //tmp0=( fa^( ( fc&( fa^fd ) )|( fb^( fc|( fd^ALL_ONES ) ) ) ) );
  584. /* 0010 1101 0110 0110 : lev 6: */ //tmp1=( ( fa&fb )^( fb^( ( ( fa|fc )&fd )^fc ) ) );
  585. /* 0110 0111 1101 0000 : lev 7: */ //tmp2=( fa^( ( fb&fc )|( ( ( fa&( fb^fd ) )|fc )^fd ) ) );
  586. /* 1111 1111 1111 1111 : lev 0: */ //tmp3=ALL_ONES;
  587. /* 1011 0101 0100 1001 : lev 7: */ tmp0=FFXOR(fa,FFOR(FFAND(fc,FFXOR(fa,fd)),FFXOR(fb,FFOR(fc,FFXOR(fd,FF1())))));
  588. /* 0010 1101 0110 0110 : lev 6: */ tmp1=FFXOR(FFAND(fa,fb),FFXOR(fb,FFXOR(FFAND(FFOR(fa,fc),fd),fc)));
  589. /* 0110 0111 1101 0000 : lev 7: */ tmp2=FFXOR(fa,FFOR(FFAND(fb,fc),FFXOR(FFOR(FFAND(fa,FFXOR(fb,fd)),fc),fd)));
  590. /* 1111 1111 1111 1111 : lev 0: */ tmp3=FF1();
  591. s4a=FFXOR(tmp0,FFAND(fe,FFXOR(tmp1,tmp0)));
  592. s4b=FFXOR(FFXOR(s4a,tmp2),FFAND(fe,tmp3));
  593. fe=regs->A[aboff+4][2];fa=regs->A[aboff+3][3];fb=regs->A[aboff+5][0];fc=regs->A[aboff+7][1];fd=regs->A[aboff+8][2];
  594. /* 1000 1111 0011 0010 : lev 7: */ //tmp0=( ( ( fa&( fb|fc ) )^fb )|( ( ( fa^fc )|fd )^ALL_ONES ) );
  595. /* 0110 1011 0000 1011 : lev 6: */ //tmp1=( fb^( ( fc^fd )&( fc^( fb|( fa^fd ) ) ) ) );
  596. /* 0001 1010 0111 1001 : lev 6: */ //tmp2=( ( fa&fc )^( fb^( ( fb|( fa^fc ) )&fd ) ) );
  597. /* 0101 1101 1101 0101 : lev 4: */ //tmp3=( ( ( fa^fb )&( fc^ALL_ONES ) )|fd );
  598. /* 1000 1111 0011 0010 : lev 7: */ tmp0=FFOR(FFXOR(FFAND(fa,FFOR(fb,fc)),fb),FFXOR(FFOR(FFXOR(fa,fc),fd),FF1()));
  599. /* 0110 1011 0000 1011 : lev 6: */ tmp1=FFXOR(fb,FFAND(FFXOR(fc,fd),FFXOR(fc,FFOR(fb,FFXOR(fa,fd)))));
  600. /* 0001 1010 0111 1001 : lev 6: */ tmp2=FFXOR(FFAND(fa,fc),FFXOR(fb,FFAND(FFOR(fb,FFXOR(fa,fc)),fd)));
  601. /* 0101 1101 1101 0101 : lev 4: */ tmp3=FFOR(FFAND(FFXOR(fa,fb),FFXOR(fc,FF1())),fd);
  602. s5a=FFXOR(tmp0,FFAND(fe,tmp1));
  603. s5b=FFXOR(tmp2,FFAND(fe,tmp3));
  604. fe=regs->A[aboff+2][1];fa=regs->A[aboff+3][1];fb=regs->A[aboff+4][0];fc=regs->A[aboff+6][2];fd=regs->A[aboff+8][3];
  605. /* 0011 0110 0010 1101 : lev 6: */ //tmp0=( ( ( fa&fc )&fd )^( ( fb&( fa|fd ) )^fc ) );
  606. /* 1110 1110 1011 1011 : lev 3: */ //tmp1=( ( ( fa^fc )&fd )^ALL_ONES );
  607. /* 0101 1000 0110 0111 : lev 6: */ //tmp2=( ( fa&( fb|fc ) )^( fb^( ( fb&fc )|fd ) ) );
  608. /* 0001 0011 0000 0001 : lev 5: */ //tmp3=( fc&( ( fa&( fb^fd ) )^( fb|fd ) ) );
  609. /* 0011 0110 0010 1101 : lev 6: */ tmp0=FFXOR(FFAND(FFAND(fa,fc),fd),FFXOR(FFAND(fb,FFOR(fa,fd)),fc));
  610. /* 1110 1110 1011 1011 : lev 3: */ tmp1=FFXOR(FFAND(FFXOR(fa,fc),fd),FF1());
  611. /* 0101 1000 0110 0111 : lev 6: */ tmp2=FFXOR(FFAND(fa,FFOR(fb,fc)),FFXOR(fb,FFOR(FFAND(fb,fc),fd)));
  612. /* 0001 0011 0000 0001 : lev 5: */ tmp3=FFAND(fc,FFXOR(FFAND(fa,FFXOR(fb,fd)),FFOR(fb,fd)));
  613. s6a=FFXOR(tmp0,FFAND(fe,tmp1));
  614. s6b=FFXOR(tmp2,FFAND(fe,tmp3));
  615. fe=regs->A[aboff+1][2];fa=regs->A[aboff+2][0];fb=regs->A[aboff+6][1];fc=regs->A[aboff+7][2];fd=regs->A[aboff+7][3];
  616. /* 0111 1000 1001 0110 : lev 5: */ //tmp0=( fb^( ( fc&fd )|( fa^( fc^fd ) ) ) );
  617. /* 0100 1001 0101 1011 : lev 6: */ //tmp1=( ( fb|fd )&( ( fa&fc )|( fb^( fc^fd ) ) ) );
  618. /* 0100 1001 1011 1001 : lev 5: */ //tmp2=( ( fa|fb )^( ( fc&( fb|fd ) )^fd ) );
  619. /* 1111 1111 1101 1101 : lev 3: */ //tmp3=( fd|( ( fa&fc )^ALL_ONES ) );
  620. /* 0111 1000 1001 0110 : lev 5: */ tmp0=FFXOR(fb,FFOR(FFAND(fc,fd),FFXOR(fa,FFXOR(fc,fd))));
  621. /* 0100 1001 0101 1011 : lev 6: */ tmp1=FFAND(FFOR(fb,fd),FFOR(FFAND(fa,fc),FFXOR(fb,FFXOR(fc,fd))));
  622. /* 0100 1001 1011 1001 : lev 5: */ tmp2=FFXOR(FFOR(fa,fb),FFXOR(FFAND(fc,FFOR(fb,fd)),fd));
  623. /* 1111 1111 1101 1101 : lev 3: */ tmp3=FFOR(fd,FFXOR(FFAND(fa,fc),FF1()));
  624. s7a=FFXOR(tmp0,FFAND(fe,tmp1));
  625. s7b=FFXOR(tmp2,FFAND(fe,tmp3));
  626. /*
  627. we have just done this:
  628. int sbox1[0x20] = {2,0,1,1,2,3,3,0, 3,2,2,0,1,1,0,3, 0,3,3,0,2,2,1,1, 2,2,0,3,1,1,3,0};
  629. int sbox2[0x20] = {3,1,0,2,2,3,3,0, 1,3,2,1,0,0,1,2, 3,1,0,3,3,2,0,2, 0,0,1,2,2,1,3,1};
  630. int sbox3[0x20] = {2,0,1,2,2,3,3,1, 1,1,0,3,3,0,2,0, 1,3,0,1,3,0,2,2, 2,0,1,2,0,3,3,1};
  631. int sbox4[0x20] = {3,1,2,3,0,2,1,2, 1,2,0,1,3,0,0,3, 1,0,3,1,2,3,0,3, 0,3,2,0,1,2,2,1};
  632. int sbox5[0x20] = {2,0,0,1,3,2,3,2, 0,1,3,3,1,0,2,1, 2,3,2,0,0,3,1,1, 1,0,3,2,3,1,0,2};
  633. int sbox6[0x20] = {0,1,2,3,1,2,2,0, 0,1,3,0,2,3,1,3, 2,3,0,2,3,0,1,1, 2,1,1,2,0,3,3,0};
  634. int sbox7[0x20] = {0,3,2,2,3,0,0,1, 3,0,1,3,1,2,2,1, 1,0,3,3,0,1,1,2, 2,3,1,0,2,3,0,2};
  635. s12 = sbox1[ (((A3>>0)&1)<<4) | (((A0>>2)&1)<<3) | (((A5>>1)&1)<<2) | (((A6>>3)&1)<<1) | (((A8>>0)&1)<<0) ]
  636. |sbox2[ (((A1>>1)&1)<<4) | (((A2>>2)&1)<<3) | (((A5>>3)&1)<<2) | (((A6>>0)&1)<<1) | (((A8>>1)&1)<<0) ];
  637. s34 = sbox3[ (((A0>>3)&1)<<4) | (((A1>>0)&1)<<3) | (((A4>>1)&1)<<2) | (((A4>>3)&1)<<1) | (((A5>>2)&1)<<0) ]
  638. |sbox4[ (((A2>>3)&1)<<4) | (((A0>>1)&1)<<3) | (((A1>>3)&1)<<2) | (((A3>>2)&1)<<1) | (((A7>>0)&1)<<0) ];
  639. s56 = sbox5[ (((A4>>2)&1)<<4) | (((A3>>3)&1)<<3) | (((A5>>0)&1)<<2) | (((A7>>1)&1)<<1) | (((A8>>2)&1)<<0) ]
  640. |sbox6[ (((A2>>1)&1)<<4) | (((A3>>1)&1)<<3) | (((A4>>0)&1)<<2) | (((A6>>2)&1)<<1) | (((A8>>3)&1)<<0) ];
  641. s7 = sbox7[ (((A1>>2)&1)<<4) | (((A2>>0)&1)<<3) | (((A6>>1)&1)<<2) | (((A7>>2)&1)<<1) | (((A7>>3)&1)<<0) ];
  642. */
  643. // use 4x4 xor to produce extra nibble for T3
  644. extra_B[3]=FFXOR(FFXOR(FFXOR(regs->B[aboff+2][0],regs->B[aboff+5][1]),regs->B[aboff+6][2]),regs->B[aboff+8][3]);
  645. extra_B[2]=FFXOR(FFXOR(FFXOR(regs->B[aboff+5][0],regs->B[aboff+7][1]),regs->B[aboff+2][3]),regs->B[aboff+3][2]);
  646. extra_B[1]=FFXOR(FFXOR(FFXOR(regs->B[aboff+4][3],regs->B[aboff+7][2]),regs->B[aboff+3][0]),regs->B[aboff+4][1]);
  647. extra_B[0]=FFXOR(FFXOR(FFXOR(regs->B[aboff+8][2],regs->B[aboff+5][3]),regs->B[aboff+2][1]),regs->B[aboff+7][0]);
  648. for(dbg=0;dbg<4;dbg++){
  649. DBG(fprintf(stderr,"extra_B[%i]=",dbg));
  650. DBG(dump_mem("",(unsigned char *)&extra_B[dbg],BYPG,BYPG));
  651. }
  652. // T1 = xor all inputs
  653. // in1, in2, D are only used in T1 during initialisation, not generation
  654. for(b=0;b<4;b++){
  655. regs->A[aboff-1][b]=FFXOR(regs->A[aboff+9][b],regs->X[b]);
  656. }
  657. #ifdef STREAM_INIT
  658. for(b=0;b<4;b++){
  659. regs->A[aboff-1][b]=FFXOR(FFXOR(regs->A[aboff-1][b],regs->D[b]),((j % 2) ? in2[b] : in1[b]));
  660. }
  661. #endif
  662. for(dbg=0;dbg<4;dbg++){
  663. DBG(fprintf(stderr,"next_A0[%i]=",dbg));
  664. DBG(dump_mem("",(unsigned char *)&regs->A[aboff-1][dbg],BYPG,BYPG));
  665. }
  666. // T2 = xor all inputs
  667. // in1, in2 are only used in T2 during initialisation, not generation
  668. // if p=0, use this, if p=1, rotate the result left
  669. for(b=0;b<4;b++){
  670. regs->B[aboff-1][b]=FFXOR(FFXOR(regs->B[aboff+6][b],regs->B[aboff+9][b]),regs->Y[b]);
  671. }
  672. #ifdef STREAM_INIT
  673. for(b=0;b<4;b++){
  674. regs->B[aboff-1][b]=FFXOR(regs->B[aboff-1][b],((j % 2) ? in1[b] : in2[b]));
  675. }
  676. #endif
  677. for(dbg=0;dbg<4;dbg++){
  678. DBG(fprintf(stderr,"next_B0[%i]=",dbg));
  679. DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
  680. }
  681. // if p=1, rotate left (yes, this is what we're doing)
  682. tmp3=regs->B[aboff-1][3];
  683. regs->B[aboff-1][3]=FFXOR(regs->B[aboff-1][3],FFAND(FFXOR(regs->B[aboff-1][3],regs->B[aboff-1][2]),regs->p));
  684. regs->B[aboff-1][2]=FFXOR(regs->B[aboff-1][2],FFAND(FFXOR(regs->B[aboff-1][2],regs->B[aboff-1][1]),regs->p));
  685. regs->B[aboff-1][1]=FFXOR(regs->B[aboff-1][1],FFAND(FFXOR(regs->B[aboff-1][1],regs->B[aboff-1][0]),regs->p));
  686. regs->B[aboff-1][0]=FFXOR(regs->B[aboff-1][0],FFAND(FFXOR(regs->B[aboff-1][0],tmp3),regs->p));
  687. for(dbg=0;dbg<4;dbg++){
  688. DBG(fprintf(stderr,"next_B0[%i]=",dbg));
  689. DBG(dump_mem("",(unsigned char *)&regs->B[aboff-1][dbg],BYPG,BYPG));
  690. }
  691. // T3 = xor all inputs
  692. for(b=0;b<4;b++){
  693. regs->D[b]=FFXOR(FFXOR(regs->E[b],regs->Z[b]),extra_B[b]);
  694. }
  695. for(dbg=0;dbg<4;dbg++){
  696. DBG(fprintf(stderr,"D[%i]=",dbg));
  697. DBG(dump_mem("",(unsigned char *)&regs->D[dbg],BYPG,BYPG));
  698. }
  699. // T4 = sum, carry of Z + E + r
  700. for(b=0;b<4;b++){
  701. next_E[b]=regs->F[b];
  702. }
  703. tmp0=FFXOR(regs->Z[0],regs->E[0]);
  704. tmp1=FFAND(regs->Z[0],regs->E[0]);
  705. regs->F[0]=FFXOR(regs->E[0],FFAND(regs->q,FFXOR(regs->Z[0],regs->r)));
  706. tmp3=FFAND(tmp0,regs->r);
  707. tmp4=FFOR(tmp1,tmp3);
  708. tmp0=FFXOR(regs->Z[1],regs->E[1]);
  709. tmp1=FFAND(regs->Z[1],regs->E[1]);
  710. regs->F[1]=FFXOR(regs->E[1],FFAND(regs->q,FFXOR(regs->Z[1],tmp4)));
  711. tmp3=FFAND(tmp0,tmp4);
  712. tmp4=FFOR(tmp1,tmp3);
  713. tmp0=FFXOR(regs->Z[2],regs->E[2]);
  714. tmp1=FFAND(regs->Z[2],regs->E[2]);
  715. regs->F[2]=FFXOR(regs->E[2],FFAND(regs->q,FFXOR(regs->Z[2],tmp4)));
  716. tmp3=FFAND(tmp0,tmp4);
  717. tmp4=FFOR(tmp1,tmp3);
  718. tmp0=FFXOR(regs->Z[3],regs->E[3]);
  719. tmp1=FFAND(regs->Z[3],regs->E[3]);
  720. regs->F[3]=FFXOR(regs->E[3],FFAND(regs->q,FFXOR(regs->Z[3],tmp4)));
  721. tmp3=FFAND(tmp0,tmp4);
  722. regs->r=FFXOR(regs->r,FFAND(regs->q,FFXOR(FFOR(tmp1,tmp3),regs->r))); // ultimate carry
  723. /*
  724. we have just done this: (believe it or not)
  725. if (q) {
  726. F = Z + E + r;
  727. r = (F >> 4) & 1;
  728. F = F & 0x0f;
  729. }
  730. else {
  731. F = E;
  732. }
  733. */
  734. for(b=0;b<4;b++){
  735. regs->E[b]=next_E[b];
  736. }
  737. for(dbg=0;dbg<4;dbg++){
  738. DBG(fprintf(stderr,"F[%i]=",dbg));
  739. DBG(dump_mem("",(unsigned char *)&regs->F[dbg],BYPG,BYPG));
  740. }
  741. DBG(fprintf(stderr,"r="));
  742. DBG(dump_mem("",(unsigned char *)&regs->r,BYPG,BYPG));
  743. for(dbg=0;dbg<4;dbg++){
  744. DBG(fprintf(stderr,"E[%i]=",dbg));
  745. DBG(dump_mem("",(unsigned char *)&regs->E[dbg],BYPG,BYPG));
  746. }
  747. // this simple instruction is virtually shifting all the shift registers
  748. aboff--;
  749. /*
  750. we've just done this:
  751. A9=A8;A8=A7;A7=A6;A6=A5;A5=A4;A4=A3;A3=A2;A2=A1;A1=A0;A0=next_A0;
  752. B9=B8;B8=B7;B7=B6;B6=B5;B5=B4;B4=B3;B3=B2;B2=B1;B1=B0;B0=next_B0;
  753. */
  754. regs->X[0]=s1a;
  755. regs->X[1]=s2a;
  756. regs->X[2]=s3b;
  757. regs->X[3]=s4b;
  758. regs->Y[0]=s3a;
  759. regs->Y[1]=s4a;
  760. regs->Y[2]=s5b;
  761. regs->Y[3]=s6b;
  762. regs->Z[0]=s5a;
  763. regs->Z[1]=s6a;
  764. regs->Z[2]=s1b;
  765. regs->Z[3]=s2b;
  766. regs->p=s7a;
  767. regs->q=s7b;
  768. for(dbg=0;dbg<4;dbg++){
  769. DBG(fprintf(stderr,"X[%i]=",dbg));
  770. DBG(dump_mem("",(unsigned char *)&regs->X[dbg],BYPG,BYPG));
  771. }
  772. for(dbg=0;dbg<4;dbg++){
  773. DBG(fprintf(stderr,"Y[%i]=",dbg));
  774. DBG(dump_mem("",(unsigned char *)&regs->Y[dbg],BYPG,BYPG));
  775. }
  776. for(dbg=0;dbg<4;dbg++){
  777. DBG(fprintf(stderr,"Z[%i]=",dbg));
  778. DBG(dump_mem("",(unsigned char *)&regs->Z[dbg],BYPG,BYPG));
  779. }
  780. DBG(fprintf(stderr,"p="));
  781. DBG(dump_mem("",(unsigned char *)&regs->p,BYPG,BYPG));
  782. DBG(fprintf(stderr,"q="));
  783. DBG(dump_mem("",(unsigned char *)&regs->q,BYPG,BYPG));
  784. #ifdef STREAM_NORMAL
  785. // require 4 loops per output byte
  786. // 2 output bits are a function of the 4 bits of D
  787. // xor 2 by 2
  788. cb_g[8*i+7-2*j]=FFXOR(regs->D[2],regs->D[3]);
  789. cb_g[8*i+6-2*j]=FFXOR(regs->D[0],regs->D[1]);
  790. for(dbg=0;dbg<8;dbg++){
  791. DBG(fprintf(stderr,"op[%i]=",dbg));
  792. DBG(dump_mem("",(unsigned char *)&cb_g[8*i+dbg],BYPG,BYPG));
  793. }
  794. #endif
  795. DBG(fprintf(stderr,"---END INTERNAL LOOP\n"));
  796. } // INTERNAL LOOP
  797. DBG(fprintf(stderr,"--END EXTERNAL LOOP\n"));
  798. } // EXTERNAL LOOP
  799. // move 32 steps forward, ready for next call
  800. for(k=0;k<10;k++){
  801. for(b=0;b<4;b++){
  802. DBG(fprintf(stderr,"moving forward AB k=%i b=%i\n",k,b));
  803. regs->A[32+k][b]=regs->A[k][b];
  804. regs->B[32+k][b]=regs->B[k][b];
  805. }
  806. }
  807. ////////////////////////////////////////////////////////////////////////////////
  808. #ifdef STREAM_NORMAL
  809. for(j=0;j<64;j++){
  810. DBG(fprintf(stderr,"postcall prerot cb[%2i]=",j));
  811. DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
  812. }
  813. #if GROUP_PARALLELISM==32
  814. trasp64_32_88cw(cb);
  815. #endif
  816. #if GROUP_PARALLELISM==64
  817. trasp64_64_88cw(cb);
  818. #endif
  819. #if GROUP_PARALLELISM==128
  820. trasp64_128_88cw(cb);
  821. #endif
  822. for(j=0;j<64;j++){
  823. DBG(fprintf(stderr,"postcall postrot cb[%2i]=",j));
  824. DBG(dump_mem("",(unsigned char *)(cb+BYPG*j),BYPG,BYPG));
  825. }
  826. #endif
  827. #ifdef STREAM_INIT
  828. DBG(fprintf(stderr,":::::::::: END STREAM INIT\n"));
  829. #endif
  830. #ifdef STREAM_NORMAL
  831. DBG(fprintf(stderr,":::::::::: END STREAM NORMAL\n"));
  832. #endif
  833. }