/* * rijndael_gladman.h * * Duncan S Wong * * July 31, 2001 - Initial Port from Brian Gladman's Rijndael implementation * Aug 4, 2001 - version 1.0 * Apr 2, 2002 - version 1.1 */ #ifndef __RIJNDAELGLADMAN_H__ #define __RIJNDAELGLADMAN_H__ /* ----------------------------------------------------------------------- Copyright (c) 2001 Dr Brian Gladman , Worcester, UK TERMS Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. This software is provided 'as is' with no guarantees of correctness or fitness for purpose. ----------------------------------------------------------------------- 1. FUNCTION The AES algorithm Rijndael implemented for block and key sizes of 128, 192 and 256 bits (16, 24 and 32 bytes) by Brian Gladman. This is an implementation of the AES encryption algorithm (Rijndael) designed by Joan Daemen and Vincent Rijmen. This version is designed to provide both fixed and dynamic block and key lengths and can also run with either big or little endian internal byte order (see aes.h). It inputs block and key lengths in bytes with the legal values being 16, 24 and 32. 2. CONFIGURATION OPTIONS --------------------- a. Define UNROLL for full loop unrolling in encryption and decryption. b. Define PARTIAL_UNROLL to unroll two loops in encryption and decryption. c. Define FIXED_TABLES for compiled rather than dynamic tables. d. Define FF_TABLES to use tables for field multiplies and inverses. e. Define ARRAYS to use arrays to hold the local state block. If this is not defined, individually declared 32-bit words are used. f. Define FAST_VARIABLE if a high speed variable block implementation is needed (essentially three separate fixed block size code sequences) g. Define either ONE_TABLE or FOUR_TABLES for a fast table driven version using 1 table (2 kbytes of table space) or 4 tables (8 kbytes of table space) for higher speed. h. Define either ONE_LR_TABLE or FOUR_LR_TABLES for a further speed increase by using tables for the last rounds but with more table space (2 or 8 kbytes extra). i. If neither ONE_TABLE nor FOUR_TABLES is defined, a compact but slower version is provided. j. If fast decryption key scheduling is needed define ONE_IM_TABLE or FOUR_IM_TABLES for higher speed (1 or 4 kbytes extra). 3. USE OF DEFINES NOTE: some combinations of the following defines are disabled below. UNROLL or PARTIAL_UNROLL control the extent to which loops are unrolled in the main encryption and decryption routines. UNROLL does a complete unroll while PARTIAL_UNROLL uses a loop with two rounds in it. #define UNROLL #define PARTIAL_UNROLL If FIXED_TABLES is defined, the tables are comipled statically into the code, otherwise they are computed once when the code is first used. #define FIXED_TABLES If FF_TABLES is defined faster finite field arithmetic is performed by using tables. #define FF_TABLES If ARRAYS is defined the state variables for encryption are defined as arrays, otherwise they are defined as individual variables. The latter is useful on machines where these variables can be mapped to registers. #define ARRAYS If FAST_VARIABLE is defined with variable block length, faster but larger code is used for encryption and decryption. #define FAST_VARIABLE */ #define UNROLL //#define PARTIAL_UNROLL #define FIXED_TABLES // [swong] always on for this code //#define FF_TABLES // [swong] not needed anymore in this code #define ARRAYS #define FAST_VARIABLE /* This code uses three sets of tables, each of which can be a single table or four sub-tables to gain a further speed advantage. The defines ONE_TABLE and FOUR_TABLES control the use of tables in the main encryption rounds and have the greatest impact on speed. If neither is defined, tables are not used and the resulting code is then very slow. Defining ONE_TABLE gives a substantial speed increase using 2 kbytes of table space; FOUR_TABLES gives a further speed increase but uses 8 kbytes of table space. #define ONE_TABLE or #define FOUR_TABLES The defines ONE_LR_TABLE and FOUR_LR_TABLES apply to the last round only and their impact on speed is hence less. It is unlikely to be sensible to apply these options unless the correspnding option above is also used. #define ONE_LR_TABLE or #define FOUR_LR_TABLES The ONE_IM_TABLE and FOUR_IM_TABLES options use tables to speed up the generation of the decryption key schedule. This will only be useful in limited situations where decryption speed with frequent re-keying is needed. #define ONE_IM_TABLE or #define FOUR_IM_TABLES */ #define FOUR_TABLES //#define FOUR_LR_TABLES //#define FOUR_IM_TABLES //#define ONE_TABLE //#define ONE_LR_TABLE //#define ONE_IM_TABLE /* 3. THE CIPHER INTERFACE byte (an unsigned 8-bit type) word (an unsigned 32-bit type) enum aes_key: (encryption direction) enc (set key for encryption) dec (set key for decryption) both (set key for both) class or struct aes (structure for context) C subroutine calls: Int16 set_blk(const word block_length, aes *cx) (variable block size) Int16 set_key(const byte key[], const word key_length, const enum aes_key direction, aes *cx) Int16 encrypt(const byte input_blk[], byte output_blk[], const aes *cx) Int16 decrypt(const byte input_blk[], byte output_blk[], const aes *cx) IMPORTANT NOTE: If you are using this C interface and your compiler does not set the memory used for objects to zero before use, you will need to ensure that cx.mode is set to zero before using the C subroutine calls. The block length inputs to set_block and set_key are in numbers of BYTES, not bits. The calls to subroutines must be made in the above order but multiple calls can be made without repeating earlier calls if their parameters have not changed. If the cipher block length is variable but set_blk has not been called before cipher operations a value of 16 is assumed (that is, the AES block size). In contrast to earlier versions the block and key length parameters are now checked for correctness and the encryption and decryption routines check to ensure that an appropriate key has been set before they are called. 4. BYTE ORDER WITHIN 32 BIT WORDS The fundamental data processing units in Rijndael are 8-bit bytes. The input, the output and the key input are all enumerated arrays of bytes in which bytes are numbered starting at zero and increasing to one less than the number of bytes in the array in question. When these inputs and outputs are considered as bit sequences, the n'th byte contains bits 8n to 8n+7 of the sequence with the lower numbered bit mapped to the most significant bit within the byte (i.e. that having a numeric value of 128). However, Rijndael can be implemented more efficiently using 32-bit words to process 4 bytes at a time provided that the order of bytes within words is known. This order is called big-endian if the lowest numbered bytes in words have the highest numeric significance and little-endian if the opposite applies. This code can work in either order irrespective of the native order of the machine on which it runs. The byte order used internally is set by defining INTERNAL_BYTE_ORDER whereas the order for all inputs and outputs is specified by defining EXTERNAL_BYTE_ORDER, the only purpose of the latter being to determine if a byte order change is needed immediately after input and immediately before output to account for the use of a different internal byte order. In almost all situations both of these defines will be set to the native order of the processor on which the code is to run but other settings may somtimes be useful in special circumstances. #define INTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN #define EXTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN 5. CONFIGURATION OPTIONS (see also aes.c) a. define BLOCK_SIZE to set the cipher block size (16, 24 or 32 bytes) or leave this undefined for dynamically variable block size (this will result in much slower code). d. set INTERNAL_BYTE_ORDER to one of the above constants to set the internal byte order (the order used within the algorithm code) e. set EXTERNAL_BYTE_ORDER to one of the above constants to set the byte order used at the external interfaces for the input, output and key byte arrays. IMPORTANT NOTE: BLOCK_SIZE is in BYTES: 16, 24, 32 or undefined for aes.c and 16, 20, 24, 28, 32 or undefined for aespp.c. If left undefined a slower version providing variable block length is compiled #define BLOCK_SIZE 16 */ #define BLOCK_SIZE 16 #define MAX_BLOCK_SIZE 32 #define MAX_KEYMAT_SIZE 64 // [swong] # of ASCII char's needed to represent a key #define MAX_KEY_SIZE 32 // [swong] # of bytes needed to store a key #define MODE_ECB 1 /* Are we ciphering in ECB mode? */ #define MODE_CBC 2 /* Are we ciphering in CBC mode? */ #define AES_BIG_ENDIAN 1 /* do not change */ #define AES_LITTLE_ENDIAN 2 /* do not change */ // Intel x86 microprocessors //#define INTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN //#define EXTERNAL_BYTE_ORDER AES_LITTLE_ENDIAN // Sun machines, Motorolla DragonBall microprocessors #define INTERNAL_BYTE_ORDER AES_BIG_ENDIAN #define EXTERNAL_BYTE_ORDER AES_BIG_ENDIAN /* In this implementation the columns of the state array are each held in 32-bit words. The state array can be held in various ways: in an array of words, in a number of individual word variables or in a number of processor registers. The following define maps a variable name x and a column number c to the way the state array variable is to be held. */ #if defined(ARRAYS) #define s(x,c) x[c] #else #define s(x,c) x##c #endif #if defined(BLOCK_SIZE) && (BLOCK_SIZE == 20 || BLOCK_SIZE == 28) #error an illegal block size has been specified #endif #if defined(UNROLL) && defined (PARTIAL_UNROLL) #error both UNROLL and PARTIAL_UNROLL are defined #endif #if defined(ONE_TABLE) && defined (FOUR_TABLES) #error both ONE_TABLE and FOUR_TABLES are defined #endif #if defined(ONE_LR_TABLE) && defined (FOUR_LR_TABLES) #error both ONE_LR_TABLE and FOUR_LR_TABLES are defined #endif #if defined(ONE_IM_TABLE) && defined (FOUR_IM_TABLES) #error both ONE_IM_TABLE and FOUR_IM_TABLES are defined #endif // Disable at least some poor combinations of options #if !defined(ONE_TABLE) && !defined(FOUR_TABLES) //#define FIXED_TABLES //#undef UNROLL // [swong] should be added due to huge code size (see my paper) #undef ONE_LR_TABLE #undef FOUR_LR_TABLES #undef ONE_IM_TABLE #undef FOUR_IM_TABLES #elif !defined(FOUR_TABLES) #ifdef FOUR_LR_TABLES #undef FOUR_LR_TABLES #define ONE_LR_TABLE #endif #ifdef FOUR_IM_TABLES #undef FOUR_IM_TABLES #define ONE_IM_TABLE #endif #elif !defined(BLOCK_SIZE) #if defined(UNROLL) #define PARTIAL_UNROLL #undef UNROLL #endif #endif #if defined(BLOCK_SIZE) && ((BLOCK_SIZE & 3) || BLOCK_SIZE < 16 || BLOCK_SIZE > 32) #error an illegal block size has been specified #endif /* The number of key schedule words for different block and key lengths (allowing for the method of computation which requires the length to be a multiple of the key length): Key Schedule key length (bytes) Length 16 20 24 28 32 --------------------- block 16 | 44 60 54 56 64 length 20 | 60 60 66 70 80 (bytes) 24 | 80 80 78 84 96 28 | 100 100 102 98 112 32 | 120 120 120 126 120 Rcon Table key length (bytes) Length 16 20 24 28 32 --------------------- block 16 | 10 9 8 7 7 length 20 | 14 11 10 9 9 (bytes) 24 | 19 15 12 11 11 28 | 24 19 16 13 13 32 | 29 23 19 17 14 The following values assume that the key length will be variable and may be of maximum length (32 bytes). Nk = number_of_key_bytes / 4 Nc = number_of_columns_in_state / 4 Nr = number of encryption/decryption rounds Rc = number of elements in rcon table Ks = number of 32-bit words in key schedule */ #define Nr(Nk,Nc) ((Nk > Nc ? Nk : Nc) + 6) #define Rc(Nk,Nc) ((Nb * (Nr(Nk,Nc) + 1) - 1) / Nk) #define Ks(Nk,Nc) (Nk * (Rc(Nk,Nc) + 1)) #if !defined(BLOCK_SIZE) #define RC_LENGTH 29 #define KS_LENGTH 128 #else #define RC_LENGTH 5 * BLOCK_SIZE / 4 - (BLOCK_SIZE == 16 ? 10 : 11) #define KS_LENGTH 4 * BLOCK_SIZE #endif typedef UInt8 byte; /* must be an 8-bit storage unit */ typedef UInt32 word; /* must be a 32-bit storage unit */ #define aes_good 0 #define aes_bad 1 #define BAD_KEY_MAT 2 #define BAD_MODE 3 #define BAD_KEY_DIR 4 #define BAD_CIPHER_MODE 5 #define BAD_IV 6 /* upr(x,n): rotates bytes within words by n positions, moving bytes to higher index positions with wrap around into low positions ups(x,n): moves bytes by n positions to higher index positions in words but without wrap around bval(x,n): extracts a byte from a word */ #if(INTERNAL_BYTE_ORDER == AES_LITTLE_ENDIAN) #define upr(x,n) (((x) << 8 * (n)) | ((x) >> (32 - 8 * (n)))) #define ups(x,n) ((x) << 8 * (n)) #define bval(x,n) ((byte)((x) >> 8 * (n))) #define byte_swap(x) (upr(x,1) & 0x00ff00ff | upr(x,3) & 0xff00ff00) #define bytes2word(b0, b1, b2, b3) \ ((word)(b3) << 24 | (word)(b2) << 16 | (word)(b1) << 8 | (b0)) #else #define upr(x,n) (((x) >> 8 * (n)) | ((x) << (32 - 8 * (n)))) #define ups(x,n) ((x) >> 8 * (n)) #define bval(x,n) ((byte)((x) >> 24 - 8 * (n))) #define byte_swap(x) (upr(x,3) & 0x00ff00ff | upr(x,1) & 0xff00ff00) #define bytes2word(b0, b1, b2, b3) \ ((word)(b0) << 24 | (word)(b1) << 16 | (word)(b2) << 8 | (b3)) #endif #if(INTERNAL_BYTE_ORDER == EXTERNAL_BYTE_ORDER) #define word_in(x) *(word*)(x) #define word_out(x,v) *(word*)(x) = (v) #else #define word_in(x) byte_swap(*(word*)(x)) #define word_out(x,v) *(word*)(x) = byte_swap(v) #endif enum aes_const { Nrow = 4, /* the number of rows in the cipher state */ Mcol = 8, /* maximum number of columns in the state */ #if defined(BLOCK_SIZE) /* set up a statically defined block size */ Ncol = BLOCK_SIZE / 4, Shr0 = 0, /* the cyclic shift values for rows 0, 1, 2 & 3 */ Shr1 = 1, Shr2 = BLOCK_SIZE == 32 ? 3 : 2, Shr3 = BLOCK_SIZE == 32 ? 4 : 3 #endif }; enum aes_direction { enc = 1, /* set if encryption is needed */ dec = 2, /* set if decryption is needed */ both = 3 /* set if both are needed */ }; // [swong] Database Resource - 20KB Transformation Tables typedef struct structTablesType { word *ft_tab[4]; word *it_tab[4]; word *fl_tab[4]; word *il_tab[4]; word *im_tab[4]; } TablesType; typedef TablesType* TablesTypePtr; typedef struct { Int16 blockLen; // [swong] block size in # bytes Int16 Nkey; /* the number of words in the key input block */ Int16 Nrnd; /* the number of cipher rounds */ word e_key[KS_LENGTH]; /* the encryption key schedule */ word d_key[KS_LENGTH]; /* the decryption key schedule */ Char keyMaterial[MAX_KEYMAT_SIZE+1]; // [swong] Raw key data in ASCII, e.g., user input or KAT values #if !defined(BLOCK_SIZE) Int16 Ncol; /* the number of columns in the cipher state */ #endif byte direction; // [swong] encrypt, decrypt or both byte mode; // [swong] MODE_ECB or MODE_CBC byte IV[MAX_BLOCK_SIZE]; // [swong] A possible Initialization Vector for ciphering void *gP; TablesType tablesP; } RijndaelGladmanCTX; Int16 makeKey(RijndaelGladmanCTX *ctx, byte mode, byte direction, Int16 key_n_bits, Char *keyMaterial, Char *IV); // [swong] Int16 encrypt(const byte in_blk[], byte out_blk[], RijndaelGladmanCTX *ctx); Int16 decrypt(const byte in_blk[], byte out_blk[], RijndaelGladmanCTX *ctx); Int16 set_blk(const word n_bytes, RijndaelGladmanCTX *ctx); // [swong] Int16 blockEncrypt(RijndaelGladmanCTX *ctx, byte *input, Int32 input_n_bytes, byte *outBuffer); Int16 blockDecrypt(RijndaelGladmanCTX *ctx, byte *input, Int32 input_n_bytes, byte *outBuffer); #endif