3、高性能関数


最も右側のビット(0101 1110=>0101 1100)をオフにすると、符号なし整数が2のべき乗または0であるか、すなわち結果が0であるかを判断することができる.
//         (0101 1110=>0101 1100),           2   0,      0;
#define TurnOffLastRightBit(x)          ((x) & ((x)-1))

最も右側のビットを開きます(0101 0111=>0101 1111).
//         (0101 0111=>0101 1111);
#define TurnOnLastRightBit(x)           ((x) | ((x)+1))

ワードグループの末尾の1(0101 0111=>0101 0000)を閉じると、符号なし整数が2^n-1または0であるか否かを判断したり、ある数のすべてのビットが1であるか否かを判断したり、結果が0であるか否かを判断したりすることができる.
//        1(0101 0111=>0101 0000),           2^n-1 0,                1,      0;
#define TurnOffTailBit(x)               ((x) & ((x)+1))

ワードグループの末尾の0を開きます(0101 1000=>0101 1111).
//        0(0101 1000=>0101 1111);
#define TurnOnTailBit(x)                ((x) | ((x)-1))

変換xが最も右側で値が0の位置が1となり、残りのビットは0(1010 0111=>0000 1000)となり、xに0のビットがなければ0となる.
//   x      0     1,     0(1010 0111=>0000 1000), x   0   ,    0;
#define TrunLastRightBit0(x)            (~(x) & ((x)+1))

変換xが最も右側で1のビットメタは0になり、残りのビットメタは1(1010 1000=>1111 0111)になり、xに1のビットメタがなければ、結果の各ビットメタは1である.
//   x     1     0,     1(1010 1000=>1111 0111), x   1   ,          1;
#define TrunLastRightBit1(x)            (~(x) | ((x)-1))

変換xの末尾のすべてが0のビットは1になり、残りのビットは0(1010 0111=>1111 1000)になり、xに0のビットがなければ0になる.
//   x     0     1,     0(1010 0111=>1111 1000), x  0   ,    0;
#define TrunTailBit0(x)                 (~(x) & ((x)-1))

変換xの末尾のすべてが1のビットメタは0になり、残りのビットメタは1(1010 0111=>1111 1000)になり、xに1のビットメタがなければ、結果の各ビットメタは1である.
//   x     1     0,     1(1010 0111=>1111 1000), x  1   ,          1;
#define TrunTailBit1(x)                 (~(x) | ((x)+1))

xが最も右側で1のビットを取得し、残りのビットは0(0101 1000=>0000 1000)に設定し、xに1のビットがなければ結果は0である.
//   x     1   ,     0(0101 1000=>0000 1000), x  1   ,    0;
#define GetLastRightBit1(x)             ((x) & (~(x)+1))

xが最も右側で0のビットを取得し、残りのビットは1(0101 0111=>1111 0111)に置き、xに0のビットがなければ結果は1である.
//   x     0   ,     1(0101 0111=>1111 0111), x  0   ,    1;
#define GetLastRightBit0(x)             ((x) | (~(x)-1))

xが最も右側で1のビットメタとその右側が1であり、その左側が0(0101 1000=>0000 1111)であり、xに1のビットメタがなければ、結果の各ビットメタは1である.
//   x     1         1,     0(0101 1000=>0000 1111), x  1   ,          1;
#define GetTailBit1(x)                  ((x) ^ ((x)-1))

xが最も右側で0のビットメタとその右側が1であり、その左側が0(0101 0111=>0000 1111)であり、xに0のビットメタがなければ、結果の各ビットメタは1である.
//   x     0         1,     0(0101 0111=>0000 1111), x  0   ,          1;
#define GetTailBit0(x)                  ((x) ^ ((x)+1))

xの右側に連続して表示され、1のビットを0(0101 1100=>0100 0000)に設定します./*(((x)&-(x))+(x))&(x))*/
//   x        1    0(0101 1100=>0100 0000); /* ((((x) & -(x)) + (x)) & (x)) */
#define TurnOffRightContinueBit(x)      ((((x) | ((x)-1)) + 1) & (x))

異なるレジスタ内の対応するビットセグメントを交換する.
//              ;
#define SwapBitField(x, y, mask)        {(x) = (x) ^ (y); (y) = (y) ^ ((x) & (mask)); (x) = (x) ^ (y);}

絶対値関数;
//     ;
#define ABS(x)                          (((x) ^ ((signed)(x) >> 31)) - ((signed)(x) >> 31))
#define NABS(x)                         (((signed)(x) >> 31) - ((x) ^ ((signed)(x) >> 31)))

平均関数;
//      ;
#define AVEF(x, y)                      (((x) & (y)) + ((unsigned)((x) ^ (y)) >> 1))
#define AVEC(x, y)                      (((x) | (y)) - (((x) ^ (y)) >> 1))

シンボル関数;
//     ;
#define SIGN(x)                         (((x) >> 31) | ((unsigned)(-(x)) >> 31))

比較関数;
//     ;
#define CMP(x, y)                       (((x) > (y)) - ((x) < (y)))

バイナリ転送グレイコード;
//        ;
#define B2G(b, g)                       (g) = ((b) ^ ((unsigned)(b) >> 1))
#define G2B(g, b)   \
{   \
    (b) = (g) ^ ((unsigned)(g) >> 1);   \
    (b) = (b) ^ ((unsigned)(b) >> 2);   \
    (b) = (b) ^ ((unsigned)(b) >> 4);   \
    if(sizeof(g) > 1) (b) = (b) ^ ((unsigned)(b) >> 8);   \
    if(sizeof(g) > 2) (b) = (b) ^ ((unsigned)(b) >> 16);   \
}

三角関数;
#define PI              3.14159265358979323846
#define SIN(x)          sin((x) * PI / 180)
#define COS(x)          cos((x) * PI / 180)
#define TAN(x)          tan((x) * PI / 180)
#define ASIN(x)         (asin(x) / PI * 180)
#define ACOS(x)         (acos(x) / PI * 180)
#define ATAN(x)         (atan(x) / PI * 180)

最大公約数
template
T GCD(T a, T b)
{
    while(b ^= a ^= b ^= a %= b);
    return a;
}

最小公倍数
//      ;
template
T LCM(T a, int b)
{
    return a * b / GCD(a, b);
}

統計値が1のビット数.
int PopCount(unsigned int x)
{
#if 0
    int count = 0;

    for (; x; count ++) {
        x &= x - 1;
    }

    return count;
#endif
#if 0
    x = (x & 0x55555555) + ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    x = (x & 0x0f0f0f0f) + ((x >> 4) & 0x0f0f0f0f);
    x = (x & 0x00ff00ff) + ((x >> 8) & 0x00ff00ff);
    x = (x & 0x0000ffff) + ((x >> 16) & 0x0000ffff);

    return x;
#endif
#if 0
    static const int ones[] = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2,\
     3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4,\
     3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2,\
     3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5,\
     5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5,\
     6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,\
     2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3,\
     4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3,\
     3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5,\
     6, 5, 6, 6, 7, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6,\
     5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};

    return (ones[x & 0xff] + ones[(x >> 8) & 0xff] + ones[(x >> 16) & 0xff] + ones[(x >> 24) & 0xff]);
#endif
#if 0
    x = x - ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    x = (x + (x >> 4)) & 0x0f0f0f0f;
    x = x + (x >> 8);
    x = x + (x >> 16);
    return x & 0x0000003f;
#endif
#if 0
    unsigned int n;
    n = (x >> 1) & 033333333333;    // Count bits in;
    x = x - n;                      // each 3-bit;
    n = (n >> 1) & 033333333333;    // field;
    x = x - n;
    x = (x + (x >> 3)) & 030707070707;  // 6-bit sums;
    return x % 63;                  // Add 6-bit sums;
#endif
#if 1
    unsigned int n;
    n = (x >> 1) & 0x77777777;      // Count bits in;
    x = x - n;                      // each 4-bit;
    n = (n >> 1) & 0x77777777;      // field;
    x = x - n;
    n = (n >> 1) & 0x77777777;
    x = x - n;
    x = (x + (x >> 4)) & 0x0f0f0f0f;    // Get byte sums;
    x = x*0x01010101;                   // Add the bytes;
    return x >> 24;
#endif
}

Hamming Distance;
// Hamming Distance;
#define HammDist(x, y)      PopCount(x ^ y)

2つのワードグループのクラスタカウントの差、pop(x)-pop(y)を計算する.
//             ,pop(x) - pop(y);
int PopDiff(unsigned int x, unsigned int y)
{
    x = x - ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    y = ~y;
    y = y - ((y >> 1) & 0x55555555);
    y = (y & 0x33333333) + ((y >> 2) & 0x33333333);
    x = x + y;
    x = x + (x >> 8);
    x = x + (x >> 16);
    return (x & 0x0000007f) - 32;
}

pop(x)とpop(y)の大きさを比較する.
//   pop(x) pop(y)   ;
int PopCmpr(unsigned int xp, unsigned int yp)
{
    unsigned int x, y;
    x = xp & ~yp;           // Clear bits where;
    y = yp & ~xp;
    while (1) {
        if(x == 0) return y | -(int)y;
        if(y == 0) return 1;
        x = x & (x - 1);    // Clear one bit;
        y = y & (y - 1);
    }
}

キャリホールド加算器(carry-save adder,CSA);
//        (carry-save adder, CSA);
#define CSA(h, l, a, b, c)  \
{   \
    unsigned int u = a ^ b; \
    unsigned int v = c;     \
    h = (a & b) | (u & v);  \
    l = u ^ v;              \
}

配列の中の種群のカウントを求めて、毎回2つの字のグループの要素を処理します;

//          ,          ;
int PopArray(unsigned int A[], int n)
{
#if 0
    int tot, i;
    unsigned int ones, twos;

    tot = 0;
    ones = 0;
    for(i=0; i<=n-2; i+=2){
        CSA(twos, ones, ones, A[i], A[i+1]);
        tot = tot + PopCount(twos);
    }
    tot = 2 * tot + PopCount(ones);

    // If there's a last one, add it in;
    if(n & 1) tot = tot + PopCount(A[i]);

    return tot;
#else
    int tot, i;
    unsigned int ones, twos, twosA, twosB,
            fours, foursA, foursB, eights;

    tot = 0;
    fours = twos = ones = 0;

    for(i=0; i<=n-8; i+=8){
        CSA(twosA, ones, ones, A[i], A[i+1]);
        CSA(twosB, ones, ones, A[i+2], A[i+3]);
        CSA(foursA, twos, twos, twosA, twosB);
        CSA(twosA, ones, ones, A[i+4], A[i+5]);
        CSA(twosB, ones, ones, A[i+6], A[i+7]);
        CSA(foursB, twos, twos, twosA, twosB);
        CSA(eights, fours, fours, foursA, foursB);
        tot += PopCount(eights);
    }
    tot = 8*tot + 4*PopCount(fours) + 2*PopCount(twos) + PopCount(ones);

    // Simly ad in the last 0 to 7 elements;
    for(i=i; i

ワードグループの値が「1」のビット数が奇数であるか偶数であるかを判断する.

//        ‘1’            ;
int ParityCheck(unsigned int x)
{
#if 1
    unsigned int y;
    y = x ^ (x >> 1);
    y = y ^ (y >> 2);
    y = y ^ (y >> 4);
    y = y ^ (y >> 8);
    y = y ^ (y >> 16);
    return y;
#else
    x = x ^ (x >> 1);
    x = (x ^ (x >> 2)) & 0x11111111;
    x = x * 0x11111111;
    return (x >> 28) & 1;
#endif
}

xプリアンブル0の個数(大端)を計算する.
//   x  0   (  );
unsigned int Nlz(unsigned int x)
{
 #if 0
    int n;

    if(x == 0) return 32;
    n = 1;
    if((x >> 16) == 0){n += 16; x <<= 16;}
    if((x >> 24) == 0){n += 8; x <<= 8;}
    if((x >> 28) == 0){n += 4; x <<= 4;}
    if((x >> 30) == 0){n += 2; x <<= 2;}
    n -= (x >> 31);
    return n;
#endif
#if 0
    unsigned int y;
    int n = 32;

    y = x >> 16; if(y != 0){n -= 16; x = y;}
    y = x >> 8; if(y != 0){n -= 8; x = y;}
    y = x >> 4; if(y != 0){n -= 4; x = y;}
    y = x >> 2; if(y != 0){n -= 2; x = y;}
    y = x >> 1; if(y != 0) return n - 2;

    return n - x;
#endif
#if 0
    static char table[256] = {
        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    };
    return 32 - table[x & 0xff] - table[(x >> 8) & 0xff] - table[(x >> 16) & 0xff] - table[(x >> 24) & 0xff];
#endif
#if 0
    int y, n;
    n = 0;
    y = x;
    while(1){
        if(x < 0) return n;
        if(y == 0) return 32 - n;
        n++;
        x <<= 1;
        y >>= 1;
    }
#endif
#if 0
    int y, m, n;
    y = -(x >> 16);         // If left half of x is 0,
    m = (y >> 16) & 16;     // set n = 16. If left half
    n = 16 - m;             // is nonzero, set n = 0 and
    x = x >> m;             // shift x right 16.
                            // Now x is of the from 0000xxxx.
    y = x - 0x100;          // If positions 8-15 are 0,
    m = (y >> 16) & 8;      // add 8 to n and shift x left 8.
    n = n + m;
    x = x << m;

    y = x - 0x1000;         // If positions 12-15 are 0,
    m = (y >> 16) & 4;      // add 4 to n and shift x left 4.
    n = n + m;
    x = x << m;

    y = x - 0x4000;         // If positions 14-15 are 0,
    m = (y >> 16) & 2;      // add 2 to n and shift x left 2.
    n = n + m;
    x = x << m;

    y = x >> 14;            // Set y = 0, 1, 2, or 3.
    m = y & ~(y >> 1);      // Set m = 0, 1, 2, or w resp.
    return n + 2 - m;
#endif
#if 1
#define u   ' '
    static char table[64] = {
        32, 31, u, 16, u, 30, 3, u,     15, u, u, u, 29, 10, 2, u,
        u, u, 12, 14, 21, u, 19, u,     u, 28, u, 25, u, 9, 1, u,
        17, u, 4, u, u, u, 11, u,       13, 22, 20, u, 26, u, u, 18,
        5, u, u, 23, u, 27, u, 6,       u, 24, 7, u, 8, u, 0, u
    };
#undef  u
    x = x | (x >> 1);       // Propagate leftmost
    x = x | (x >> 2);       // 1-bit to the right.
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
    x = x * 0x06eb14f9;     // Multiplier is 7*255**3.
    return table[x >> 26];
#endif
}

xテール0の個数(小端)を計算する.
//   x  0   (  );
unsigned int Ntz(unsigned int x)
{
#if 0
   int n;

   if(x == 0) return 32;
   n = 1;
   if((x & 0x0000ffff) == 0){n += 16; x >>= 16;}
   if((x & 0x000000ff) == 0){n += 8; x >>= 8;}
   if((x & 0x0000000f) == 0){n += 4; x >>= 4;}
   if((x & 0x00000003) == 0){n += 2; x >>= 2;}
   return n - (x & 1);
#endif
#if 0
   unsigned y, bz, b4, b3, b2, b1, b0;

   y = x & -x;              // Isolate rightmost 1-bit;
   bz = y ? 0 : 1;          // 1 if y = 0;
   b4 = (y & 0x0000ffff) ? 0 : 16;
   b3 = (y & 0x00ff00ff) ? 0 : 8;
   b2 = (y & 0x0f0f0f0f) ? 0 : 4;
   b1 = (y & 0x33333333) ? 0 : 2;
   b0 = (y & 0x55555555) ? 0 : 1;
   return bz + b4 + b3 + b2 + b1 + b0;
#endif
#if 0
#define u   ' '
    static char table[64] = {
        32, 0, 1, 12, 2, 6, u, 13,     3, u, 7, u, u, u, u, 14,
        10, 4, u, u, 8, u, u, 25,     u, u, u, u, u, 25, 27, 15,
        31, 11, 5, u, u, u, u, u,       9, u, u, 24, u, u, 20, 26,
        30, u, u, u, u, 23, u, 19,       29, u, 22, 18, 28, 17, 16, u
    };
#undef  u
//    x = (x & -x) * 0x0450fbaf;  // 0x0450fbaf=17*65*655;
    x = (x & -x);
    x = (x << 4) + x;       // x = x*17;
    x = (x << 6) + x;       // x = x*65;
    x = (x << 16) - x;      // x = x*65535;
    return table[x >> 26];
#endif
#if 1
    static char table[32] = {
        0, 1, 2, 24, 3, 19, 6, 25,      22, 4, 20, 10, 16, 7, 12, 26,
        31, 23, 18, 5, 21, 9, 15, 11,   30, 17, 8, 14, 29, 13, 28, 27
    };
    if(x == 0) return 32;
    x = (x & -(int)x) * 0x04d7651f;      // 0x04d7651f=2047*5*256+1;
    return table[x >> 27];
#endif
}

一番左の0値バイト(大端)を探します.
//      0   (  );
int ZByteL(unsigned int x)
{
#if 1
    if((x >> 24) == 0) return 0;
    else if((x & 0x00ff0000) == 0) return 1;
    else if((x & 0x0000ff00) == 0) return 2;
    else if((x & 0x000000ff) == 0) return 3;
    else return 4;
#endif
#if 0
    unsigned int y;
    int n;

    // Original byte: 00 80 other;
    y = (x & 0x7f7f7f) + 0x7f7f7f7f;
    y = ~(y | x | 0x7f7f7f7f);
    n = Ntz(y) >> 3;
    return n;
#endif
#if 0
    unsigned int y;
    int t1, t2, t3, t4;

    y = (x & 0x7f7f7f7f) + 0x7f7f7f7f;
    y = y | x;              // Leading 1 on nonzero bytes;
    t1 = y >> 31;           // t1 = a;
    t2 = (y >> 23) & t1;    // t2 = ab;
    t3 = (y >> 15) & t2;    // t3 = abc;
    t4 = (y >> 7) & t3;     // t4 = abcd;
    return t1 + t2 + t3 + t4;
#endif
}

最右側の0値バイト(小端)を探します.
//      0   (  );
int ZByteR(unsigned int x)
{
#if 0
    if((x >> 24) == 0) return 3;
    else if((x & 0x00ff0000) == 0) return 2;
    else if((x & 0x0000ff00) == 0) return 1;
    else if((x & 0x000000ff) == 0) return 0;
    else return 4;
#endif
#if 1
    unsigned int y;
    int t1, t2, t3, t4;

    y = (x & 0x7f7f7f7f) + 0x7f7f7f7f;
    y = y | x;              // Leading 1 on nonzero bytes;
//    PrintBitNum(y);
    t1 = (y >> 7) & 0x01;   // t1 = d;
//    PrintBitNum(t1);
    t2 = (y >> 15) & t1;    // t2 = cd;
//    PrintBitNum(t2);
    t3 = (y >> 23) & t2;    // t3 = bcd;
//    PrintBitNum(t3);
    t4 = (y >> 31) & t3;    // t4 = abcd;
//    PrintBitNum(t4);
    return t1 + t2 + t3 + t4;
#endif
}

最初の所与の長さの全1ビット列(大端)を探します.
//           1  (  );
int Ffstr1(unsigned int x, int n)
{
#if 0
    int k, p;

    p = 0;
    while(x != 0){
        k = Nlz(x);         // Skip over initial 0's;
        x = x << k;         // (if any);
        p = p + k;
        k = Nlz(~x);        // Count first/next group of 1's;
        if(k >= n)          // If enough,
            return p;       // return;
        x = x << k;         // Not enough 1's, skip over;
        p = p + k;          // them;
    }
    return 32;
#else
    int s;
    while(n > 1){
        s = n >> 1;
        x = x & (x << s);
        n = n - s;
    }
    return Nlz(x);
#endif
}

全長「全1ビット列」の長さを探します.
//     “ 1  ”   ;
int Maxstr1(unsigned int x)
{
    int k;
    for(k=0; x != 0; k++) x = x & 2*x;
    return k;
}

最も長い「全1ビット列」の長さと開始位置を探します.
//     “ 1  ”        ;
int Fmaxstr1(unsigned int x, int *apos)
{
    unsigned int y;
    int s;

    if(x == 0){*apos = 32; return 0;}
    y = x & (x << 1);
    if(y == 0){s = 1; goto L1;}
    x = y & (y << 2);
    if(x == 0){s = 2; x = y; goto L2;}
    y = x & (x << 4);
    if(y == 0){s = 4; goto L4;}
    x = y & (y << 8);
    if(x == 0){s = 8; x = y; goto L8;}
    if(x == 0xfff8000){*apos = 0; return 32;}
    s = 16;

L16:
    y = x & (x << 8);
    if(y != 0){s = s + 8; x = y;}
L8:
    y = x & (x << 4);
    if(y != 0){s = s + 4; x = y;}
L4:
    y = x & (x << 2);
    if(y != 0){s = s + 2; x = y;}
L2:
    y = x & (x << 1);
    if(y != 0){s = s + 1; x = y;}
L1:
    *apos = Nlz(x);
    return s;
}

最短の「全1ビット列」の長さと開始位置を探します.
//    “ 1  ”        ;
int Fminstr1(unsigned int x, int *apos)
{
    int k;
    unsigned int b, e;

    if(x == 0){*apos = 32; return 0;}
    b = ~(x >> 1) & x;      // 0-1 transitions;
    e = x & ~(x << 1);      // 0-1 transitions;
    for(k=1; (b&e)==0; k++) e = e << 1;
    *apos = Nlz(b & e);
    return k;
}

x値より大きく、ビット1の個数が同じ数を計算する.
//    x     1       ;
int Snoob(int x)
{
    unsigned int smallest, ripple, ones;
                                    // x = xxx0 1111 0000;
    smallest = x & -x;              //     0000 0001 0000;
    ripple = x + smallest;          //     xxx1 0000 0000;
    ones = x ^ ripple;              //     0001 1111 0000;
    ones = (ones >> 2) / smallest;  //     0000 0000 0111;
    return ripple | ones;           //     xxx1 0000 0111;
}

数値を2の既知のサブべき乗の倍数に上げる/下げる.
unsigned int Flp2(unsigned int x)
{
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
    return x - (x >> 1);
}

unsigned int Clp2(unsigned int x)
{
    x = x - 1;
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >> 16);
    return x + 1;
}

圧縮と展開;

unsigned int Compress(unsigned int x, unsigned int m)
{
#if 0
    unsigned int r, s, b;       // Result, shift, mask bit;

    r = 0;
    s = 0;
    do{
        b = m & 1;
        r = r | ((x & b) << s);
        s = s + b;
        x = x >> 1;
        m = m >> 1;
    }while(m != 0);
    return r;
#else
    //      ;
    unsigned int mk, mp, mv, t;
    int i;

    x = x & m;              // Clear irrelevant bits;
    mk = ~m << 1;           // We will count 0's to right;

    for(i=0; i<5; i++){
        mp = mk ^ (mk << 1);            // Parallel suffix;
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mp = mp ^ (mp << 16);
        mv = mp & m;                    // Bits to move;
        m = m ^ mv | (mv >> (1 << i));  // Compress m;
        t = x & mv;
        x = x ^ t | (t >> (1 << i));    // Compress x;
        mk = mk & ~mp;
    }
    return x;
#endif
}

unsigned int CompressLeft(unsigned int x, unsigned int m)
{
    //      ;
    unsigned int mk, mp, mv, t;
    int i;

    x = x & m;              // Clear irrelevant bits;
    mk = ~m >> 1;           // We will count 0's to right;

    for(i=0; i<5; i++){
        mp = mk ^ (mk >> 1);            // Parallel suffix;
        mp = mp ^ (mp >> 2);
        mp = mp ^ (mp >> 4);
        mp = mp ^ (mp >> 8);
        mp = mp ^ (mp >> 16);
        mv = mp & m;                    // Bits to move;
        m = m ^ mv | (mv << (1 << i));  // Compress m;
        t = x & mv;
        x = x ^ t | (t << (1 << i));    // Compress x;
        mk = mk & ~mp;
    }
    return x;
}

unsigned int Expand(unsigned int x, unsigned int m)
{
    unsigned int m0, mk, mp, mv, t;
    unsigned array[5];
    int i;

    m0 = m;         // Save original mask;
    mk = ~m << 1;   // We will count 0's to right;

    for(i=0; i<5; i++){
        mp = mk ^ (mk << 1);        // Parallel suffix;
        mp = mp ^ (mp << 2);
        mp = mp ^ (mp << 4);
        mp = mp ^ (mp << 8);
        mp = mp ^ (mp << 16);
        mv = mp & m;                // Bits to move;
        array[i] = mv;
        m = (m ^ mv) | (mv >> (1 << i));    // Compress m;
        mk = mk & ~mp;
    }

    for(i=4; i>=0; i--){
        mv = array[i];
        t = x << (1 << i);
        x = (x & ~mv) | (t & mv);
    }
    return x & m0;      // Crear out extraneous bits;
}

unsigned int ExpandLeft(unsigned int x, unsigned int m)
{
    unsigned int m0, mk, mp, mv, t;
    unsigned array[5];
    int i;

    m0 = m;         // Save original mask;
    mk = ~m >> 1;   // We will count 0's to right;

    for(i=0; i<5; i++){
        mp = mk ^ (mk >> 1);        // Parallel suffix;
        mp = mp ^ (mp >> 2);
        mp = mp ^ (mp >> 4);
        mp = mp ^ (mp >> 8);
        mp = mp ^ (mp >> 16);
        mv = mp & m;                // Bits to move;
        array[i] = mv;
        m = (m ^ mv) | (mv << (1 << i));    // Compress m;
        mk = mk & ~mp;
    }

    for(i=4; i>=0; i--){
        mv = array[i];
        t = x >> (1 << i);
        x = (x & ~mv) | (t & mv);
    }
    return x & m0;      // Crear out extraneous bits;
}

分羊法(sheep and goats operation,SAG);
//    (sheep and goats operation, SAG);
#define SAG(x, m)       (CompressLeft(x, m) | Compress(x, ~m))

CRC32;

unsigned int CRC32(unsigned char *message)
{
#if 1
    //   CRC-32  ;
    int i, j;
    unsigned int byte, crc;

    i = 0;
    crc = 0xffffffff;
    while(message[i] != 0){
        byte = message[i];          // Get next byte;
        byte = RevBit(byte);        // 32-bit reversal;
        for(j=0; j<=7; j++){
            if((int)(crc ^ byte) < 0)
                crc = (crc << 1) ^ 0x04c11db7;
            else
                crc = crc << 1;
            byte = byte << 1;       // Ready next msg bit;
        }
        i = i + 1;
    }
    return RevBit(~crc);
#endif
#if 0
    //         ;
    int i, j;
    unsigned int byte, crc, mask;

    i = 0;
    crc = 0xffffffff;
    while(message[i] != 0){
        byte = message[i];
        crc = crc ^ byte;
        for(j=7; j>=0; j--){
            mask = -(crc & 1);
            crc = (crc >> 1) ^ (0xedb88320 & mask);
        }
        i = i + 1;
    }
    return ~crc;
#endif
#if 0
    //      CRC  ;
    int i, j;
    unsigned int byte, crc, mask;
    static unsigned int table[256];

    // Set up the table, if necessay;
    if(table[1] == 0){
        for(byte=0; byte<=255; byte++){
            crc = byte;
            for(j=7; j>=0; j--){
                mask = -(crc & 1);
                crc = (crc >> 1) ^ (0xedb88320 & mask);
            }
            table[byte] = crc;
        }
    }

    // Through with table setup, now calculate the CRC;
    i = 0;
    crc = 0xffffffff;
    while((byte = message[i]) != 0){
        crc = (crc >> 8) ^ table[(crc ^ byte) & 0xff];
        i = i + 1;
    }
    return ~crc;
#endif
}

反転ビット;
//     ;
// 30RISC;
unsigned int RevBit(unsigned int x)
{
    x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
    x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
    x = (x & 0x0f0f0f0f) << 4 | (x >> 4) & 0x0f0f0f0f;
    x = (x << 24) | ((x & 0xff00) << 8) | ((x >>8) & 0xff00) | (x >> 24);
    return x;
}
// shlr, 25RISC;
unsigned int KnuthRevBit(unsigned int x)
{
    unsigned int t;
    x = ROTATE_LEFT32(x, 15); // shlr(x, 15)
    t = (x ^ (x >> 10)) & 0x003f801f; x = (t | (t << 10)) ^ x;
    t = (x ^ (x >> 4)) & 0x0e038421; x = (t | (t << 4)) ^ x;
    t = (x ^ (x >> 2)) & 0x22488842; x = (t | (t << 2)) ^ x;
    return x;
}

unsigned long long KnuthRevBit(unsigned long long x)
{
    unsigned long long t;
#if 1
    // 24 RISC;
    x = (x << 32) | (x >> 32); // swap register halves.
    x = (x & 0x0001ffff0001ffffLL) << 15 | (x & 0xfffe0000fffe0000LL) >> 17; // rotate left 15.
    t = (x ^ (x >> 10)) & 0x003f801f003f801fLL;
    x = (t | (t << 10)) ^ x;
    t = (x ^ (x >> 4)) & 0x0e0384210e038421LL;
    x = (t | (t << 4)) ^ x;
    t = (x ^ (x >> 2)) & 0x2248884222488842LL;
    x = (t | (t << 2)) ^ x;
#else
    // 25 RISC;
    x = (x << 31) | (x >> 33);
    t = (x ^ (x >> 20)) & 0x00000fff800007ffLL;
    x = (t | (t << 20)) ^ x;
    t = (x ^ (x >> 8)) & 0x00f8000f80700807LL;
    x = (t | (t << 8)) ^ x;
    t = (x ^ (x >> 4)) & 0x0808708080807008LL;
    x = (t | (t << 4)) ^ x;
    t = (x ^ (x >> 2)) & 0x1111111111111111LL;
    x = (t | (t << 2)) ^ x;
#endif
    return x;
}

unsigned int RevMask(unsigned int x)
{
    static unsigned char table[256] = {
        0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
        0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
        0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
        0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
        0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
        0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
        0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
        0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
        0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
        0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
        0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
        0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
        0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
        0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
        0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
        0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
        0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
        0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
        0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
        0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
        0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
        0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
        0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
        0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
        0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
        0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
        0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
        0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
        0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
        0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
        0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
        0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
    };
    unsigned int r = 0;

    for(int i=3; i>=0; i--){
        r = (r << 8) + table[x & 0xff];
        x = x >> 8;
    }
    return r;
}

ビットマトリクスの回転;
// RISC 40 + 21 + 40
void TransPose8(unsigned char a[8], int m, int n, unsigned char b[8])
{
    unsigned long long x = 0;
    int i;

    // Load 8 bytes from the input array and pack them into x.
    for(i=0; i<=7; i++){
        x = (x << 8) | a[m*i];
    }

printf("x=0x%016llx
", x); x = x & 0xaa55aa55aa55aa55LL | (x & 0x00aa00aa00aa00aaLL) << 7 | (x >> 7) & 0x00aa00aa00aa00aaLL; x = x & 0xcccc3333cccc3333LL | (x & 0x0000cccc0000ccccLL) << 14 | (x >> 14) & 0x0000cccc0000ccccLL; x = x & 0xf0f0f0f00f0f0f0fLL | (x & 0x00000000f0f0f0f0LL) << 28 | (x >> 28) & 0x00000000f0f0f0f0LL; printf("x=0x%016llx
", x); // Store result into output array b. for(i=7; i>=0; i--){ b[n*i] = x; x = x >> 8; } } #define TransPosMatrx8(x) \ x = (x & 0xaa55aa55aa55aa55LL) | ((x & 0x00aa00aa00aa00aaLL) << 7) | ((x >> 7) & 0x00aa00aa00aa00aaLL); \ x = (x & 0xcccc3333cccc3333LL) | ((x & 0x0000cccc0000ccccLL) << 14) | ((x >> 14) & 0x0000cccc0000ccccLL); \ x = (x & 0xf0f0f0f00f0f0f0fLL) | ((x & 0x00000000f0f0f0f0LL) << 28) | ((x >> 28) & 0x00000000f0f0f0f0LL); #define SwapIntArr(a) \ { \ a[0] = BLSwap32(a[0]); \ a[1] = BLSwap32(a[1]); \ a[2] = BLSwap32(a[2]); \ a[3] = BLSwap32(a[3]); \ a[4] = BLSwap32(a[4]); \ a[5] = BLSwap32(a[5]); \ a[6] = BLSwap32(a[6]); \ a[7] = BLSwap32(a[7]); \ a[8] = BLSwap32(a[8]); \ a[9] = BLSwap32(a[9]); \ a[10] = BLSwap32(a[10]); \ a[11] = BLSwap32(a[11]); \ a[12] = BLSwap32(a[12]); \ a[13] = BLSwap32(a[13]); \ a[14] = BLSwap32(a[14]); \ a[15] = BLSwap32(a[15]); \ a[16] = BLSwap32(a[16]); \ a[17] = BLSwap32(a[17]); \ a[18] = BLSwap32(a[18]); \ a[19] = BLSwap32(a[19]); \ a[20] = BLSwap32(a[20]); \ a[21] = BLSwap32(a[21]); \ a[22] = BLSwap32(a[22]); \ a[23] = BLSwap32(a[23]); \ a[24] = BLSwap32(a[24]); \ a[25] = BLSwap32(a[25]); \ a[26] = BLSwap32(a[26]); \ a[27] = BLSwap32(a[27]); \ a[28] = BLSwap32(a[28]); \ a[29] = BLSwap32(a[29]); \ a[30] = BLSwap32(a[30]); \ a[31] = BLSwap32(a[31]); \ } // RISC 1696 void TransPose32(unsigned int a[32]) { int j, k; unsigned int m, t; SwapIntArr(a); m = 0x0000ffff; for (j = 16; j != 0; j = j >> 1, m = m ^ (m << j)) { // printf("j=%d m=%08x
", j, m); for (k = 0; k<32; k = (k + j + 1) & ~j) { // printf("swap(a%d, a%d, %d, m);
", k, k+j, j); t = (a[k] ^ (a[k + j] >> j)) & m; a[k] = a[k] ^ t; a[k + j] = a[k + j] ^ (t << j); } } SwapIntArr(a); }