/* phaseout.c
created lun. juin  5 04:41:28 CEST 2017 by Yann Guidon (whygee@f-cpu.org)
version mar. juin  6 05:55:18 CEST 2017 : merged phase-out and bitstream
version mer. juin  7 08:35:59 CEST 2017 : stripped down
version jeu. juin  8 13:11:28 CEST 2017 : cleaner, works better
version mar. juil. 4 05:19:23 CEST 2017 : tiny optimisations

Bitstream insertion and extraction with or without phase-out encoding, 64-bit version

*/

#include <stdint.h>
#include <stdio.h>

// Every 32-bit stream word goes through htonl() when stored and ntohl()
// when loaded.  With BYTESWAP defined these are the real conversions from
// <arpa/inet.h>; otherwise they are identity macros, so words are kept in
// host byte order.  The fallbacks are function-like so that only calls are
// rewritten (an empty object-like macro would erase the bare identifier
// wherever it appears).
#ifdef BYTESWAP
#include <arpa/inet.h>
#else
#define htonl(x) (x)
#define ntohl(x) (x)
#endif

// Capacity of the stream buffer, counted in 32-bit words.  It can be
// overridden by defining BUFFER_LENGTH before this point.
#ifndef BUFFER_LENGTH
#define BUFFER_LENGTH (1 << 12)
#endif

// State shared by the encoding and the decoding routines.
int Reg_Offset;       // current bit position inside Reg_buffer
uint64_t Reg_buffer;  // shift register holding the bits in transit
int Stream_length;    // index of the next Buffer32 word to store or load
int Stream_max;       // not referenced by the routines visible in this file
uint32_t Buffer32[BUFFER_LENGTH];  // the bitstream, as 32-bit words

////////////////////////////////////////////////////////////
//                         ENCODE                         //
////////////////////////////////////////////////////////////

// Put the shared state in its "empty encoder" configuration.
void init_encode() {
  Reg_buffer = 0;     // no bit pending in the register
  Reg_Offset = 63;    // send_phaseout() inserts each code just below this bit
  Stream_length = 0;  // the first completed word goes to Buffer32[0]
}

// Store the partially filled word, if there is one, at the end of encoding.
//
// send_phaseout() stores the upper word of Reg_buffer as soon as it is
// complete, so at most one incomplete word can be pending here.
// Reg_Offset == 63 means that no bit is pending; it cannot exceed 63 as long
// as only init_encode() and send_phaseout() modify it.
//
// The word goes through htonl() exactly like the words stored by
// send_phaseout(), so the whole stream has one byte order.  The word is
// always counted in Stream_length, but the store itself is skipped when the
// index falls outside Buffer32.
//
// Reg_Offset and Reg_buffer are left untouched: calling this twice in a row
// appends the same word twice.
void flush_bitstream() {
  if (Reg_Offset < 63) {
    const uint32_t word = htonl((uint32_t)(Reg_buffer >> 31));

    if ((unsigned)Stream_length < sizeof Buffer32 / sizeof Buffer32[0])
      Buffer32[Stream_length] = word;
    Stream_length++;
  }
}

// Encode `val` (expected to be in 0..lim) with the phase-out code and append
// it to the bitstream.
//
// Let k = floor(log2(lim)) and mask = 2^k - 1.  A value for which
// (val >> 1) > (lim & mask) is sent on k bits as val + mask - lim; any other
// value is sent unchanged on k+1 bits.  Nothing is sent when lim == 0.
//
// The mask search is only exact for lim < 65536, so a code is at most 16
// bits long.  val > lim is not rejected, only reported when DEBUG is set.
//
// Whenever the upper word of Reg_buffer (bits 31..62) is complete it is
// stored through htonl() in Buffer32[Stream_length++].  The word is always
// counted, but the store is skipped when the index falls outside Buffer32:
// the caller can detect an overflow by comparing Stream_length with
// BUFFER_LENGTH, and no memory beyond the array is written.
//
// Returns the number of bits emitted (0 when lim == 0), which might be
// useful for the calling code.
int send_phaseout(unsigned int val, unsigned int lim) {
  uint32_t k=0, l=lim, mask;

#ifdef DEBUG
  if (val>lim)
    fprintf(stderr, "\n val=%u > lim=%u\n", val, lim);
  if(Reg_Offset<0)
    fprintf(stderr, "\n Reg_Offset=%d  ???\n", Reg_Offset);
#endif

  if (lim) {
    // k = floor(log2(lim)), found by halving the search range (8, 4, 2, 1)
    if (l & ~255u) { k =8; l=lim>>8; }
    if (l & ~ 15u) { k+=4; l>>=4; }
    if (l & ~  3u) { k+=2; l>>=2; }
    if (l & ~  1u) { k+=1; l>>=1; }
    mask=(1u<<k)-1;

    if ( (val>>1) > (lim & mask) )
      val+=mask-lim;  // short code on k bits (unsigned wrap-around intended)
    else
      k++;            // long code on k+1 bits, value unchanged

    // Insertion, just below the bits already pending in the register
    Reg_Offset -= k;
    Reg_buffer |= ((uint64_t)val) << Reg_Offset;

    if (Reg_Offset < 32) { // another expression: if (~Reg_Offset & 32)
      const uint32_t word = htonl((uint32_t)(Reg_buffer >> 31));

      if ((unsigned)Stream_length < sizeof Buffer32 / sizeof Buffer32[0])
        Buffer32[Stream_length] = word;
      Stream_length++;

      // move the remaining bits up and rebase the offset (+32, since the
      // offset is below 32 here)
      Reg_buffer <<= 32;
      Reg_Offset |= 32;
    }
  }

  return k;
}

////////////////////////////////////////////////////////////
//                         DECODE                         //
////////////////////////////////////////////////////////////

//  "Preload" version:
// Prepare the shared state for decoding: the first stream word is loaded
// into the low half of the register before any value is requested.
void init_decode() {
  Reg_buffer = ntohl(Buffer32[0]);  // bits 0..31 now hold word 0
  Stream_length = 1;                // the next refill loads Buffer32[1]
  Reg_Offset = 32;                  // 32 bits are available
}

// Decode one value in 0..lim from the bitstream; `lim` must be the limit
// that was given to send_phaseout() for this value.  Returns 0 without
// consuming any bit when lim == 0.
//
// k+1 bits (the long code length, k = floor(log2(lim))) are read at once.
// When their upper k bits exceed (lim & (2^k - 1)) the value was sent with
// the short code: it is rebuilt from those k bits and the extra bit is
// handed back to the register.
//
// The register is refilled eagerly, as soon as fewer than 16 bits remain,
// which can happen after the last value of the stream.  A refill whose index
// falls outside Buffer32 therefore supplies zero bits instead of reading
// beyond the array; Stream_length is incremented in both cases.
unsigned int receive_phaseout(unsigned int lim) {
  uint32_t
    val=0,   // default value if direct return
    val1,    // val >> 1
    k,       // number of bits
    l=lim,   // temporary limit
    mask,
    mask1;   // mask >> 1

  if (lim) {
    // k = floor(log2(lim)) + 1, found by halving the search range
    k=1;
    if (l & ~255u) { k =9; l=lim>>8; }
    if (l & ~ 15u) { k+=4; l>>=4; }
    if (l & ~  3u) { k+=2; l>>=2; }
    if (l & ~  1u) { k+=1; l>>=1; }
    Reg_Offset -= k;
    mask=(1u<<k)-1;
    mask1 = mask>>1;
    val = (Reg_buffer >> Reg_Offset) & mask;
    val1 = val>>1;

    if (Reg_Offset < 16) { // if ((Reg_Offset & ~15)==0) {
      // 16 is the max number width,
      uint32_t next = 0;   // zero bits once the buffer is exhausted

      if ((unsigned)Stream_length < sizeof Buffer32 / sizeof Buffer32[0])
        next = ntohl(Buffer32[Stream_length]);
      Stream_length++;

      Reg_Offset |= 32;  // can be a simple OR because it's modulo 64
      Reg_buffer  = (Reg_buffer << 32) | next;
    }
    // adjust (phase-out)
    if (val1 > (lim & mask1)) {
      val = (val1+lim)-mask1;
      Reg_Offset++;      // short code: give the unused bit back
    }
  }

  return val;
}

// The identity htonl/ntohl macros are defined by this file only when
// BYTESWAP is NOT set (with BYTESWAP the names belong to <arpa/inet.h>).
// That is the case in which they must be removed, so that they do not leak
// into whatever code follows or includes this file.
#ifndef BYTESWAP
#undef htonl
#undef ntohl
#endif
