/* ------------------------------------------------------------------
   Hebbian Button Pattern Learner (HBPL) — RP2040 (Arduino-Pico)
   ---------------------------------------------------------------
   • 2 buttons (A & B) → positional one-hot history window (N=6 → 12 units)
   • 2 LEDs (PWM) → belief-derived brightness; dims under uncertainty
   • Online Hebbian learning + anti-Hebbian penalty (teacher-forced)
   • Fast activity decay: history activity decays exponentially with time
   • Slow weight decay: time-based half-life (independent of tap speed)
   • Weight clipping to keep values bounded
   • Logistic belief from activation difference (stable + cheap)
   ---------------------------------------------------------------
   Target platform: RP2040, Arduino-Pico framework (PlatformIO)
   Author: fl@C@  -  https://hackaday.io/flatCat
-------------------------------------------------------------------*/

#include <Arduino.h>

/* ------------------------------------------------------------------
   Teaching notes (consistent with the paper's teaching intent)
   -------------------------------
   This firmware intentionally behaves like a "transparent teaching artifact":

   1) No train/infer modes.
      Every valid tap both evaluates the current prediction (LEDs) and updates
      learning (weights).

   2) Learn from PRE-tap context.
      The model predicts from the current history trace, then the tap provides
      the label, then the tap is shifted into the history. This preserves
      "next-symbol prediction" semantics.

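   3) Ambiguity is a no-op.
      Simultaneous A+B presses (both press edges seen in the same loop() pass)
      are ignored so we do not "teach" from ambiguous inputs. A second press
      that lands within DEBOUNCE_MS of an accepted tap is dropped by the
      debounce, but the first tap has already been learned.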

   4) LED output is pedagogical.
      Brightness is designed to communicate confidence/uncertainty to a human,
      not to be a strict probability meter.
-------------------------------------------------------------------*/

/* ---------- Pins / Config ---------- */
// Number of past taps kept in the history window.
static constexpr uint8_t  N_HISTORY = 6;
static constexpr uint8_t  N_INPUTS  = (uint8_t)(2 * N_HISTORY); // 2*N_HISTORY presynaptic units (positional one-hot)

static constexpr uint8_t  IN_A_PIN  = 12;   // Button A (active LOW)
static constexpr uint8_t  IN_B_PIN  = 19;   // Button B (active LOW)
static constexpr uint8_t  LED_A_PIN = 9;   // PWM LED A
static constexpr uint8_t  LED_B_PIN = 22;   // PWM LED B

// Minimum spacing between two accepted taps; a press edge that arrives sooner
// after the previous accepted tap is dropped in loop().
static constexpr uint32_t DEBOUNCE_MS        = 25;   // tap event debounce

/* ---------- Learning knobs ---------- */
// Hebbian step sizes (in weight units per fully-active history unit, activity in Q15 [0..32767])
static constexpr int16_t ETA      = 900;  // strengthen correct neuron
static constexpr int16_t ANTI_ETA = 450;  // weaken wrong neuron (smaller than ETA)

// Weight bounds: every weight is kept inside [-W_CLIP, +W_CLIP].
static constexpr int16_t W_CLIP   = 12000;

// Confidence “temperature”: higher = more uncertainty (softer).
// The activation difference is scaled by 32768/TEMP before the sigmoid.
static constexpr int16_t TEMP     = 2600;

/* Time-based weight decay (half-life) */
static constexpr uint32_t W_HALF_LIFE_MS = 180000; // 3 minutes half-life (tune)

/* Decay tick cadence */
// The decay helpers do nothing until at least this much time has elapsed
// since their previous update.
static constexpr uint32_t DECAY_TICK_MS = 20;      // 50 Hz decay updates
// If the main loop ever stalls (debug break, USB hiccup, etc.), prevent a single
// giant dt from collapsing state to zero in one step: the elapsed time is
// still applied as decay, but it is clamped to this value so one late tick
// cannot instantly wipe history/weights/drive.
static constexpr uint32_t MAX_DECAY_DT_MS = 1000;

/* Activity decay (fast working-memory fade) */
static constexpr uint32_t ACT_HALF_LIFE_MS = 2500; // ~seconds-scale fade (tune)

/* LED brightness scaling */
// Floor added to the confidence term of the overall LED brightness.
// NOTE(review): update_leds_from_acts() additionally multiplies each LED by a
// signed confidence fraction that is ~0 when pA≈pB, so at full uncertainty both
// LEDs end up near-off rather than "equal-dim"; the floor only lifts brightness
// at low-but-nonzero confidence. Confirm which behavior is intended.
static constexpr uint16_t CONF_FLOOR_U16 = 5000;   // ~7.6% of max (tune)

/* Optional generative continuation (demo-only; disabled by default) */
// The constants below are only read on the ENABLE_GENERATION path in loop().
static constexpr bool     ENABLE_GENERATION      = false;
// Adaptive idle detection:
// Enter "self-advance" only after a silence that is long relative to the user's recent tempo.
static constexpr uint32_t IDLE_MIN_MS            = 900;  // never enter idle sooner than this
static constexpr uint16_t IDLE_MULT_X10          = 22;   // idle if silence > 2.2× last inter-tap interval
static constexpr uint32_t IDLE_PAD_MS            = 200;  // extra slack for human timing variability
static constexpr uint32_t IDLE_MAX_MS            = 4000; // cap so it still eventually enters idle
static constexpr uint16_t GEN_MIN_CONF_U16       = 6000; // below this, don't advance (too uncertain)
static constexpr uint16_t GEN_MIN_DRIVE_Q15      = 1200; // below this, generation is effectively off
static constexpr uint32_t GEN_DRIVE_HALF_LIFE_MS = 5000; // fade-out of "self drive" after taps stop
static constexpr uint32_t PULSE_MS               = 70;   // visual pulse on each generated step

// These constants are used as divisors, so they must never be zero.
static_assert(TEMP > 0, "TEMP must be > 0");
static_assert(W_HALF_LIFE_MS > 0, "W_HALF_LIFE_MS must be > 0");
static_assert(ACT_HALF_LIFE_MS > 0, "ACT_HALF_LIFE_MS must be > 0");
static_assert(GEN_DRIVE_HALF_LIFE_MS > 0, "GEN_DRIVE_HALF_LIFE_MS must be > 0");

/* ---------- State ---------- */
// History activity (Q15): positional one-hot with exponential decay.
// Indexing: idx = pos*2 + sym, where sym 0=A, 1=B, pos 0=newest .. N_HISTORY-1=oldest.
static int16_t act[N_INPUTS] = {0};

// Two weight vectors (one per output neuron), indexed like act[].
static int16_t wA[N_INPUTS] = {0};
static int16_t wB[N_INPUTS] = {0};

// millis() timestamps. Elapsed time is always computed as (now - stamp).
static uint32_t last_edge_ms  = 0;      // last accepted tap (debounce reference)
static uint32_t last_decay_ms = 0;      // last weight-decay update
static uint32_t last_act_ms   = 0;      // last activity-decay update
static uint32_t last_user_tap_ms = 0;   // last accepted user tap (tempo / idle detection)
static uint32_t last_user_dt_ms  = 450; // default "beat" until we observe taps

static uint32_t last_gen_drive_ms = 0;  // last generation-drive decay update
static int16_t  gen_drive_q15 = 0;      // decays after user stops; powers self-advance

// Time of the next generated step; 0 means "nothing scheduled".
static uint32_t next_gen_ms = 0;

// Pulse overlay: the selected LED is forced to full brightness until this time.
static uint32_t pulse_until_ms = 0;
static bool     pulse_led_is_A = true;

static bool last_symbol_is_A = true; // tie-breaker for pure uncertainty

// Previous raw button readings, used to detect press edges in loop().
static bool prev_a_pressed = false;
static bool prev_b_pressed = false;

/* ---------- Helpers ---------- */
// Saturate a 32-bit value into the inclusive range [lo, hi] and narrow it to
// int16_t. The lower bound wins if the bounds are ever inverted.
static inline int16_t clamp_i16(int32_t v, int16_t lo, int16_t hi) {
  int32_t bounded = v;
  if (bounded > (int32_t)hi) {
    bounded = hi;
  }
  if (v < (int32_t)lo) {
    bounded = lo;
  }
  return (int16_t)bounded;
}

// Saturate a signed 32-bit value into the unsigned 16-bit range [0, 65535].
static inline uint16_t clamp_u16_from_i32(int32_t v) {
  const int32_t top = 65535;
  const int32_t capped = (v > top) ? top : v;
  return (uint16_t)((capped < 0) ? 0 : capped);
}

// Division-based sigmoid stand-in: maps an int16 input onto [0..65535] with
// 32768 as the midpoint (x == 0).
//   s ≈ 0.5 + x / (2*(|x|+knee)), scaled so that 1.0 corresponds to 65536.
static inline uint16_t sigmoid_u16(int16_t x) {
  const int32_t knee = 9000;  // softness constant: larger = flatter curve
  const int32_t xi = x;
  const int32_t magnitude = (xi >= 0) ? xi : -xi;
  const int32_t offset = (xi * 32768) / (magnitude + knee);

  int32_t out = 32768 + offset;
  if (out > 65535) {
    out = 65535;
  } else if (out < 0) {
    out = 0;
  }
  return (uint16_t)out;
}

/* ---------- Activity / History ops ---------- */
// Reset every history-activity unit to zero.
static inline void clear_activity() {
  uint8_t remaining = N_INPUTS;
  while (remaining > 0) {
    --remaining;
    act[remaining] = 0;
  }
}

// Push one symbol into the history trace.
//   tap_is_A : true for symbol A, false for symbol B.
//   amp_q15  : activity written at the newest position (Q15, 0..32767).
//              Larger values are saturated to 32767 so they cannot turn into a
//              negative activity when stored in the int16_t trace.
static inline void shift_in_tap_amp(bool tap_is_A, uint16_t amp_q15) {
  const int16_t amp = (amp_q15 > 32767u) ? (int16_t)32767 : (int16_t)amp_q15;

  // Shift older positions (pos k becomes pos k+1); the oldest pair falls off.
  // Iterating from the oldest slot downwards keeps each source pair intact
  // until it has been copied.
  for (int pos = (int)N_HISTORY - 1; pos >= 1; --pos) {
    const uint8_t dst = (uint8_t)(pos * 2);
    const uint8_t src = (uint8_t)((pos - 1) * 2);
    act[dst + 0] = act[src + 0];
    act[dst + 1] = act[src + 1];
  }
  // Newest position is a one-hot "event" with the requested amplitude.
  act[0] = tap_is_A ? amp : (int16_t)0;
  act[1] = tap_is_A ? (int16_t)0 : amp;
}

// Push a real (user) tap into the history at full Q15 amplitude.
static inline void shift_in_tap(bool tap_is_A) {
  const uint16_t full_scale_q15 = 32767;
  shift_in_tap_amp(tap_is_A, full_scale_q15);
}

/* ---------- Model ---------- */
// Compute both output activations from the current history trace.
//   aA, aB (out): Σ w[i]*act[i] for each neuron, shifted right by 15 because
//   act is Q15, then saturated to [-32000, 32000].
static inline void compute_acts(int16_t &aA, int16_t &aB) {
  int64_t accA = 0;
  int64_t accB = 0;

  for (uint8_t k = 0; k < N_INPUTS; ++k) {
    const int64_t trace = act[k];
    if (trace != 0) {  // silent units contribute nothing
      accA += trace * (int64_t)wA[k];
      accB += trace * (int64_t)wB[k];
    }
  }

  aA = clamp_i16((int32_t)(accA >> 15), -32000, 32000);
  aB = clamp_i16((int32_t)(accB >> 15), -32000, 32000);
}

// Teacher-forced Hebbian update using the current (pre-tap) history trace.
//   truth_is_A: the symbol that was actually tapped.
// For every active history unit:
//   weights of the tapped symbol's neuron  += ETA      * activity
//   weights of the other neuron            -= ANTI_ETA * activity
// with activity in Q15 [0..32767] and results clipped to [-W_CLIP, W_CLIP].
static inline void learn_from_truth(bool truth_is_A) {
  int16_t *const winner = truth_is_A ? wA : wB;
  int16_t *const loser  = truth_is_A ? wB : wA;

  for (uint8_t k = 0; k < N_INPUTS; ++k) {
    const int32_t trace = act[k];
    if (trace == 0) {
      continue;
    }

    const int32_t reward  = ((int32_t)ETA      * trace) >> 15;
    const int32_t penalty = ((int32_t)ANTI_ETA * trace) >> 15;

    winner[k] = clamp_i16((int32_t)winner[k] + reward,  -W_CLIP, W_CLIP);
    loser[k]  = clamp_i16((int32_t)loser[k]  - penalty, -W_CLIP, W_CLIP);
  }
}

static inline void time_based_weight_decay(uint32_t now_ms) {
  // Apply exponential-ish decay based on elapsed time dt:
  // factor ≈ 1 - ln(2)*dt/half_life  (Q16), clamped.
  uint32_t dt = now_ms - last_decay_ms;
  if (dt < DECAY_TICK_MS) return;
  last_decay_ms = now_ms;
  if (dt > MAX_DECAY_DT_MS) dt = MAX_DECAY_DT_MS;

  // Q16 ln(2)
  static constexpr uint32_t LN2_Q16 = 45426; // ln(2)*65536

  int32_t dec_q16 = (int32_t)((uint64_t)LN2_Q16 * dt / W_HALF_LIFE_MS);
  int32_t factor_q16 = 65536 - dec_q16;
  if (factor_q16 < 0) factor_q16 = 0;

  for (uint8_t i = 0; i < N_INPUTS; i++) {
    wA[i] = clamp_i16(((int32_t)wA[i] * factor_q16) >> 16, -W_CLIP, W_CLIP);
    wB[i] = clamp_i16(((int32_t)wB[i] * factor_q16) >> 16, -W_CLIP, W_CLIP);
  }
}

// Fast forgetting: fade the history trace toward 0 according to elapsed time.
//   now_ms: current millis() value.
// Runs at most once per DECAY_TICK_MS; the elapsed time is capped at
// MAX_DECAY_DT_MS. After scaling, each history position keeps only its larger
// unit (A wins ties) so the trace stays one-hot per position.
static inline void time_based_activity_decay(uint32_t now_ms) {
  const uint32_t elapsed_raw = now_ms - last_act_ms;
  if (elapsed_raw < DECAY_TICK_MS) {
    return;
  }
  last_act_ms = now_ms;
  const uint32_t elapsed = (elapsed_raw > MAX_DECAY_DT_MS) ? MAX_DECAY_DT_MS : elapsed_raw;

  static constexpr uint32_t LN2_Q16 = 45426; // ln(2)*65536
  const int32_t loss_q16 = (int32_t)((uint64_t)LN2_Q16 * elapsed / ACT_HALF_LIFE_MS);
  // keep = 1 - ln(2)*dt/half_life in Q16, never below 0
  const int32_t keep_q16 = (loss_q16 > 65536) ? 0 : (65536 - loss_q16);

  for (uint8_t pos = 0; pos < N_HISTORY; ++pos) {
    int16_t *const pair = &act[pos * 2];

    const int16_t fadedA = clamp_i16(((int32_t)pair[0] * keep_q16) >> 16, 0, 32767);
    const int16_t fadedB = clamp_i16(((int32_t)pair[1] * keep_q16) >> 16, 0, 32767);

    // Winner per position: the weaker unit is silenced.
    const bool a_wins = (fadedA >= fadedB);
    pair[0] = a_wins ? fadedA : (int16_t)0;
    pair[1] = a_wins ? (int16_t)0 : fadedB;
  }
}

// Drive both LEDs from the two output activations.
//   aA, aB: activations of neuron A and neuron B (see compute_acts()).
//
// Steps:
//   1. Belief: p(B) = sigmoid((aB - aA) * 32768 / TEMP), p(A) = 65535 - p(B).
//   2. Overall brightness = recent-activity strength × confidence strength,
//      where confidence strength has a floor of CONF_FLOOR_U16.
//   3. Each LED gets `overall` scaled by how far the belief leans its way, so
//      the LED on the losing side is off and both are off when p(A) ≈ p(B).
// The 16-bit results are written as 8-bit PWM values via analogWrite().
static inline void update_leds_from_acts(int16_t aA, int16_t aB) {
  // --- 1. belief ---------------------------------------------------------
  const int32_t margin = (int32_t)(aB - aA);
  const int32_t scaled_margin = (int32_t)((int64_t)margin * 32768 / TEMP);
  const uint16_t pB = sigmoid_u16(clamp_i16(scaled_margin, -32000, 32000));
  const uint16_t pA = (uint16_t)(65535 - pB);

  // --- 2. overall brightness ----------------------------------------------
  // Activity strength: summed trace (Q15) relative to N_HISTORY fully-active
  // units, rescaled to 0..65535. It falls to 0 as the trace decays.
  int32_t trace_total = 0;
  for (uint8_t k = 0; k < N_INPUTS; ++k) {
    trace_total += act[k];
  }
  const uint16_t activity_strength = clamp_u16_from_i32(
      (int32_t)((int64_t)trace_total * 65535 / ((int32_t)N_HISTORY * 32767)));

  // lean > 0 favours A, lean < 0 favours B; conf = 2*|pA - 0.5| in 0..65535.
  const int32_t lean = (int32_t)pA - 32768;
  const int32_t lean_mag = (lean < 0) ? -lean : lean;
  const uint16_t conf_u16 = clamp_u16_from_i32(lean_mag * 2);

  // confidence_strength = CONF_FLOOR + conf*(1 - CONF_FLOOR)
  const uint32_t above_floor = (uint32_t)(65535 - CONF_FLOOR_U16);
  const uint32_t confidence_strength =
      (uint32_t)CONF_FLOOR_U16 + ((uint32_t)conf_u16 * above_floor) / 65535;

  const uint32_t overall = ((uint32_t)activity_strength * confidence_strength) / 65535;

  // --- 3. per-LED share ---------------------------------------------------
  uint16_t fracA = 0;  // 0..65535
  uint16_t fracB = 0;
  if (lean > 0) {
    fracA = clamp_u16_from_i32(lean * 2);
  } else if (lean < 0) {
    fracB = clamp_u16_from_i32((-lean) * 2);
  }

  const uint16_t outA = (uint16_t)(((uint32_t)overall * (uint32_t)fracA) / 65535);
  const uint16_t outB = (uint16_t)(((uint32_t)overall * (uint32_t)fracB) / 65535);

  // Arduino PWM (8-bit): keep the high byte of each 16-bit level.
  analogWrite(LED_A_PIN, (uint8_t)(outA >> 8));
  analogWrite(LED_B_PIN, (uint8_t)(outB >> 8));
}

// Saturate v into the inclusive range [lo, hi]. The lower bound wins if the
// bounds are ever inverted.
static inline uint32_t clamp_u32(uint32_t v, uint32_t lo, uint32_t hi) {
  uint32_t bounded = (v > hi) ? hi : v;
  if (v < lo) {
    bounded = lo;
  }
  return bounded;
}

// Silence (in ms) required before the device counts as idle:
//   IDLE_PAD_MS + (IDLE_MULT_X10 / 10) * last_user_dt_ms,
// clamped to [IDLE_MIN_MS, IDLE_MAX_MS].
static inline uint32_t idle_threshold_ms() {
  const uint32_t tempo_part = (last_user_dt_ms * (uint32_t)IDLE_MULT_X10) / 10;
  return clamp_u32(tempo_part + IDLE_PAD_MS, IDLE_MIN_MS, IDLE_MAX_MS);
}

// Fade the generation drive toward 0 according to elapsed time.
//   now_ms: current millis() value.
// Does nothing while the drive is already 0, and otherwise runs at most once
// per DECAY_TICK_MS with the elapsed time capped at MAX_DECAY_DT_MS.
static inline void time_based_gen_drive_decay(uint32_t now_ms) {
  if (gen_drive_q15 > 0) {
    const uint32_t since = now_ms - last_gen_drive_ms;
    if (since >= DECAY_TICK_MS) {
      last_gen_drive_ms = now_ms;
      const uint32_t step_ms = (since > MAX_DECAY_DT_MS) ? MAX_DECAY_DT_MS : since;

      static constexpr uint32_t LN2_Q16 = 45426; // ln(2)*65536
      const int32_t loss_q16 = (int32_t)((uint64_t)LN2_Q16 * step_ms / GEN_DRIVE_HALF_LIFE_MS);
      // keep = 1 - ln(2)*dt/half_life in Q16, never below 0
      const int32_t keep_q16 = (loss_q16 > 65536) ? 0 : (65536 - loss_q16);

      gen_drive_q15 = clamp_i16(((int32_t)gen_drive_q15 * keep_q16) >> 16, 0, 32767);
    }
  }
}

// Arm the pulse overlay: the chosen LED is shown at full brightness by
// render_pulse_overlay() until PULSE_MS after now_ms.
static inline void trigger_pulse(bool led_is_A, uint32_t now_ms) {
  const uint32_t deadline_ms = now_ms + PULSE_MS;
  pulse_until_ms = deadline_ms;
  pulse_led_is_A = led_is_A;
}

// While a pulse is armed (now_ms is before pulse_until_ms), force the pulsed
// LED to full 8-bit PWM brightness, overriding whatever was written before.
static inline void render_pulse_overlay(uint32_t now_ms) {
  const bool pulse_active = (now_ms < pulse_until_ms);
  if (pulse_active) {
    const uint8_t pin = pulse_led_is_A ? LED_A_PIN : LED_B_PIN;
    analogWrite(pin, 255);
  }
}

/* ---------- Arduino lifecycle ---------- */
// Arduino entry point, run once at boot: configures serial and pins, aligns all
// timestamps to the current millis() value, clears the history trace and blinks
// each LED once as a wiring check.
void setup() {
  Serial.begin(115200);

  // Buttons are read as active LOW, so enable the internal pull-ups.
  pinMode(IN_A_PIN, INPUT_PULLUP);
  pinMode(IN_B_PIN, INPUT_PULLUP);

  pinMode(LED_A_PIN, OUTPUT);
  pinMode(LED_B_PIN, OUTPUT);

  // Start every elapsed-time measurement from "now" so the first decay tick
  // and the first tempo/idle measurement do not see a bogus interval.
  uint32_t now = millis();
  last_edge_ms = now;
  last_decay_ms = now;
  last_act_ms = now;
  last_user_tap_ms = now;
  last_gen_drive_ms = now;
  next_gen_ms = 0;  // 0 = no generated step scheduled

  clear_activity();

  // Power-on: silent/dark (no belief, no prediction)
  analogWrite(LED_A_PIN, 0);
  analogWrite(LED_B_PIN, 0);

  // Tiny hardware sanity test: A then B. Helps catch swapped pins / dead channel quickly.
  // Each LED is lit for 80 ms; setup() blocks for that time.
  analogWrite(LED_A_PIN, 255);
  delay(80);
  analogWrite(LED_A_PIN, 0);
  analogWrite(LED_B_PIN, 255);
  delay(80);
  analogWrite(LED_B_PIN, 0);

  // NOTE(review): no call to random() is visible in this file; the seed may be
  // unused — confirm before relying on it.
  randomSeed(micros());
}

// One pass of the optional self-advance demo (only called when
// ENABLE_GENERATION is true and no tap arrived in this loop() pass).
//   now        : current millis() value.
//   actA, actB : activations already computed for the current trace.
// A step is generated only after a sufficiently long silence, while the
// generation drive is still above GEN_MIN_DRIVE_Q15, and only if the model is
// confident enough. Steps follow the last observed tapping tempo.
static inline void hbpl_generation_tick(uint32_t now, int16_t actA, int16_t actB) {
  const bool idle = (now - last_user_tap_ms) > idle_threshold_ms();

  // Only generate after we've seen at least one real tap (the drive starts at 0
  // and is only raised by taps), and while drive remains.
  if (!idle || gen_drive_q15 < (int16_t)GEN_MIN_DRIVE_Q15) {
    next_gen_ms = 0;
    return;
  }

  if (next_gen_ms == 0) next_gen_ms = now; // start immediately on entering idle

  // Signed difference keeps the "is it due yet?" test correct across millis()
  // rollover; next_gen_ms is never more than one beat away from now.
  if ((int32_t)(now - next_gen_ms) < 0) return;

  // Hebbian-honest continuation: generate from the model (no stored sequence).
  const int32_t raw = (int32_t)(actB - actA);
  const int16_t diff = clamp_i16((int32_t)((int64_t)raw * 32768 / TEMP), -32000, 32000);
  const uint16_t pB = sigmoid_u16(diff);
  const uint16_t pA = (uint16_t)(65535 - pB);
  const int32_t d = (int32_t)pA - 32768;
  const uint16_t conf_u16 = clamp_u16_from_i32(((d < 0) ? -d : d) * 2);

  if (conf_u16 >= GEN_MIN_CONF_U16) {
    // Deterministic replay: argmax; tie-break alternates.
    const bool step_is_A = (d == 0) ? !last_symbol_is_A : (d > 0);
    last_symbol_is_A = step_is_A;
    shift_in_tap_amp(step_is_A, (uint16_t)gen_drive_q15);
    trigger_pulse(step_is_A, now);
  }

  // Beat period: follow last observed tapping tempo (clamped)
  const uint32_t beat_ms = clamp_u32(last_user_dt_ms, 180, 1600);
  next_gen_ms = now + beat_ms;
  if (next_gen_ms == 0) next_gen_ms = 1;  // 0 is reserved for "nothing scheduled"
}

// Arduino main loop: decay state, read the buttons, show the current
// prediction, and on a valid tap learn from it and shift it into the history.
void loop() {
  const uint32_t now = millis();

  // Slow forgetting (weights) independent of tap speed
  time_based_weight_decay(now);

  // Fast forgetting (activity/trace)
  time_based_activity_decay(now);

  // Generative continuation drive decays after user stops tapping
  time_based_gen_drive_decay(now);

  // Read buttons (active LOW)
  const bool a_pressed = (digitalRead(IN_A_PIN) == LOW);
  const bool b_pressed = (digitalRead(IN_B_PIN) == LOW);

  // Per-button contact-bounce lockout: time of the last raw level change
  // (press OR release). A press edge only counts as a tap if the button's
  // level had been stable for DEBOUNCE_MS beforehand; otherwise the bounce
  // that follows a release would register as a new tap.
  static uint32_t a_changed_ms = 0;
  static uint32_t b_changed_ms = 0;

  const bool a_changed = (a_pressed != prev_a_pressed);
  const bool b_changed = (b_pressed != prev_b_pressed);

  // Fire only on press edges (taps), not while held.
  const bool a_tap = a_changed && a_pressed && ((now - a_changed_ms) >= DEBOUNCE_MS);
  const bool b_tap = b_changed && b_pressed && ((now - b_changed_ms) >= DEBOUNCE_MS);

  if (a_changed) a_changed_ms = now;
  if (b_changed) b_changed_ms = now;
  prev_a_pressed = a_pressed;
  prev_b_pressed = b_pressed;

  // Predict continuously (LEDs fade while idle).
  int16_t actA, actB;
  compute_acts(actA, actB);
  update_leds_from_acts(actA, actB);
  render_pulse_overlay(now);

  // If user didn't tap, optionally self-advance (demo-only).
  //
  // For classroom use (the default), we return immediately on "no input":
  // the device is purely reactive and never invents taps.
  if (!a_tap && !b_tap) {
    if (ENABLE_GENERATION) {
      hbpl_generation_tick(now, actA, actB);
    }
    return;
  }

  if (a_tap && b_tap) {
    // Invalid ambiguous input: ignore (no learning update).
    return;
  }

  // Minimum spacing between accepted taps (applies across both buttons).
  if (now - last_edge_ms < DEBOUNCE_MS) return;
  last_edge_ms = now;

  // IMPORTANT:
  // Learn using PRE-TAP history (what it used to predict),
  // then shift in this tap, then compute prediction for NEXT tap.
  const bool truth_is_A = a_tap;
  last_symbol_is_A = truth_is_A;

  // Update observed tapping tempo (implausibly short or long gaps are ignored)
  const uint32_t dt = now - last_user_tap_ms;
  if (dt > 20 && dt < 4000) last_user_dt_ms = dt;
  last_user_tap_ms = now;

  // Reset generative continuation drive on real taps
  gen_drive_q15 = 32767;
  last_gen_drive_ms = now;
  next_gen_ms = 0;

  learn_from_truth(truth_is_A);
  shift_in_tap(truth_is_A);

  // Predict next
  compute_acts(actA, actB);
  update_leds_from_acts(actA, actB);
}