data {
  # Dimensions
  int<lower=1> N;                       # number of subjects
  int<lower=1> T;                       # trial dimension of the arrays below (upper bound on Tsubj)
  int<lower=1, upper=T> Tsubj[N];       # number of trials actually used for each subject

  # Trial-level observations, indexed [subject, trial]
  int<lower=1, upper=4> cue[N, T];      # which of the four cues was shown
  int<lower=0, upper=1> pressed[N, T];  # 1 = go (pressed), 0 = no-go
  real outcome[N, T];                   # outcome that drives the value update
}
transformed data {
  # Common starting value (zero) for every per-cue value vector; one entry per cue.
  vector[4] initV;
  for (k in 1:4) {
    initV[k] <- 0.0;
  }
}
parameters {
  # Declared as vectors so that priors and transforms can be vectorized.
  # Everything here is sampled on an unconstrained scale; the mapping to the
  # natural ranges (xi, ep in [0,1]; rho > 0) happens in transformed parameters.
  vector[3] mu_p;  # group-level locations, indexed 1 = xi, 2 = ep, 3 = rho
  vector<lower=0>[3] sigma; # group-level scales, same indexing as mu_p
  vector[N] xi_p; # per-subject standardized offset for xi (noise)
  vector[N] ep_p; # per-subject standardized offset for ep (learning rate)
  vector[N] rho_p; # per-subject standardized offset for rho (inv temp; scales the outcome in the value update)
}
transformed parameters{
  # Subject-level parameters on their natural scales, built from the
  # group location/scale and each subject's standardized offset.
  vector<lower=0,upper=1>[N] xi;   # noise, constrained to [0, 1]
  vector<lower=0,upper=1>[N] ep;   # learning rate, constrained to [0, 1]
  vector<lower=0>[N] rho;          # outcome scaling, constrained to be positive

  for (s in 1:N) {
    # Phi_approx squashes the unconstrained value into (0, 1)
    xi[s]  <- Phi_approx( mu_p[1] + sigma[1] * xi_p[s] );
    ep[s]  <- Phi_approx( mu_p[2] + sigma[2] * ep_p[s] );
    # exp maps the unconstrained value onto the positive reals
    rho[s] <- exp( mu_p[3] + sigma[3] * rho_p[s] );
  }
}
model {
  # gng_m1: Rescorla-Wagner + noise model (Guitart-Masip et al., 2012)

  # Priors on the group-level (hyper) parameters
  mu_p  ~ normal(0, 1.0);
  sigma ~ cauchy(0,5);

  # Priors on the standardized subject-level offsets (non-centered "Matt trick")
  xi_p  ~ normal(0, 1.0);
  ep_p  ~ normal(0, 1.0);
  rho_p ~ normal(0, 1.0);

  for (s in 1:N) {
    vector[4] q_go;    # per-cue Q value of the go action
    vector[4] q_nogo;  # per-cue Q value of the no-go action

    # Every subject starts from the same initial values.
    q_go   <- initV;
    q_nogo <- initV;

    for (k in 1:Tsubj[s]) {
      int c;       # cue presented on this trial
      real p_go;   # probability of a go (press) response on this trial

      c <- cue[s, k];

      # In this model the action weights are simply the current Q values,
      # so the go probability is driven by their difference.
      p_go <- inv_logit( q_go[c] - q_nogo[c] );
      # Mix with a uniform choice according to the subject's noise level.
      p_go <- p_go * (1 - xi[s]) + xi[s]/2;

      pressed[s, k] ~ bernoulli( p_go );

      # Delta-rule update of the Q value for the action that was taken.
      if (pressed[s, k] == 1) {
        q_go[c]   <- q_go[c] + ep[s] * (rho[s] * outcome[s, k] - q_go[c]);
      } else {
        q_nogo[c] <- q_nogo[c] + ep[s] * (rho[s] * outcome[s, k] - q_nogo[c]);
      }
    } # end of trial loop
  } # end of subject loop
}
generated quantities {
  # Group-level locations mapped onto the natural parameter scales, plus the
  # per-subject log likelihood (summed over that subject's trials).
  real<lower=0, upper=1> mu_xi;   # group-level noise
  real<lower=0, upper=1> mu_ep;   # group-level learning rate
  real<lower=0> mu_rho;           # group-level rho
  real log_lik[N];                # log likelihood of each subject's choices
  
  mu_xi  <- Phi_approx(mu_p[1]);
  mu_ep  <- Phi_approx(mu_p[2]);
  mu_rho <- exp(mu_p[3]); 
  
  { # local section: variables declared in here are not written to the output
    for (i in 1:N) {
      vector[4] wv_g;  # action weight for go
      vector[4] wv_ng; # action weight for nogo
      vector[4] qv_g;  # Q value for go
      vector[4] qv_ng; # Q value for nogo
      vector[4] pGo;   # prob of go (press) 
  
      wv_g  <- initV;
      wv_ng <- initV;
      qv_g  <- initV;
      qv_ng <- initV;
    
      log_lik[i] <- 0;

      # Replay exactly the trials used in the model block. Iterating to T
      # instead of Tsubj[i] would add padded trials to log_lik for any subject
      # with fewer than T trials, so the reported log likelihood would not
      # match the likelihood that was actually fitted.
      for (t in 1:Tsubj[i])  {
        wv_g[ cue[i,t] ]  <- qv_g[ cue[i,t] ];
        wv_ng[ cue[i,t] ] <- qv_ng[ cue[i,t] ];  # qv_ng is always equal to wv_ng (regardless of action)      
        pGo[ cue[i,t] ]   <- inv_logit( wv_g[ cue[i,t] ] - wv_ng[ cue[i,t] ] ); 
        pGo[ cue[i,t] ]   <- pGo[ cue[i,t] ] * (1 - xi[i]) + xi[i]/2;  # noise
        log_lik[i] <- log_lik[i] + bernoulli_log( pressed[i,t], pGo[ cue[i,t] ] );
        
        # update the Q value of the action that was taken
        if (pressed[i,t]) { # update go value 
          qv_g[ cue[i,t] ]  <- qv_g[ cue[i,t] ] + ep[i] * (rho[i] * outcome[i,t] - qv_g[ cue[i,t] ]);
        } else { # update no-go value  
          qv_ng[ cue[i,t] ] <- qv_ng[ cue[i,t] ] + ep[i] * (rho[i] * outcome[i,t] - qv_ng[ cue[i,t] ]);  
        }  
      } # end of t loop
    } # end of i loop
  } # end of local section
}
