#' Simulate multiple trajectories from an interval-censored multi-state model
#' with Weibull transition intensities
#' 
#' @description Simulate multiple trajectories from a multi-state model quantified
#' by a transition matrix, with interval-censored transitions and Weibull 
#' distributed transition intensities. Allows for Weibull censoring in each of 
#' the states.
#' 
#' 
#' @param data A \code{data.frame} or \code{matrix} with named columns \code{time} 
#' and \code{id}, representing the observation times and corresponding subject id(entifier).
#' @param tmat A transition matrix as created by \code{\link[mstate:transMat]{transMat}}, 
#' with H rows and H columns indicating the states. The total number of possible 
#' transitions will be indicated by M.
#' @param startprobs A numeric vector of length H indicating the probability of 
#' each subject to start in any of the possible states. Must sum to 1. By default,
#' all subjects will start in state 1.
#' @param exact A numeric vector indicating which states are exactly observed. 
#' The transition time to exact states will be observed at exact times, regardless 
#' of the times in \code{obstimes}. No exact states if missing.
#' @param shape A numeric vector of length M indicating the shape of the Weibull 
#' transition intensity for the corresponding transition in \code{tmat}. See 
#' \code{help(dweibull)}.
#' @param scale A numeric vector of length M indicating the scale of the Weibull 
#' transition intensity for the corresponding transition in \code{tmat}. See 
#' \code{help(dweibull)}.
#' @param censshape A numeric vector of length H indicating the Weibull 
#' censoring shape in each of the states. If no censoring is required in some states, 
#' set corresponding entries to \code{NA}. If left missing, 
#' no censoring is applied. See details.
#' @param censscale A numeric vector of length H indicating the Weibull censoring 
#' scale in each of the states. If no censoring is required in some states, 
#' set corresponding entries to \code{NA}. If left missing, no censoring is applied.
#' See details.
#' @param n_subj (Optional) Instead of specifying \code{data}, specify the number 
#' of subjects to generate trajectories for. Requires \code{obs_pars}  to
#' also be specified.
#' @param obs_pars (Optional) A numeric vector of length 3 specifying what the 
#' time is between planned assessments, what the uniform deviation from this 
#' time is at the visits and the maximum visit time. 
#' Specifying \code{obs_pars = c(2, 0.5, 20)} will generate 
#' a grid of observation times (0, 2, 4, ..., 20) with a uniform[-0.5, 0.5] random 
#' variable added to each observation time, and cut-off at the end-points 0 and 20.
#' The observation times may not overlap, 
#' so the first argument must be at least twice as large as the second.
#' @param true_trajec Should the true (right-censored) trajectory be returned for
#' the subjects as well? Default = \code{FALSE}.
#' 
#' 
#' @importFrom igraph is_dag
#' @importFrom mstate to.trans2 msfit probtrans
#' 
#' 
#' @details  
#' Taking \code{(cens)shape} to be 1 for all transitions, we obtain exponential 
#' (censoring)/transitions with rate 1/\code{(cens)scale}.
#' 
#' If right-censoring parameters are specified, a right-censoring time is generated in 
#' each of the visited states. If the subject is right-censored, we assume the subject 
#' is no longer observed at later \code{obstimes}. Due to the interval-censored 
#' nature of the generation process, it may therefore appear as if the subject 
#' was right-censored in an earlier state.
#' 
#' Suppose a subject arrives in state g at time s. If we wish to generate 
#' a survival time from that state according to a Weibull intensity in a clock forward 
#' model, we can use the inverse transform of the conditional Weibull intensity.
#' More specifically, letting \eqn{a}{a} denote the shape and \eqn{\sigma}{\sigma} denote the scale, 
#' the conditional survival function for \eqn{t > s}{t > s} is given by
#' \deqn{S(t|s) = \mathbf{P}(T \geq t | T \geq s) = \exp(\left( \frac{s}{\sigma} \right)^a - \left( \frac{t}{\sigma} \right)^a)}{S(t|s) = P(T >= t | T >= s) = exp((s/\sigma)^a - (t/\sigma)^a)}
#' The corresponding cumulative intensity is then given by:
#' \deqn{A(t|s) = -\log(S(t|s)) = \left( \frac{t}{\sigma} \right)^a - \left( \frac{s}{\sigma} \right)^a}{A(t|s) = - log(S(t|s)) = (t/\sigma)^a - (s/\sigma)^a}
#' And the inverse cumulative intensity is then:
#' \deqn{A^{-1}(t|s) = \sigma \sqrt[a]{t + \left( \frac{s}{\sigma} \right)^a}}{A^(-1)(t|s) = \sigma (t + (s/\sigma)^a)^(1/a)}
#' A conditional survival time is then generated by:
#' \deqn{T|s = A^{-1}(-\log(U)|s)}{T|s = A^(-1)(-log(U)|s)}
#' with \eqn{U}{U} a sample from the standard uniform distribution.
#' If we additionally have covariates (or frailties), the \eqn{-\log(U)}{-log(U)}
#' above should be replaced by \eqn{\frac{-\log(U)}{\exp(\beta X)}}{(-log(U))/(exp(beta X))}
#' with \eqn{\beta}{beta} and \eqn{X}{X} the coefficients and covariates respectively.
#' 
#' @returns A \code{matrix} with 3 columns \code{time}, \code{state} and \code{id}, indicating 
#' the observation time, the corresponding state and subject identifier. If 
#' \code{true_trajec = TRUE}, a \code{list} with the matrix described above and a matrix 
#' representing the underlying right-censored trajectory.
#' 
#' @export
#' 
#' @examples 
#' require(mstate)
#' require(ggplot2)
#' #Generate from an illness-death model with exponential transitions with 
#' #rates 1/2, 1/10 and 1 for 10 subjects over a time grid.
#' gd <- sim_weibmsm(tmat = trans.illdeath(), shape = c(1,1,1),
#'                   scale = c(2, 10, 1), n_subj = 10, obs_pars = c(2, 0.5, 20), 
#'                   startprobs = c(0.9, 0.1, 0), true_trajec = TRUE)
#' 
#' #Observed trajectories
#' visualise_msm(gd$observed)
#' #True trajectories
#' visualise_msm(gd$true)
#' 
#' 
#' #Can supply data-frame with specified observation times
#' obs_df <- data.frame(time = c(0, 1, 3, 5, 0.5, 6, 9),
#'                      id = c(1, 1, 1, 1, 2, 2, 2))
#' gd <- sim_weibmsm(data = obs_df, tmat = trans.illdeath(), shape = c(1, 1, 1),
#'                   scale = c(2, 10, 1))
#' visualise_msm(gd)







sim_weibmsm <- function(data, tmat, startprobs, exact, shape, scale, 
                             censshape, censscale, n_subj, obs_pars, true_trajec = FALSE){
  
  # Alternative way of specifying data -------------
  
  if(!missing(n_subj)){
    if(missing(obs_pars)){
      stop("Please also specify 'obs_pars'.")
    }
    checkNumeric(n_subj, lower = 1, upper = Inf, len = 1)
    checkNumeric(obs_pars, len = 3, finite = TRUE, any.missing = FALSE)
    assert(obs_pars[1] >= 2*obs_pars[2])
    assert(is.finite(obs_pars[3]))
    n_visits <- floor(obs_pars[3]/obs_pars[2])+1
    visit_times <- pmin(obs_pars[3], pmax(0, replicate(n_subj, seq(0, obs_pars[3], obs_pars[2]) + runif(n_visits, -0.5, 0.5))))
    data <- data.frame(time = visit_times, id = rep(1:n_subj, each = n_visits))
  }
  
  
  # Argument Checks ---------------------------------------------------------
  
  tmat2 <- mstate::to.trans2(tmat)
  
  arg_checks <- makeAssertCollection()
  
  assertMatrix(tmat, min.rows = 2, min.cols = 2, all.missing = FALSE, add = arg_checks)
  assert(nrow(tmat) == ncol(tmat), add = arg_checks)
  
  H <- nrow(tmat)
  M <- nrow(tmat2)
  
  assertMultiClass(data, c("matrix", "data.frame"), add = arg_checks)
  assertSubset(colnames(data), c("time", "id"))
  
  if(!missing(startprobs)){
    assertNumeric(startprobs, lower = 0, upper = 1, any.missing = FALSE,
                  len = H, add = arg_checks)  
  } else{
    startprobs <- c(1, rep(0, H-1))  
  }
  
  assert(sum(startprobs) == 1, add = arg_checks)
  
  if(!missing(exact)){
    assertNumeric(exact, any.missing = FALSE, lower = 1, upper = H, min.len = 1,
                  max.len = H, unique = TRUE, add = arg_checks)
  }
  
  assertNumeric(shape, lower = 0, upper = Inf, any.missing = FALSE, len = M, add = arg_checks)
  assertNumeric(scale, lower = 0, upper = Inf, any.missing = FALSE, len = M, add = arg_checks)
  
  cens_idx <- FALSE
  if(!missing(censshape) & !missing(censscale)){
    cens_idx <- TRUE
  }
  
  if(!missing(censshape)){
    assertNumeric(censshape, lower = 0, upper = Inf, any.missing = TRUE, len = H, add = arg_checks)
  }
  if(!missing(censscale)){
    assertNumeric(censscale, lower = 0, upper = Inf, any.missing = TRUE, len = H, add = arg_checks)
  }
  
  if((missing(censshape) & !missing(censscale)) | (!missing(censshape) & missing(censscale)) ){
    stop("Please define both censshape and censscale.")
  }
  
  if (!arg_checks$isEmpty()) checkmate::reportAssertions(arg_checks)
  
  
  
  
  
  # Check whether tmat contains loops ------------------
  
  #We can check whether the MSM contains loops by checking whether the
  #graph corresponding to the adjacency matrix is acyclic (is a Directed Acyclic Graph (DAG)).
  contains_loops <- FALSE
  adjacency_matrix <- tmat
  adjacency_matrix[!is.na(tmat)] <- 1
  adjacency_graph <- graph_from_adjacency_matrix(adjacency_matrix)
  if(!is_dag(adjacency_graph)){
    contains_loops <- TRUE
    warning("Multi-state model contains cycles! Data will simulated, but cannot be analysed by `npmsm()`.")
  }
 
  
  # Post processing of arguments ---------------------------------------------------------
  if(missing(n_subj)){
    n_subj <- length(unique(data[, "id"]))  
  }
  data <- data[order(data[, "id"], data[, "time"]), ]
  
  #Determine absorbing states
  absorbing_states <- which(apply(is.na(tmat), 1, all))
  
  # Main Function ----------------------
  
  #We want to apply sim_weibmsm multiple times and just bind the results together.
  
  #First we generate the starting states for each subject
  start_states <- sample(1:H, size = n_subj, replace = TRUE, prob = startprobs)
  subj_ids <- unique(data[, "id"])
  
  #If we return the true trajectory
  if(true_trajec){
    true_list <- list("time" = NULL, "state" = NULL, "id" = NULL)
  }
  
  #Apply sim1_weibmsm multiple times
  res_list <- list("time" = NULL, "state" = NULL, "id" = NULL)
  subj_counter <- 1
  for(i in subj_ids){
    obstimes_subj <- data[data[, "id"] == i, "time"]
    subj_trajectory <- sim1_weibmsm(obstimes = obstimes_subj, tmat = tmat, tmat2 = tmat2,
                                    startstate = start_states[subj_counter],
                                    exact = exact, shape = shape, scale = scale,
                                    censshape = censshape, censscale = censscale, 
                                    true_trajec = TRUE)
    res_list[["time"]] <- c(res_list[["time"]], subj_trajectory[["observed_trajectory"]][, "time"])
    res_list[["state"]] <- c(res_list[["state"]], subj_trajectory[["observed_trajectory"]][, "state"])
    res_list[["id"]] <- c(res_list[["id"]], rep(i, nrow(subj_trajectory[["observed_trajectory"]])))
    
    if(true_trajec){
      true_list[["time"]] <- c(true_list[["time"]], subj_trajectory[["true_trajectory"]][, "time"])
      true_list[["state"]] <- c(true_list[["state"]], subj_trajectory[["true_trajectory"]][, "state"])
      true_list[["id"]] <- c(true_list[["id"]], rep(i, nrow(subj_trajectory[["true_trajectory"]])))
    }
    
    subj_counter <- subj_counter + 1
  }
  
  #Remove redundant observations
  observed <- remove_redundant_observations(gd = as.data.frame(res_list), tmat = tmat)
  if(true_trajec){
    out <- list(observed = observed,
                true = as.data.frame(true_list))
  } else{
    out <- observed
  }
  out
}



























