#' Create a quality control report (in PDF format).
#'
#' This is the main function of the package and the only thing you need to call directly if you are 
#' just interested in getting a QC report.
#' 
#' You need to provide the folder name of the 'txt' output, as generated by MaxQuant and 
#' optionally a YAML configuration object, which allows to (de)activate certain plots and holds other parameters.
#' The yaml_obj is complex and best obtained by running this function once using the default (empty list).
#' A full YAML configuration object will be written in the 'txt' folder you provide and can be loaded using
#' \code{\link[yaml]{yaml.load}}.
#' 
#' The PDF and the config file will be stored in the given txt folder.
#' 
#' @note You need write access to the txt folder!
#' 
#' For updates, bug fixes and feedback please visit \url{http://github.com/cbielow/PTXQC}.
#'
#' @param txt_folder Path to txt output folder of MaxQuant (e.g. "c:/data/Hek293/txt")
#' @param yaml_obj   A nested list object with configuration parameters for the report.
#'                   Useful to switch off certain plots or skip entire sections.
#' @return List with named filename strings, e.g. $yaml_file, $report_file etc..
#'          
#' @importFrom plyr ddply dlply ldply llply adply summarise mapvalues
#' @importFrom reshape2 melt
#' @importFrom rmarkdown render
#' @importFrom grDevices dev.off pdf
#' 
#' @export
#'
createReport = function(txt_folder, yaml_obj = list())
{
  DEBUG_PTXQC = FALSE
  ###
  ###  prepare the YAML config
  ###
  if (class(yaml_obj) != "list")
  {
    stop(paste0("Argument 'yaml_obj' is not of type list\n"));
  }
  yc = YAMLClass$new(yaml_obj)
  
  ## determines if a local mqpar.xml should be used to grep all YAML parameters whose name starts with "MQpar_" from the
  ## original mqpar.xml instead of the yaml.config. The "MQpar_..." param from the config
  ## will be ignored and the newly written yaml.config will contain the values from mqpar.xml.
  param_name_PTXQC_UseLocalMQPar = "PTXQC$UseLocalMQPar"
  param_def_PTXQC_UseLocalMQPar = TRUE
  param_useMQPAR = yc$getYAML(param_name_PTXQC_UseLocalMQPar, param_def_PTXQC_UseLocalMQPar)
  
  time_start = Sys.time()
  
  
  if (!any(file.info(txt_folder)$isdir, na.rm = TRUE))
  {
    stop(paste0("Argument txt_folder with value '", txt_folder, "' is not a valid directory\n"));
  }
  txt_files = list()
  txt_files$param = "parameters.txt"
  txt_files$summary = "summary.txt"
  txt_files$groups = "proteinGroups.txt"
  txt_files$evd = "evidence.txt"
  txt_files$msms = "msms.txt"
  txt_files$msmsScan = "msmsScans.txt"
  txt_files$mqpar = "mqpar.xml"
  txt_files = lapply(txt_files, function(x) paste(txt_folder, x, sep=.Platform$file.sep))
  
  ## create names of output files (report PDF, YAML, stats, etc...)
  fh_out = getReportFilenames(txt_folder)
  use_extended_report_filename = yc$getYAML("PTXQC$ReportFilename$extended", TRUE)
  fh_out$report_file = ifelse(use_extended_report_filename, fh_out$report_file_extended, fh_out$report_file_simple)
  
  unlink(fh_out$stats_file)
  cat("Statistics summary:", file=fh_out$stats_file, append = FALSE, sep="\n")
  
  ## prepare for readMQ()
  mq = MQDataReader$new()
  
  ## read manual filename shortening & sorting (if available)
  mq$readMappingFile(fh_out$filename_sorting)

  ####
  ####  prepare the metrics
  ####
  if (!DEBUG_PTXQC) {
    lst_qcMetrics_str = ls(name = getNamespace("PTXQC"), pattern="qcMetric_") 
  } else {
    lst_qcMetrics_str = ls(sys.frame(which = 0), pattern="qcMetric_") ## executed outside of package, i.e. not loaded...
  }
  if (length(lst_qcMetrics_str) == 0) stop("computeReport(): No metrics found! Very weird!")
  lst_qcMetrics = sapply(lst_qcMetrics_str, function(m) {
    q = get(m)
    if (class(q) %in% "refObjectGenerator"){
      return(q$new())
    }
    return(NULL)
  })
  df.meta = getMetaData(lst_qcMetrics = lst_qcMetrics)
  df.meta
  ## reorder metrics (required for indexing below!)
  lst_qcMetrics_ord = lst_qcMetrics[df.meta$.id]
  
  ## write/update order from YAML
  i = 1
  for (i in 1:nrow(df.meta))
  {
    cat(paste("meta id: ", df.meta$.id[i], "\n"))
    pname = paste0("order$", df.meta$.id[i])
    pval = df.meta$order[i]
    param = yc$getYAML(pname, pval)
    ## update
    if (is.numeric(param)) {
      lst_qcMetrics_ord[[i]]$orderNr = param  # for some reason, lst_qcMetrics[[df.meta$.id]] does not work
    } else {
      stop("YAML param '", pname, "' is not numeric (", param, "). Please fix the YAML configuration!")
    }
  }
  ## re-read meta (new ordering?)
  df.meta = getMetaData(lst_qcMetrics = lst_qcMetrics)
  ## reorder metrics (again; after param update)
  lst_qcMetrics_ord = lst_qcMetrics[df.meta$.id]
  
  
  ######
  ######  parameters.txt ...
  ######
  
  enabled_parameters = yc$getYAML("File$Parameters$enabled", TRUE) & file.exists(txt_files$param)
  if (enabled_parameters)
  {
    d_parAll = mq$readMQ(txt_files$param, type="par")
    lst_qcMetrics[["qcMetric_PAR"]]$setData(d_parAll)
  }
  
  add_fs_col = yc$getYAML("PTXQC$NameLengthMax_num", 10)
  
  ######
  ######  summary.txt ...
  ######
  
  enabled_summary = yc$getYAML("File$Summary$enabled", TRUE) & file.exists(txt_files$summary)
  if (enabled_summary)
  {
    d_smy = mq$readMQ(txt_files$summary, type="sm", add_fs_col = add_fs_col)
    #colnames(d_smy)
    #colnames(d_smy[[1]])
    
    id_rate_bad = yc$getYAML("File$Summary$IDRate$Thresh_bad_num", 20)
    id_rate_great = yc$getYAML("File$Summary$IDRate$Thresh_great_num", 35)
    
    ### MS/MS identified [%]
    lst_qcMetrics[["qcMetric_SM_MSMSIdRate"]]$setData(d_smy$raw, id_rate_bad, id_rate_great)
  }  
  
  
  ######
  ######  proteinGroups.txt ...
  ######
  
  GL_name_min_length = 8
  enabled_pg_ratioLabIncThresh = yc$getYAML("File$ProteinGroups$RatioPlot$LabelIncThresh_num", 4)

  enabled_proteingroups = yc$getYAML("File$ProteinGroups$enabled", TRUE) & file.exists(txt_files$groups)
  if (enabled_proteingroups)
  {
    
    d_pg = mq$readMQ(txt_files$groups, type="pg", col_subset=NA, filter="R")
      
    param_name_PG_intThresh = "File$ProteinGroups$IntensityThreshLog2_num"
    param_def_PG_intThresh = 25 ## default median intensity in log2 scale
    param_PG_intThresh = yc$getYAML(param_name_PG_intThresh, param_def_PG_intThresh)
    if (!is.numeric(param_PG_intThresh) || !(param_PG_intThresh %in% 1:100))
    { ## reset if value is weird
      cat("YAML value for '" %+% param_name_PG_intThresh %+% "' is invalid ('" %+% param_PG_intThresh %+% "'). Using default of " %+% param_def_PG_intThresh %+% ".")
      param_PG_intThresh = param_def_PG_intThresh
    }
    
    ##
    ## Raw/LFQ/Reporter intensity boxplots
    ##
    clusterCols = list()
    
    colsSIL = grepv("^intensity\\.[hlm](\\.|$)", colnames(d_pg))
    colsLF = grepv("^intensity\\..", colnames(d_pg))
    colsOneCond = "intensity" ## just one group -- we still want to know what the overall intensity is
    if (length(colsSIL)) {
      ## ignore intensity.l and alike if real groups are present
      plain_channel = grepv("^intensity\\.[hlm]$", colnames(d_pg))
      if (all(plain_channel == colsSIL)) colsW = colsSIL else colsW = setdiff(colsSIL, plain_channel)
    } else if (length(colsLF)) {
      colsW = colsLF
    }  else {
      colsW = colsOneCond
    }
    
    ## a global PG name mapping
    MAP_pg_groups = data.frame(long = colsW)
    MAP_pg_groups$short = shortenStrings(simplifyNames(delLCP(MAP_pg_groups$long, 
                                                              min_out_length = GL_name_min_length, 
                                                              add_dots = TRUE), 
                                                       min_out_length = GL_name_min_length))
    ##
    ## Contaminants plots on Raw intensity
    ##
    lst_qcMetrics[["qcMetric_PG_Cont"]]$setData(d_pg, colsW, MAP_pg_groups)


    ###
    ### Raw intensity boxplot
    ###
    
    clusterCols$raw.intensity = colsW ## cluster using intensity
    
    lst_qcMetrics[["qcMetric_PG_RawInt"]]$setData(d_pg, colsW, MAP_pg_groups, param_PG_intThresh)

    ##
    ## LFQ boxplots
    ##
    colsSIL = grepv("^lfq.intensity\\.[hlm](\\.|$)", colnames(d_pg))
    colsLF = grepv("^lfq.intensity\\..", colnames(d_pg))
    
    ## a global PG name mapping
    MAP_pg_groups_LFQ = NA
    if (length(c(colsSIL, colsLF)) > 0)
    {
      if (length(colsSIL)) {
        ## unlike intensity.l, there is no lfq.intensity.l which we could remove
        colsW = colsSIL
      } else colsW = colsLF
      MAP_pg_groups_LFQ = data.frame(long = colsW)
      MAP_pg_groups_LFQ$short = shortenStrings(simplifyNames(delLCP(MAP_pg_groups_LFQ$long, 
                                                                    min_out_length = GL_name_min_length, 
                                                                    add_dots = TRUE), 
                                                             min_out_length = GL_name_min_length))

      clusterCols$lfq.intensity = colsW ## cluster using LFQ
      
      lst_qcMetrics[["qcMetric_PG_LFQInt"]]$setData(d_pg, colsW, MAP_pg_groups_LFQ, param_PG_intThresh)
    }
    
    ##
    ## iTRAQ/TMT, reporter ion intensity boxplot
    ##
    ## either "reporter.intensity.0.groupname" or "reporter.intensity.0" (no groups)    
    colsITRAQ = grepv("^reporter.intensity.[0-9].|^reporter.intensity.[0-9]$", colnames(d_pg))
    ## a global PG name mapping
    MAP_pg_groups_ITRAQ = NA
    if (length(colsITRAQ) > 0)
    {
      MAP_pg_groups_ITRAQ = data.frame(long = c(colsITRAQ))
      MAP_pg_groups_ITRAQ$short = shortenStrings(simplifyNames(delLCP(MAP_pg_groups_ITRAQ$long, 
                                                                      min_out_length = GL_name_min_length, 
                                                                      add_dots = TRUE), 
                                                               min_out_length = GL_name_min_length))

      clusterCols$reporter.intensity = colsITRAQ ## cluster using reporters
      
      lst_qcMetrics[["qcMetric_PG_ITRAQInt"]]$setData(d_pg, colsITRAQ, MAP_pg_groups_ITRAQ, param_PG_intThresh)
    }
    
    
    ##
    ## PCA
    ##
    ## some clustering (its based on intensity / lfq.intensity columns..)
    ## todo: maybe add ratios -- requires loading from txt though..
    MAP_pg_groups_ALL = rbind(MAP_pg_groups, MAP_pg_groups_LFQ, MAP_pg_groups_ITRAQ)
    
    lst_qcMetrics[["qcMetric_PG_PCA"]]$setData(d_pg, clusterCols, MAP_pg_groups_ALL)

    
    ##################################
    ## ratio plots
    ##################################
    ## get ratio column
    ratio_cols = grepv("^ratio\\.[hm]\\.l", colnames(d_pg))  ## e.g. "ratio.m.l.ARK5exp" or "ratio.m.l.variability.ARK5exp"
    ## remove everything else
    ## e.g. we do not want ratio.h.l.variability.ARK5exp, i.e. the 'variability' property
    ratio_cols = grepv("^ratio.[hm].l.normalized", ratio_cols, invert = TRUE)
    ratio_cols = grepv("^ratio.[hm].l.count", ratio_cols, invert = TRUE)
    ratio_cols = grepv("^ratio.[hm].l.variability", ratio_cols, invert = TRUE)
    ratio_cols = grepv("^ratio.[hm].l.significance.a", ratio_cols, invert = TRUE) ## from MQ 1.0.1x
    ratio_cols = grepv("^ratio.[hm].l.significance.b", ratio_cols, invert = TRUE)
    ratio_cols = grepv("^ratio.[hm].l.iso.count", ratio_cols, invert = TRUE) ## from MQ 1.5.1.2
    ratio_cols = grepv("^ratio.[hm].l.type", ratio_cols, invert = TRUE)
    ratio_cols
    
    if (length(ratio_cols) > 0)
    {
      lst_qcMetrics[["qcMetric_PG_Ratio"]]$setData(df_pg = d_pg, ratio_cols = ratio_cols, thresh_LabelIncorp = enabled_pg_ratioLabIncThresh, GL_name_min_length = GL_name_min_length)
    }
  }
  
  ######
  ######  evidence.txt ...
  ######
  
  enabled_evidence = yc$getYAML("File$Evidence$enabled", TRUE) & file.exists(txt_files$evd)
  
  ## get scoring threshold (upper limit)
  param_name_EV_protThresh = "File$Evidence$ProteinCountThresh_num"
  param_def_EV_protThresh = 3500
  param_EV_protThresh = yc$getYAML(param_name_EV_protThresh, param_def_EV_protThresh)
  if (!is.numeric(param_EV_protThresh) || !(param_EV_protThresh %in% 1:1e5))
  { ## reset if value is weird
    cat("YAML value for '" %+% param_name_EV_protThresh %+% "' is invalid ('" %+% param_EV_protThresh %+% "'). Using default of " %+% param_def_EV_protThresh %+% ".")
    param_EV_protThresh = param_def_EV_protThresh
  }
  
  param_name_EV_intThresh = "File$Evidence$IntensityThreshLog2_num"
  param_def_EV_intThresh = 23 ## default median intensity in log2 scale
  param_EV_intThresh = yc$getYAML(param_name_EV_intThresh, param_def_EV_intThresh)
  if (!is.numeric(param_EV_intThresh) || !(param_EV_intThresh %in% 1:100))
  { ## reset if value is weird
    cat("YAML value for '" %+% param_name_EV_intThresh %+% "' is invalid ('" %+% param_EV_intThresh %+% "'). Using default of " %+% param_def_EV_intThresh %+% ".")
    param_EV_intThresh = param_def_EV_intThresh
  }
  
  ## get scoring threshold (upper limit)
  param_name_EV_pepThresh = "File$Evidence$PeptideCountThresh_num"
  param_def_EV_pepThresh = 15000
  param_EV_pepThresh = yc$getYAML(param_name_EV_pepThresh, param_def_EV_pepThresh)
  if (!is.numeric(param_EV_pepThresh) || !(param_EV_pepThresh %in% 1:1e6))
  { ## reset if value is weird
    cat("YAML value for '" %+% param_name_EV_pepThresh %+% "' is invalid ('" %+% param_EV_pepThresh %+% "'). Using default of " %+% param_def_EV_pepThresh %+% ".")
    param_EV_pepThresh = param_def_EV_pepThresh
  }
  
  if (enabled_evidence)
  {
    ## protein.names is only available from MQ 1.4 onwards
    d_evd = mq$readMQ(txt_files$evd, type="ev", filter="R", col_subset=c("proteins",
                                                                         numeric = "Retention.Length",
                                                                         numeric = "retention.time.calibration", 
                                                                         numeric = "Retention.time$", 
                                                                         numeric = "Match.Time.Difference",
                                                                         numeric = "^intensity$", "^Type$",
                                                                         numeric = "Mass\\.Error", 
                                                                         numeric = "^uncalibrated...calibrated." ,
                                                                         numeric = "^m.z$",
                                                                         numeric = "^score$", 
                                                                         numeric = "^fraction$",  ## only available when fractions were given
                                                                         "Raw.file", "^Protein.Group.IDs$", "Contaminant",
                                                                         numeric = "[RK]\\.Count", 
                                                                         numeric = "^Charge$", "modified.sequence",
                                                                         numeric = "^Mass$", "^protein.names$",
                                                                         numeric = "^ms.ms.count$"))
# 
#                                                                          numeric = "^ms.ms.count$",
#                                                                          numeric = "^reporter.intensity.[0-9]$"))
#     colnames(d_evd)
#     if (length(grepv("^reporter", colnames(d_evd))) > 0) {
#       dd = melt(d_evd[, c("fc.raw.file", grepv("^reporter", colnames(d_evd)))], id.vars = "fc.raw.file")
#       dd_labEff = ddply(dd, c("fc.raw.file", "variable"), function(x) {
#         data.frame(pc_labeled = sum(x$value == 0 | is.na(x$value)) / nrow(x) * 100)
#       })
#       ggplot(dd_labEff) + 
#         geom_bar(aes(variable, pc_labeled), stat="identity") +
#         coord_flip() +
#         facet_wrap(~ fc.raw.file)
#     
#       head(dd)
#       dd$value[is.na(dd$value)] = 0
#       ggplot(dd) + geom_boxplot(aes(variable, log(value+1))) + facet_wrap(~fc.raw.file)
#       ggplot(dd) + geom_boxplot(aes(variable, log(value+0))) + facet_wrap(~fc.raw.file)
#     
#       stop("")
#     }
    
    ### warn of special contaminants!
    ## these need to be in FASTA headers (description is not enough)!
    ## syntax:  list( contaminant1 = c(name, threshold), contaminant2 = c(...), ...)
    ##
    ##  if within the YAML file
    ##    SpecialContaminants: no
    ##  is set, then 'yaml_contaminants' will be 'FALSE'
    ##
    contaminant_default = list("cont_MYCO" = c(name="MYCOPLASMA", threshold=1)) # name (FASTA), threshold for % of unique peptides
    ##yaml_obj = list()
    ##contaminant_default = FALSE ## to switch it off by default
    yaml_contaminants = yc$getYAML("File$Evidence$SpecialContaminants", contaminant_default)
    
    if (class(yaml_contaminants) == "list")  ## SC are requested
    {
      if (!enabled_proteingroups)
      {
        ## fail hard; we could hack around this (e.g. by loading fasta headers from evidence.txt), but it wastes a lot of memory and time
        warning(paste0("Warning: reporting of special contaminants requires loading of proteinGroups.txt.",
                       "If you don't have this file and want to get rid of this warning, please disable contaminant lookup in the YAML file ('SpecialContaminants: no')."))
      } else {
        lst_qcMetrics[["qcMetric_EVD_UserContaminant"]]$setData(d_evd, d_pg, yaml_contaminants)
      }
    }
    
    ##
    ## intensity of peptides
    ##
    lst_qcMetrics[["qcMetric_EVD_PeptideInt"]]$setData(d_evd, param_EV_intThresh)


    ##
    ## peptide & protein counts
    ##
    ## contains NA if 'genuine' ID
    d_evd$hasMTD = !is.na(d_evd$match.time.difference)
    ## report Match-between-runs data only if if it was enabled
    reportMTD = any(d_evd$hasMTD)

    lst_qcMetrics[["qcMetric_EVD_ProteinCount"]]$setData(d_evd, param_EV_protThresh)

    lst_qcMetrics[["qcMetric_EVD_PeptideCount"]]$setData(d_evd, param_EV_pepThresh)

    ####
    #### peak length (not supported in MQ 1.0.13)
    ####
    if ("retention.length" %in% colnames(d_evd))  
    {
      lst_qcMetrics[["qcMetric_EVD_RTPeakWidth"]]$setData(d_evd)
    } ## end retention length (aka peak width)
    
    ##
    ## retention time calibration (to see if window was sufficiently large)
    ## (not supported in MQ 1.0.13)  
    ## Even if MBR=off, this column always contains numbers (usually 0, or very small)
    ##
    
    ## param
    param_name_EV_MatchingTolerance = "File$Evidence$MQpar_MatchingTimeWindow_num"
    param_def_EV_MatchingTolerance = 1
    param_EV_MatchingTolerance = yc$getYAML(param_name_EV_MatchingTolerance, param_def_EV_MatchingTolerance)
    if (param_useMQPAR) {
      v = getMQPARValue(txt_files$mqpar, "matchingTimeWindow") ## will also warn() if file is missing
      if (!is.null(v)) {
        param_EV_MatchingTolerance = yc$setYAML(param_name_EV_MatchingTolerance, as.numeric(v))
      }
    }
    param_name_mbr = "File$Evidence$MatchBetweenRuns_wA"
    param_evd_mbr = yc$getYAML(param_name_mbr, "auto")
    
    if (("retention.time.calibration" %in% colnames(d_evd)))
    {
      ## this should enable us to decide if MBR was used (we could also look up parameters.txt -- if present)
      MBR_HAS_DATA = (sum(d_evd$type == "MULTI-MATCH") > 0)

      if ((param_evd_mbr == FALSE) || (MBR_HAS_DATA == FALSE))
      {
        ## MBR is not evaluated
      } else
      {
        lst_qcMetrics[["qcMetric_EVD_MBRAlign"]]$setData(d_evd, param_EV_MatchingTolerance, mq$raw_file_mapping)

        ### 
        ###     MBR: ID transfer
        ###
        #debug (restore data): lst_qcMetrics[["qcMetric_EVD_RTPeakWidth"]]$setData(d_evd)
        avg_peak_width = lst_qcMetrics[["qcMetric_EVD_RTPeakWidth"]]$outData[["avg_peak_width"]]
        if (is.null(avg_peak_width)) {
          stop("RT peak width module did not run, but is required for MBR metrics. Enable it and try again or switch off MBR metrics!")
        } 
        lst_qcMetrics[["qcMetric_EVD_MBRIdTransfer"]]$setData(d_evd, avg_peak_width)

        
        ##
        ## MBR: Tree Clustering (experimental)
        ##  and
        ## MBR: additional evidence by matching MS1 by AMT across files
        ##
        lst_qcMetrics[["qcMetric_EVD_MBRaux"]]$setData(d_evd)

      } ## MBR has data
    } ## retention.time.difference column exists
    
    
    ##
    ## charge distribution
    ##
    ##  (this uses genuine peptides only -- no MBR!)
    ## 
    lst_qcMetrics[["qcMetric_EVD_Charge"]]$setData(d_evd)

    ##
    ## peptides per RT
    ##
    lst_qcMetrics[["qcMetric_EVD_IDoverRT"]]$setData(d_evd)

    ##
    ## barplots of mass error
    ##
    ## MQ seems to mess up mass recal on some (iTRAQ/TMT) samples, by reporting ppm errors which include modifications
    ## , thus one sees >1e5 ppm, e.g. 144.10 Da
    ##  this affects both 'uncalibrated.mass.error..ppm.'   and
    ##                    'mass.error..ppm.'
    ## HOWEVER, 'uncalibrated...calibrated.m.z..ppm.' seems unaffected, but is not available in all MQ versions :(
    ##    also, 'mass' and 'm/z' columns seem unaffected.
    ## We cannot always reconstruct mass_error[ppm] from 'm/z' and mass columns 
    ## since 'm/z' is just too close to the theoretical value or islacking precision of the stored numbers.
    ##
    ## The MQ list reports one case with high ppm error (8000), where the KR.count was at fault. We cannot
    ## reconstruct this.
    ##
    ## Also, MaxQuant will not report uncalibrated mass errors if the data are too sparse for a given Raw file.
    ## Then, 'uncalibrated.mass.error..ppm.' will be 'NaN' throughout -- but weirdly, calibrated masses will be reported.
    ##
    
    param_name_EV_PrecursorTolPPM = "File$Evidence$MQpar_firstSearchTol_num"
    param_def_EV_PrecursorTolPPM = 20
    param_EV_PrecursorTolPPM = yc$getYAML(param_name_EV_PrecursorTolPPM, param_def_EV_PrecursorTolPPM)
    if (param_useMQPAR) {
      v = getMQPARValue(txt_files$mqpar, "firstSearchTol") ## will also warn() if file is missing
      if (!is.null(v)) {
        param_EV_PrecursorTolPPM = yc$setYAML(param_name_EV_PrecursorTolPPM, as.numeric(v))
      }
    }
    
    param_name_EV_PrecursorOutOfCalSD = "File$Evidence$firstSearch_outOfCalWarnSD_num"
    param_def_EV_PrecursorOutOfCalSD = 2
    param_EV_PrecursorOutOfCalSD = yc$getYAML(param_name_EV_PrecursorOutOfCalSD, param_def_EV_PrecursorOutOfCalSD)
    
    ##
    ## MS1-out-of-calibration (i.e. the tol-window being too small)
    ##
    ## additionally use MS2-ID rate (should be below 1%)
    if (enabled_summary) {
      df_idrate = d_smy$raw[, c("fc.raw.file", "ms.ms.identified....")]
    } else {
      df_idrate = NULL
    }
    
    lst_qcMetrics[["qcMetric_EVD_PreCal"]]$setData(d_evd, df_idrate, param_EV_PrecursorTolPPM, param_EV_PrecursorOutOfCalSD)

    
    ##
    ## MS1 post calibration
    ##
    param_name_EV_PrecursorTolPPMmainSearch = "File$Evidence$MQpar_mainSearchTol_num"
    param_def_EV_PrecursorTolPPMmainSearch = NA  ## we do not dare to have a default, since it ranges from 6 - 4.5 ppm across MQ versions
    param_EV_PrecursorTolPPMmainSearch = yc$getYAML(param_name_EV_PrecursorTolPPMmainSearch, param_def_EV_PrecursorTolPPMmainSearch)
    if (param_useMQPAR) {
      v = getMQPARValue(txt_files$mqpar, "mainSearchTol") ## will also warn() if file is missing
      if (!is.null(v)) {
        param_EV_PrecursorTolPPMmainSearch = yc$setYAML(param_name_EV_PrecursorTolPPMmainSearch, as.numeric(v))
      }
    }
    if (is.na(param_EV_PrecursorTolPPMmainSearch))
    {
      warning("PTXQC: Cannot draw borders for calibrated mass error, since neither 'File$Evidence$MQpar_mainSearchTol_num' is set nor a mqpar.xml file is present!", immediate. = TRUE)
    }

    lst_qcMetrics[["qcMetric_EVD_PostCal"]]$setData(d_evd, df_idrate, param_EV_PrecursorTolPPM, param_EV_PrecursorOutOfCalSD, param_EV_PrecursorTolPPMmainSearch)


    ##
    ## Top5 contaminants
    ##
    lst_qcMetrics[["qcMetric_EVD_Top5Cont"]]$setData(d_evd)

    ##
    ## Oversampling: determine peaks repeatedly sequenced
    ##
    lst_qcMetrics[["qcMetric_EVD_MS2OverSampling"]]$setData(d_evd)

    ## trim down to the absolute required (we need to identify contaminants in MSMS.txt later on)
    if (!exists("DEBUG_PTXQC")) d_evd = d_evd[, c("id", "contaminant")]
    
    
    
    ##
    ## missing values
    ##
    lst_qcMetrics[["qcMetric_EVD_MissingValues"]]$setData(d_evd)
    
}


######
######  msms.txt ...
######

enabled_msms = yc$getYAML("File$MsMs$enabled", TRUE) & file.exists(txt_files$msms)
if (enabled_msms)
{
  ### missed cleavages (again)
  ### this is the real missed cleavages estimate ... but slow
  #d_msms_s = mq$readMQ(txt_files$msms, type="msms", filter = "", nrows=10)
  #colnames(d_msms_s)
  #head(d_msms)
  d_msms = mq$readMQ(txt_files$msms, type="msms", filter = "", col_subset=c(numeric = "Missed\\.cleavages",
                                                                            "^Raw.file$",
                                                                            "^mass.deviations",
                                                                            "^masses$", "^mass.analyzer$", "fragmentation", "reverse",
                                                                            numeric = "^evidence.id$" #,
                                                                            #numeric = "^reporter.intensity.[0-9]$"
                                                                            ), check_invalid_lines = FALSE)
  
  
# colnames(d_msms)
# ## unfortunately, these columns are empty (at least when reporters were quantified using MS3) -- and ms3.txt does not have them either
# dd = melt(d_msms[, c("fc.raw.file", grepv("^reporter", colnames(d_msms)))], id.vars = "fc.raw.file")
# dd_labEff = ddply(dd, c("fc.raw.file", "variable"), function(x) {
#   data.frame(pc_labeled = sum(x$value > 0 | (!is.na(x$value))) / nrow(x) * 100)
# })
# ggplot(dd_labEff) + 
#   geom_bar(aes(variable, pc_labeled), stat="identity") +
#   coord_flip() +
#   facet_wrap(~ fc.raw.file)

  
  ##
  ##  MS2 fragment decalibration
  ##
  lst_qcMetrics[["qcMetric_MSMS_MSMSDecal"]]$setData(df_msms = d_msms, fc_raw_files = mq$raw_file_mapping$to)

  ##
  ## missed cleavages per Raw file
  ##
  if (exists("d_evd")) {
    lst_qcMetrics[["qcMetric_MSMS_MissedCleavages"]]$setData(d_msms, d_evd)
  } else {
    lst_qcMetrics[["qcMetric_MSMS_MissedCleavages"]]$setData(d_msms)
  }

  ## save RAM: msms.txt is not required any longer
  rm(d_msms)
  rm(d_evd)
}



######
######  msmsScans.txt ...
######

enabled_msmsscans = yc$getYAML("File$MsMsScans$enabled", TRUE) & file.exists(txt_files$msmsScan)
if (enabled_msmsscans)
{
  #d_msmsScan_h = mq$readMQ(txt_files$msmsScan, type="msms", filter = "", nrows=2)
  #colnames(d_msmsScan_h)
  #head(d_msmsScan_h)
  d_msmsScan = mq$readMQ(txt_files$msmsScan, type = "msms", filter = "", 
                         col_subset = c(numeric = "^ion.injection.time", 
                                        numeric = "^retention.time$", 
                                        "^Identified", 
                                        "^Scan.event.number", 
                                        "^Raw.file",
                                        "^identified",
                                        "^score",
                                        "^total.ion.current",
                                        "^m.z",
                                        "^mass$"),
                         check_invalid_lines = FALSE)
#   
#   d_msmsScan$m2 = 
#   d_msmsScan$mdef = d_msmsScan$mass - trunc(d_msmsScan$mass)
#   head(d_msmsScan$mass)
#   ggplot(d_msmsScan[d_msmsScan$fc.raw.file == "file 01",]) + geom_point(aes(mass, mdef, col = identified), alpha=0.2)
#   
#   stop("")
#   ddply(d_msmsScan, "fc.raw.file", function(x){
#     xx <<- x
#     x = xx
#     x$score[is.na(x$score)] = runif(sum(is.na(x$score)), 0, 2)
#     print(ggplot(x) + geom_jitter(aes(score, log(total.ion.current+2))))
#     stop("")
#     return(NULL)
#   })
  
  
  param_name_MSMSScans_ionInjThresh = "File$MsMsScans$IonInjectionThresh_num"
  param_def_MSMSScans_ionInjThresh = 10 ## default ion injection threshold in milliseconds
  param_MSMSScans_ionInjThresh = yc$getYAML(param_name_MSMSScans_ionInjThresh, param_def_MSMSScans_ionInjThresh)
  if (!is.numeric(param_MSMSScans_ionInjThresh))
  { ## reset if value is weird
    cat("YAML value for '" %+% param_name_MSMSScans_ionInjThresh %+% "' is invalid ('" %+% param_MSMSScans_ionInjThresh %+% "'). Using default of " %+% param_def_MSMSScans_ionInjThresh %+% ".")
    param_MSMSScans_ionInjThresh = param_def_MSMSScans_ionInjThresh
  }
  
  ##
  ## MQ version 1.0.13 has very rudimentary MSMSscans.txt, with no header, so we need to skip the metrics of this file
  ##
  if (ncol(d_msmsScan) > 3)
  {
    # round RT to 2 min intervals
    d_msmsScan$rRT = round(d_msmsScan$retention.time/2)*2
    
    ##
    ## TopN over RT
    ##
    lst_qcMetrics[["qcMetric_MSMSScans_TopNoverRT"]]$setData(d_msmsScan)

    ##
    ## Injection time over RT
    ##
    lst_qcMetrics[["qcMetric_MSMSScans_IonInjTime"]]$setData(d_msmsScan, param_MSMSScans_ionInjThresh)

    ##
    ## TopN counts
    ##
    lst_qcMetrics[["qcMetric_MSMSScans_TopN"]]$setData(d_msmsScan)

    ##
    ## Scan event: % identified
    ##
    lst_qcMetrics[["qcMetric_MSMSScans_TopNID"]]$setData(d_msmsScan)

  } ## end MSMSscan from MQ > 1.0.13
  
  ## save RAM: msmsScans.txt is not required any longer
  rm(d_msmsScan)
}

  
  
#####################################################################
## list of qcMetric objects
print("#Metrics: ")
print(length(lst_qcMetrics))

hm = getQCHeatMap(lst_qcMetrics, raw_file_mapping = mq$raw_file_mapping)
#print(hm[["plot"]])
write.table(hm[["table"]], file = fh_out$heatmap_values_file, quote = TRUE, sep = "\t", row.names = FALSE)

## get MQ short name mapping plot (might be NULL if no mapping was required)
pl_nameMapping = mq$plotNameMapping()

##
## plot it!!!
##
out_formats_supported = c("html", "plainPDF")

param_name_PTXQC_OutputFormats = "PTXQC$OutputFormats"
param_def_PTXQC_OutputFormats =  out_formats_supported
param_OutputFormats = yc$getYAML(param_name_PTXQC_OutputFormats, param_def_PTXQC_OutputFormats)

param_name_PTXQC_PageNumbers = "PTXQC$PlainPDF$AddPageNumbers"
param_def_PTXQC_PageNumbers = "on"
param_PageNumbers = yc$getYAML(param_name_PTXQC_PageNumbers, param_def_PTXQC_PageNumbers)

cat("Creating Report file ...")


#
#param_OutputFormats = "html pdf"
#
out_formats = unlist(strsplit(param_OutputFormats, "[ ,]+"))
out_formats
out_format_requested = out_formats_supported[match(out_formats, out_formats_supported)]
if (any(is.na(out_format_requested)))
{
  stop("Output format(s) not supported: '", paste(out_formats[is.na(out_format_requested)], collapse="', '"), "'")
}


if ("html" %in% out_format_requested)
{
  fh_out$report_file_extension = c(fh_out$report_file_extension, ".html")
  
  if (DEBUG_PTXQC) {
    html_template = "C:/projects/QC/package/PTXQC/inst/reportTemplate/PTXQC_report_template.Rmd"
  } else {
    html_template = system.file("./reportTemplate/PTXQC_report_template.Rmd", package="PTXQC")
  }
  html_template
  ## Rmarkdown: convert to Markdown, and then to HTML or PDF...
  render(html_template, output_file = paste0(fh_out$report_file, ".html"))
}

if ("plainPDF" %in% out_format_requested)
{
  fh_out$report_file_extension = c(fh_out$report_file_extension, ".pdf")
  report_file_PDF = paste0(fh_out$report_file, ".pdf")
  ## give the user a chance to close open reports which are currently blocked for writing
  if (!wait_for_writable(report_file_PDF))
  {
    stop("Target file not writable")
  }
  
  if (param_PageNumbers == "on")
  {
    printWithPage = function(gg_obj, page_nr, filename = fh_out$report_file)
    {
      filename = basename(filename)
      printWithFooter(gg_obj, bottom_left = filename, bottom_right = page_nr)
    }
  } else {
    ## no page number and filename at bottom of each page
    printWithPage = function(gg_obj, page_nr, filename = fh_out$report_file)
    {
      print(gg_obj)
    }
  }
  pdf(report_file_PDF)
  printWithPage(hm[["plot"]], "p. 1")      # summary heatmap
  printWithPage(pl_nameMapping, "p. 2")    # short file mapping
  pc = 3; ## subsequent pages start at #4
  for (qcm in lst_qcMetrics_ord)
  {
    printWithPage(qcm$plots, paste("p.", pc))
    pc = pc + 1
  }
  dev.off();
  cat(" done\n")
}

## save plot object (for easier access, in case someone wants high-res plots)
## (...disabled for now until concrete use case pops up)
#cat("Dumping plot objects as Rdata file ...")
#save(file = fh_out$R_plots_file, list = "GPL")
#cat(" done\n")

### write YAML config
yc$writeYAML(fh_out$yaml_file)

## write shortnames and sorting of filenames
mq$writeMappingFile(fh_out$filename_sorting)

cat(paste("Report file created at\n\n    ", fh_out$report_file, ".*\n\n", sep=""))
cat(paste0("\n\nTime elapsed: ", round(as.double(Sys.time() - time_start, units="mins"), 1), " min\n\n"))

## return path to PDF report and YAML config, etc
return (fh_out)
}
