% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/clean.R
\name{clean}
\alias{clean}
\title{Produce a data cleaning overview document (deprecated version)}
\usage{
clean(data, output = c("pdf", "html"), render = TRUE, useVar = NULL,
  ordering = c("asIs", "alphabetical"), onlyProblematic = FALSE,
  labelled_as = c("factor"), mode = c("summarize", "visualize",
  "check"), smartNum = TRUE, preChecks = c("isKey", "isSingular",
  "isSupported"), file = NULL, replace = FALSE, vol = "",
  standAlone = TRUE, twoCol = TRUE, quiet = TRUE,
  openResult = TRUE, characterChecks = defaultCharacterChecks(),
  factorChecks = defaultFactorChecks(),
  labelledChecks = defaultLabelledChecks(),
  numericChecks = defaultNumericChecks(),
  integerChecks = defaultIntegerChecks(),
  logicalChecks = defaultLogicalChecks(),
  dateChecks = defaultDateChecks(), allChecks = NULL,
  characterSummaries = defaultCharacterSummaries(),
  factorSummaries = defaultFactorSummaries(),
  labelledSummaries = defaultLabelledSummaries(),
  numericSummaries = defaultNumericSummaries(),
  integerSummaries = defaultIntegerSummaries(),
  logicalSummaries = defaultLogicalSummaries(),
  dateSummaries = defaultDateSummaries(), allSummaries = NULL,
  allVisuals = "standardVisual", listChecks = TRUE, maxProbVals = 10,
  maxDecimals = 2, addSummaryTable = TRUE, reportTitle = NULL,
  treatXasY = NULL, ...)
}
\arguments{
\item{data}{The dataset to be checked. This dataset should be of class \code{data.frame},
\code{tibble} or \code{matrix}. If it is of class \code{matrix}, it will be converted to a
\code{data.frame}.}

\item{output}{Output format. Options are \code{"pdf"} (the default) and \code{"html"}.}

\item{render}{Should the output file be rendered (defaults to \code{TRUE}),
i.e. should a pdf/html document be generated and saved to the disc?}

\item{useVar}{Variables to clean. If \code{NULL} (the default), all variables in \code{data}
are included. If a vector of variable names is supplied, only the variables in \code{data} that are
also in \code{useVar} are included in the data cleaning overview document.}

\item{ordering}{Choose the ordering of the variables in the variable presentation. The options
are "asIs" (ordering as in the dataset) and "alphabetical" (alphabetical order).}

\item{onlyProblematic}{A logical. If \code{TRUE}, only the variables flagged as
problematic in the check step will be included in the variable list.}

\item{labelled_as}{A string explaining the way to handle labelled vectors.
Currently \code{"factor"} (the default) is the only possibility. This means that labelled
variables that appear factor-like (by having a non-\code{NULL} \code{labels}-attribute) will
be treated as factors, while other labelled variables will be treated as whatever base
variable class they inherit from.}

\item{mode}{Vector of tasks to perform among the three categories "summarize", "visualize" and "check".
The default, \code{c("summarize", "visualize", "check")}, implies that all three steps are
performed. The steps selected in \code{mode} will be performed for each variable in
\code{data} and their results are presented in the second part of the outputted data cleaning
overview document. The "summarize" step is responsible for creating the summary table,
the "visualize" step is responsible for creating the plot and the "check" step is responsible
for performing checks on the variable and printing the results if any problems are found.}

\item{smartNum}{If \code{TRUE} (the default), numeric and integer variables with
less than 5 unique values are treated as factor variables in the checking,
visualization and summary steps, and a message notifying the reader of this is
printed in the data summary.}

\item{preChecks}{Vector of function names for check functions used in the pre-check stage.
The pre-check stage consists of variable checks that should be performed before the
summary/visualization/checking step. If any of these checks find problems, the variable
will not be summarized nor visualized nor checked.}

\item{file}{The filename of the outputted rmarkdown (.Rmd) file.
If set to \code{NULL} (the default), the filename will be the name of \code{data}
prefixed with "dataMaid_", if this qualifies as a valid file name (e.g. no special
characters allowed). Otherwise, \code{clean()} tries to create a valid filename by
substituting illegal characters. Note that a valid file is of type .Rmd, hence all
filenames should have a ".Rmd"-suffix.}

\item{replace}{If \code{FALSE} (the default), an error is thrown if one of the files
that are about to be created (.Rmd overview file and possibly also a .html or .pdf
file) already exists. If \code{TRUE}, no checks are performed and files on disc thus
might be overwritten.}

\item{vol}{Extra text string or numeric that is appended on the end of the output
file name(s). For example, if the dataset is called "myData", no file argument is
 supplied and \code{vol=2}, the output file will be called "dataMaid_myData2.Rmd"}

\item{standAlone}{A logical. If \code{TRUE}, the document begins with a
markdown YAML preamble such that it can be rendered as a stand alone rmarkdown
file, e.g. by calling \code{\link{render}}. If \code{FALSE}, this preamble is removed.
Moreover, no matter the input to the \code{render} argument, the document will now
not be rendered, as it has no preamble.}

\item{twoCol}{A logical. Should the results from the \emph{summarize} and \emph{visualize}
steps be presented in two columns? Defaults to \code{TRUE}.}

\item{quiet}{A logical. If \code{TRUE} (the default), only a few messages
are printed to the screen as \code{clean} runs. If \code{FALSE}, no messages are
suppressed. The third option, \code{"silent"}, renders the function completely
silent, such that only fatal errors are printed.}

\item{openResult}{A logical. If \code{TRUE} (the default), the last file produced
by \code{clean} is automatically opened by the end of the function run. This
means that if \code{render = TRUE}, the rendered pdf or html file is opened, while
if \code{render = FALSE}, the .Rmd file is opened.}

\item{characterChecks}{A vector of the names of error-checking functions to apply to
character vectors.}

\item{factorChecks}{A vector of the names of error-checking functions to apply to
factor vectors.}

\item{labelledChecks}{A vector of the names of error-checking functions to apply to
labelled vectors.}

\item{numericChecks}{A vector of the names of error-checking functions to apply to
numeric vectors.}

\item{integerChecks}{A vector of the names of error-checking functions to apply to
integer vectors.}

\item{logicalChecks}{A vector of the names of error-checking functions to apply to
logical vectors.}

\item{dateChecks}{A vector of the names of error-checking functions to apply to
Date vectors.}

\item{allChecks}{Vector of function names that should be used as check-functions
for all variable types. Note that this argument overwrites the arguments
\code{characterChecks}, \code{factorChecks}, etc.}

\item{characterSummaries}{A vector of the names of summary functions to apply to
character vectors.}

\item{factorSummaries}{A vector of the names of summary functions to apply to
factor vectors.}

\item{labelledSummaries}{A vector of the names of summary functions to apply to
labelled vectors.}

\item{numericSummaries}{A vector of the names of summary functions to apply to
numeric vectors.}

\item{integerSummaries}{A vector of the names of summary functions to apply to
integer vectors.}

\item{logicalSummaries}{A vector of the names of summary functions to apply to
logical vectors.}

\item{dateSummaries}{A vector of the names of summary functions to apply to
Date vectors.}

\item{allSummaries}{Vector of function names that should be used as summary
functions for all variable types. Note that this argument overwrites the arguments
\code{characterSummaries}, \code{factorSummaries}, etc.}

\item{allVisuals}{A single function name. This function is called for
creating the plots for each variable in the "visualize" step. The default,
\code{"standardVisual"} thus calls the \code{\link{visualFunction}}
\code{\link{standardVisual}} for each variable in \code{data}.}

\item{listChecks}{A logical. Controls whether the checks that were used for each
possible variable type are summarized in the output. Defaults to \code{TRUE}.}

\item{maxProbVals}{A positive integer or \code{Inf}. Maximum number of unique
values printed from check-functions. In the case of \code{Inf}, all problematic 
values are printed. Defaults to \code{10}.}

\item{maxDecimals}{A positive integer or \code{Inf}. Number of decimals used when
printing numerical values in the data summary and in problematic values from the
data checks. If \code{Inf}, no rounding is performed.}

\item{addSummaryTable}{A logical. If \code{TRUE} (the default), a summary table
of the variable checks is added between the Data Cleaning Summary and the
Variable List.}

\item{reportTitle}{A text string. If supplied, this will be the printed title of the
report. If left unspecified, the title is based on the name of the supplied dataset.}

\item{treatXasY}{A list that indicates how non-standard variable classes should be treated.
This parameter allows you to include variables that are not of class \code{factor}, \code{character}, 
\code{labelled}, \code{numeric}, \code{integer}, \code{logical} nor \code{Date} (or a class
that inherits from any of these classes). The names of the list are the new classes and the entries
are the names of the classes they should be treated as. If \code{clean()} should e.g. treat variables of
class \code{raw} as characters and variables of class \code{complex} as numeric, you should put
\code{treatXasY = list(raw = "character", complex = "numeric")}.}

\item{\dots}{FIX ME-------- Other arguments that are passed on to the precheck,
checking, summary and visualization functions. WHAT ARGUMENTS ARE RELEVANT TO MENTION
 HERE?  ---------- FIX ME}
}
\value{
The function does not return anything. Its side effect (the production
of a data cleaning overview document) is the reason for running the function.
}
\description{
NOTE: This function has been replaced by \code{\link{makeDataReport}}. The current 
function is no longer updated and it is only included for backwards compatibility.
}
\details{
Run a set of class-specific validation checks to check the
variables in a dataset for potential errors.  Performs checking
steps according to user input and/or data type of the inputted
variable.  The checks are saved to an R markdown file which can
be rendered into an easy-to-read document.  This document also
includes summaries and visualizations of each variable in the
dataset.

For each variable, a set of pre-checks (controlled by the
\code{preChecks} argument) is first run and then a battery of
functions is applied depending on the variable class.  For each
variable type the summarize/visualize/check functions are applied
and the results are written to an R markdown file.
}
\examples{
data(testData)
data(toyData)

check(toyData)

 \dontrun{
DF <- data.frame(x = 1:15)
clean(DF)
}

\dontrun{
data(testData)
clean(testData)
}

# Overwrite any existing files generated by clean
\dontrun{
clean(testData, replace=TRUE)
}

# Only include problematic variables in the output document
\dontrun{
clean(testData, replace=TRUE, onlyProblematic=TRUE)
}

# Add user defined check-function to the checks performed on character variables:
# Here we add functionality to search for the string wally (ignoring case)
\dontrun{
wheresWally <- function(v, ...) {
     res <- grepl("wally", v, ignore.case=TRUE)
     problem <- any(res)
     message <- "Wally was found in these data"
     checkResult(list(problem = problem,
                      message = message,
                      problemValues = v[res]))
}

wheresWally <- checkFunction(wheresWally,
                             description = "Search for the string 'wally' ignoring case",
                             classes = c("character")
                             )
# Add the newly defined function to the list of checks used for characters.
clean(testData, characterChecks=c(defaultCharacterChecks(), "wheresWally"),
      replace=TRUE)
}

#Handle non-supported variable classes using treatXasY: treat raw as character and
#treat complex as numeric. We also add a list variable, but as lists are not 
#handled through treatXasY, this variable will be caught in the preChecks and skipped:
\dontrun{
toyData$rawVar <- as.raw(c(1:14, 1))
toyData$compVar <- c(1:14, 1) + 2i
toyData$listVar <- as.list(c(1:14, 1))
clean(toyData, replace  = TRUE, treatXasY = list(raw = "character", complex = "numeric"))
}

}
