% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv2disk.frame.r
\name{csv_to_disk.frame}
\alias{csv_to_disk.frame}
\title{Convert CSV file(s) to disk.frame format}
\usage{
csv_to_disk.frame(infile, outdir = tempfile(fileext = ".df"),
  inmapfn = base::I,
  nchunks = recommend_nchunks(sum(file.size(infile))),
  in_chunk_size = NULL, shardby = NULL, compress = 50,
  overwrite = TRUE, header = TRUE, .progress = TRUE, ...)
}
\arguments{
\item{infile}{The input CSV file or files}

\item{outdir}{The directory to output the disk.frame to}

\item{inmapfn}{A function applied to each chunk read in from the CSV before the chunk is written out. Commonly used to perform simple transformations. Defaults to the identity function (i.e. no transformation)}

\item{nchunks}{Number of chunks to output}

\item{in_chunk_size}{When reading in the file, how many lines to read in at once. This is different to \code{nchunks}, which controls how many chunks are output}

\item{shardby}{The column(s) to shard the data by. For example, suppose \code{shardby = c("col1","col2")}; then all rows with the same values of \code{col1} and \code{col2} will end up in the same chunk. This allows merging by \code{col1} and \code{col2} to be more efficient}

\item{compress}{For fst backends, a number between 0 and 100, where 100 gives the highest compression ratio.}

\item{overwrite}{Whether to overwrite the existing directory}

\item{header}{Whether the files have a header row. Defaults to TRUE}

\item{.progress}{A logical, for whether or not to print a progress bar for multiprocess, multisession, and multicore plans. From the \pkg{furrr} package}

\item{...}{Passed to \code{data.table::fread}, \code{disk.frame::as.disk.frame} and \code{disk.frame::shard}}
}
\description{
Convert CSV file(s) to disk.frame format
}
\examples{
tmpfile = tempfile()
write.csv(cars, tmpfile)
tmpdf = tempfile(fileext = ".df")
df = csv_to_disk.frame(tmpfile, outdir = tmpdf, overwrite = TRUE)

# clean up
fs::file_delete(tmpfile)
delete(df)
}
