% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/linear_SDA.R
\name{do.sda}
\alias{do.sda}
\title{Semi-Supervised Discriminant Analysis}
\usage{
do.sda(X, label, ndim = 2, type = c("proportion", 0.1), alpha = 1,
  beta = 1)
}
\arguments{
\item{X}{an \eqn{(n\times p)} matrix or data frame whose rows are observations
and columns represent independent variables.}

\item{label}{a length-\eqn{n} vector of data class labels.}

\item{ndim}{an integer-valued target dimension.}

\item{type}{a vector specifying how the neighborhood graph is constructed. The following types are supported:
\code{c("knn",k)}, \code{c("enn",radius)}, and \code{c("proportion",ratio)}.
Default is \code{c("proportion",0.1)}, connecting about 1/10 of nearest data points
among all data points. See also \code{\link{aux.graphnbd}} for more details.}

\item{alpha}{balancing parameter between model complexity and empirical loss.}

\item{beta}{Tikhonov regularization parameter.}
}
\value{
a named list containing
\describe{
\item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
\item{trfinfo}{a list containing information for out-of-sample prediction.}
\item{projection}{a \eqn{(p\times ndim)} matrix whose columns are basis vectors for projection.}
}
}
\description{
Semi-Supervised Discriminant Analysis (SDA) is a linear dimension reduction method
for data whose labels are partially missing, i.e., semi-supervised. The labeled data
points are used to maximize the separability between classes while
the unlabeled ones are used to estimate the intrinsic structure of the data.
Regularization for the rank-deficient case is also supported through an \eqn{\ell_2}
scheme controlled by \code{beta}.
}
\examples{
## generate three groups of samples, shifted by -100, 0 and +100
dt1  = aux.gensamples(n=33)-100
dt2  = aux.gensamples(n=33)
dt3  = aux.gensamples(n=33)+100

## merge the data and create a label correspondingly
X      = rbind(dt1,dt2,dt3)
label  = c(rep(1,33), rep(2,33), rep(3,33))

## copy the label vector and set 20\% of its elements to be missing
nlabel = length(label)
nmissing = round(nlabel*0.20)
label_missing = label
label_missing[sample(1:nlabel, nmissing)]=NA

## compare true case with missing-label case
out1 = do.sda(X, label)
out2 = do.sda(X, label_missing)

## visualize side by side; save the current graphical parameters first
## so that the user's plotting layout is restored afterwards
opar = par(no.readonly=TRUE)
par(mfrow=c(1,2))
plot(out1$Y[,1], out1$Y[,2], main="true projection")
plot(out2$Y[,1], out2$Y[,2], main="20\% missing labels")
par(opar)

}
\references{
\insertRef{cai_semi-supervised_2007}{Rdimtools}
}
\author{
Kisung You
}
