% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/stabit.R
\name{stabit}
\alias{stabit}
\title{Matching model and selection correction for group formation}
\usage{
stabit(
  x,
  m.id = "m.id",
  g.id = "g.id",
  R = "R",
  selection = NULL,
  outcome = NULL,
  simulation = "none",
  seed = 123,
  max.combs = Inf,
  method = "NTU",
  binary = FALSE,
  offsetOut = 0,
  offsetSel = 0,
  marketFE = FALSE,
  censored = 0,
  gPrior = FALSE,
  dropOnes = FALSE,
  interOut = 0,
  interSel = 0,
  standardize = 0,
  niter = 10,
  verbose = FALSE
)
}
\arguments{
\item{x}{data frame with individual-level characteristics of all group members including
market- and group-identifiers.}

\item{m.id}{character string giving the name of the market identifier variable. Defaults to \code{"m.id"}.}

\item{g.id}{character string giving the name of the group identifier variable. Defaults to \code{"g.id"}.}

\item{R}{dependent variable in outcome equation. Defaults to \code{"R"}.}

\item{selection}{list containing variables and pertaining operators in the selection equation. The format is 
\code{operation = "variable"}. See the Details and Examples sections.}

\item{outcome}{list containing variables and pertaining operators in the outcome equation. The format is 
\code{operation = "variable"}. See the Details and Examples sections.}

\item{simulation}{should the values of dependent variables in selection and outcome equations be simulated? Options are \code{"none"} for no simulation, \code{"NTU"} for non-transferable utility matching, \code{"TU"} for transferable utility or \code{"random"} for random matching of individuals to groups. Simulation settings are (i) all model coefficients set to \code{alpha=beta=1}; (ii) covariance between error terms \code{delta=0.5}; (iii) error terms \code{eta} and \code{xi} are draws from a standard normal distribution.}

\item{seed}{integer setting the state for random number generation when data are simulated, i.e. when \code{simulation} is not \code{"none"}.}

\item{max.combs}{integer (divisible by two) giving the maximum number of feasible groups to be used for generating group-level characteristics.}

\item{method}{estimation method to be used. Either \code{"NTU"} or \code{"TU"} for selection correction using non-transferable or transferable utility matching as selection rule; \code{"outcome"} for estimation of the outcome equation only; or \code{"model.frame"} for no estimation.}

\item{binary}{logical: if \code{TRUE} outcome variable is taken to be binary; if \code{FALSE} outcome variable is taken to be continuous.}

\item{offsetOut}{vector of integers indicating the indices of columns in \code{X} for which coefficients should be forced to 1. Use 0 for none.}

\item{offsetSel}{vector of integers indicating the indices of columns in \code{W} for which coefficients should be forced to 1. Use 0 for none.}

\item{marketFE}{logical: if \code{TRUE} market-level fixed effects are used in outcome equation; if \code{FALSE} no market fixed effects are used.}

\item{censored}{integer indicating whether draws of the \code{delta} parameter, which estimates the covariation between the error terms in the selection and outcome equations, are censored: \code{0} not censored, \code{1} censored from below, \code{2} censored from above.}

\item{gPrior}{logical: if \code{TRUE} the g-prior (Zellner, 1986) is used for the variance-covariance matrix.}

\item{dropOnes}{logical: if \code{TRUE} one-group-markets are excluded from estimation.}

\item{interOut}{two-column matrix indicating the indices of columns in \code{X} that should be interacted in estimation. Use 0 for none.}

\item{interSel}{two-column matrix indicating the indices of columns in \code{W} that should be interacted in estimation. Use 0 for none.}

\item{standardize}{numeric: if \code{standardize>0} the independent variables will be standardized by dividing by \code{standardize} times their standard deviation. Defaults to no standardization \code{standardize=0}.}

\item{niter}{number of iterations to use for the Gibbs sampler.}

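\item{verbose}{logical: if \code{TRUE}, progress messages are printed while the function runs. Defaults to \code{FALSE}.}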
}
\description{
The function provides a Gibbs sampler for a structural matching model that 
estimates preferences and corrects for sample selection bias when the selection process 
is a one-sided matching game; that is, group/coalition formation.

The input is individual-level data of all group members from one-sided matching markets; that is, 
from group/coalition formation games. 

In a first step, the function generates a model matrix with characteristics of \emph{all feasible} 
groups of the same size as the observed groups in the market. 

For example, in the stable roommates problem with \eqn{n=4} students \eqn{\{1,2,3,4\}}{{1,2,3,4}} 
sorting into groups of 2, we have \eqn{ {4 \choose 2}=6 }{choose(4,2) = 6} feasible groups: 
(1,2)(3,4) (1,3)(2,4) (1,4)(2,3).

In the group formation problem with \eqn{n=6} students \eqn{\{1,2,3,4,5,6\}}{{1,2,3,4,5,6}} 
sorting into groups of 3, we have \eqn{ {6 \choose 3} =20}{choose(6,3) = 20} feasible groups. 
For the same students sorting into groups of sizes 2 and 4, we have \eqn{ {6 \choose 2} + 
{6 \choose 4}=30}{choose(6,2) + choose(6,4) = 30} feasible groups.

The structural model consists of a selection and an outcome equation. The \emph{Selection Equation} 
determines which matches are observed (\eqn{D=1}) and which are not (\eqn{D=0}).
\deqn{ \begin{array}{lcl}
       D &= & 1[V \in \Gamma] \\
       V &= & W\alpha + \eta
       \end{array}
     }{ D = 1[V in \Gamma] with V = W\alpha + \eta
     }
Here, \eqn{V} is a vector of latent valuations of \emph{all feasible} matches, i.e., observed and 
unobserved, and \eqn{1[.]} is the Iverson bracket. 
A match is observed if its match valuation is in the set of valuations \eqn{\Gamma}
that satisfy the equilibrium condition (see Klein, 2015a). This condition differs for matching
games with transferable and non-transferable utility and can be specified using the \code{method} 
argument. 
The match valuation \eqn{V} is a linear function of \eqn{W}, a matrix of characteristics for 
\emph{all feasible} groups, and \eqn{\eta}, a vector of random errors. \eqn{\alpha} is a parameter 
vector to be estimated.

The \emph{Outcome Equation} determines the outcome for \emph{observed} matches. The dependent
variable can be either continuous or binary, depending on the value of the \code{binary}
argument. In the binary case, the dependent variable \eqn{R} is determined by a threshold 
rule for the latent variable \eqn{Y}.
\deqn{ \begin{array}{lcl}
       R &= & 1[Y > c] \\
       Y &= & X\beta + \epsilon
       \end{array}
     }{ R = 1[Y > c] with Y = X\beta + \epsilon
     }
Here, \eqn{Y} is a linear function of \eqn{X}, a matrix of characteristics for \emph{observed} 
matches, and \eqn{\epsilon}, a vector of random errors. \eqn{\beta} is a parameter vector to 
be estimated.

The structural model imposes a linear relationship between the error terms of both equations 
as \eqn{\epsilon = \delta\eta + \xi}, where \eqn{\xi} is a vector of random errors and \eqn{\delta}
is the covariance parameter to be estimated. If \eqn{\delta} were zero, the marginal distributions
of \eqn{\epsilon} and \eqn{\eta} would be independent and the selection problem would vanish.
That is, the observed outcomes would be a random sample from the population of interest.
}
\details{
Operators for variable transformations in \code{selection} and \code{outcome} arguments.
\describe{
\item{\code{add}}{sum over all group members and divide by group size.}
\item{\code{int}}{sum over all possible two-way interactions \eqn{x*y} of group members and divide by the number of those, given by \code{choose(n,2)}.}
\item{\code{ieq}}{sum over all possible two-way equality assertions \eqn{1[x=y]} and divide by the number of those.}
\item{\code{ive}}{sum over all possible two-way interactions of vectors of variables of group members and divide by number of those.}
\item{\code{inv}}{...}
\item{\code{dst}}{sum over all possible two-way distances between players and divide by number of those, where distance is defined as \eqn{e^{-|x-y|}}{exp(-|x-y|)}.}
}
}
\examples{
\dontrun{
## --- SIMULATED EXAMPLE ---

## 1. Simulate one-sided matching data for 200 markets (m=200) with 2 groups
##    per market (gpm=2) and 5 individuals per group (ind=5). The true parameter 
##    in the selection equation is wst=1, in the outcome equation wst=0. 

## 1-a. Simulate individual-level, independent variables
 idata <- stabsim(m=200, ind=5, seed=123, gpm=2)
 head(idata)
 
## 1-b. Simulate group-level variables 
 mdata <- stabit(x=idata, simulation="NTU", method="model.frame",
 selection = list(add="wst"), outcome = list(add="wst"), verbose=FALSE)
 head(mdata$OUT)
 head(mdata$SEL)


## 2. Bias from sorting

## 2-a. Naive OLS estimation
 lm(R ~ wst.add, data=mdata$OUT)$coefficients

## 2-b. epsilon is correlated with independent variables
 with(mdata$OUT, cor(epsilon, wst.add))
 
## 2-c. but xi is uncorrelated with independent variables
 with(mdata$OUT, cor(xi, wst.add))

## 3. Correction of sorting bias when valuations V are observed

## 3-a. 1st stage: obtain fitted value for eta
lm.sel <- lm(V ~ -1 + wst.add, data=mdata$SEL)
lm.sel$coefficients

eta <- lm.sel$resid[mdata$SEL$D==1]

## 3-b. 2nd stage: control for eta
 lm(R ~ wst.add + eta, data=mdata$OUT)$coefficients


## 4. Run Gibbs sampler
 fit1 <- stabit(x=idata, method="NTU", simulation="NTU", censored=1, 
                selection = list(add="wst"), outcome = list(add="wst"), 
                niter=2000, verbose=FALSE)


## 5. Coefficient table
 summary(fit1)


## 6. Plot MCMC draws for coefficients
 plot(fit1)

## --- REPLICATION, Klein (2015a) ---

## 1. Load data 
 data(baac00); head(baac00)
 
## 2. Run Gibbs sampler
 klein15a <- stabit(x=baac00, selection = list(inv="pi",ieq="wst"), 
        outcome = list(add="pi",inv="pi",ieq="wst",
        add=c("loan_size","loan_size2","lngroup_agei")), offsetOut=1,
        method="NTU", binary=TRUE, gPrior=TRUE, marketFE=TRUE, niter=800000)

## 3. Marginal effects
 summary(klein15a, mfx=TRUE)
 
## 4. Plot MCMC draws for coefficients
 plot(klein15a)
}
}
\references{
Klein, T. (2015a). \href{https://ideas.repec.org/p/cam/camdae/1521.html}{Does Anti-Diversification Pay? A One-Sided Matching Model of Microcredit}.
\emph{Cambridge Working Papers in Economics}, #1521.

Zellner, A. (1986). \emph{On assessing prior distributions and Bayesian regression analysis with g-prior distributions}, 
volume 6, pages 233--243. North-Holland, Amsterdam.
}
\author{
Thilo Klein
}
\keyword{regression}
