% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/percentage_point_gap.R
\name{di_ppg}
\alias{di_ppg}
\title{Calculate disproportionate impact per the percentage point gap (PPG) method}
\usage{
di_ppg(
  success,
  group,
  cohort,
  weight,
  reference = c("overall", "hpg", "all but current", unique(group)),
  data,
  min_moe = 0.03,
  use_prop_in_moe = FALSE,
  prop_sub_0 = 0.5,
  prop_sub_1 = 0.5,
  check_valid_reference = TRUE
)
}
\arguments{
\item{success}{A vector of success indicators (\code{1}/\code{0} or \code{TRUE}/\code{FALSE}) or an unquoted reference (name) to a column in \code{data} if it is specified.  It could also be a vector of counts, in which case \code{weight} (group size) should also be specified.}

\item{group}{A vector of group names of the same length as \code{success} or an unquoted reference (name) to a column in \code{data} if it is specified.}

\item{cohort}{(Optional) A vector of cohort names of the same length as \code{success} or an unquoted reference (name) to a column in \code{data} if it is specified.  Disproportionate impact is calculated for every group within each cohort.  When \code{cohort} is not specified, then the analysis assumes a single cohort.}

\item{weight}{(Optional) A vector of case weights of the same length as \code{success} or an unquoted reference (name) to a column in \code{data} if it is specified.  If \code{success} consists of counts instead of success indicators (1/0), then \code{weight} should also be specified to indicate the group size.}

\item{reference}{Either \code{'overall'} (default), \code{'hpg'} (highest performing group), \code{'all but current'} (success rate of everyone excluding the comparison group; also known as 'ppg minus 1'), a value from \code{group} (specifying a reference group), a single proportion (eg, 0.50), or a vector of proportions (one for each cohort).  Reference is used as a point of comparison for disproportionate impact for each group.  When \code{cohort} is specified:
\itemize{
  \item \code{'overall'} will use the overall success rate of each cohort as the reference;
  \item \code{'hpg'} will use the highest performing group in each cohort as reference;
  \item \code{'all but current'} will use the calculated success rate of each cohort excluding the comparison group;
  \item the success rate of the specified reference group from \code{group} in each cohort will be used;
  \item the specified proportion will be used for all cohorts;
  \item the specified vector of proportions will refer to the reference point for each cohort in alphabetical order (so the number of proportions should equal the number of unique cohorts).
}}

\item{data}{(Optional) A data frame containing the variables of interest.  If \code{data} is specified, then \code{success}, \code{group}, \code{cohort}, and \code{weight} will be searched within it.}

\item{min_moe}{The minimum margin of error (MOE) to be used in the calculation of disproportionate impact; it is passed to \link{ppg_moe}.  Defaults to \code{0.03}.}

\item{use_prop_in_moe}{A logical value indicating whether or not the MOE formula should use the observed success rates (\code{TRUE}).  Defaults to \code{FALSE}, which uses 0.50 as the proportion in the MOE formula.  If \code{TRUE}, the success rates are passed to the \code{proportion} argument of \link{ppg_moe}.}

\item{prop_sub_0}{For cases where \code{proportion} is 0, substitute with \code{prop_sub_0} (defaults to 0.5) to account for the zero MOE.  This is relevant only when \code{use_prop_in_moe=TRUE}.}

\item{prop_sub_1}{For cases where \code{proportion} is 1, substitute with \code{prop_sub_1} (defaults to 0.5) to account for the zero MOE.  This is relevant only when \code{use_prop_in_moe=TRUE}.}

\item{check_valid_reference}{Check whether \code{reference} is a valid value; defaults to \code{TRUE}.  This argument exists for use in \link{di_iterate}: when iterating DI calculations, there may be scenarios where a specified reference group does not contain any students.}
}
\value{
A data frame consisting of:
\itemize{
  \item \code{cohort} (if used),
  \item \code{group},
  \item \code{n} (sample size),
  \item \code{success} (number of successes for the cohort-group),
  \item \code{pct} (proportion of successes for the cohort-group),
  \item \code{reference_group} (reference group used in DI calculation),
  \item \code{reference} (reference value used in DI calculation),
  \item \code{moe} (margin of error),
  \item \code{pct_lo} (lower 95\% confidence limit for pct),
  \item \code{pct_hi} (upper 95\% confidence limit for pct),
  \item \code{di_indicator} (1 if there is disproportionate impact, ie, when \code{pct_hi <= reference}), 
  \item \code{success_needed_not_di} (the number of additional successes needed in order to no longer be considered disproportionately impacted as compared to the reference), and
  \item \code{success_needed_full_parity} (the number of additional successes needed in order to achieve full parity with the reference).
}
}
\description{
Calculate disproportionate impact per the percentage point gap (PPG) method.
}
\details{
This function determines disproportionate impact based on the percentage point gap (PPG) method, as described in \href{https://www.cccco.edu/-/media/CCCCO-Website/About-Us/Divisions/Digital-Innovation-and-Infrastructure/Research/Files/PercentagePointGapMethod2017.ashx}{this} reference from the California Community Colleges Chancellor's Office.  It assumes that a higher rate is good ("success").  For rates where a higher value is bad (eg, rate of drop-outs), consider analyzing the converse outcome instead (eg, non drop-outs, where high is good) in order to leverage this function properly.  Note that the margin of error (MOE) is calculated using \code{1.96*sqrt(0.25/n)} by default (ie, assuming a proportion of 0.50, so that 0.50*(1-0.50) = 0.25), with \code{min_moe} used as the minimum.
}
\examples{
library(dplyr)
data(student_equity)
# Vector
di_ppg(success=student_equity$Transfer
  , group=student_equity$Ethnicity) \%>\% as.data.frame
# Tidy and column reference
di_ppg(success=Transfer, group=Ethnicity, data=student_equity) \%>\%
  as.data.frame
# Cohort
di_ppg(success=Transfer, group=Ethnicity, cohort=Cohort
 , data=student_equity) \%>\%
  as.data.frame
# With custom reference (single)
di_ppg(success=Transfer, group=Ethnicity, reference=0.54
  , data=student_equity) \%>\%
  as.data.frame
# With custom reference (multiple)
di_ppg(success=Transfer, group=Ethnicity, cohort=Cohort
  , reference=c(0.5, 0.55), data=student_equity) \%>\%
  as.data.frame
# min_moe
di_ppg(success=Transfer, group=Ethnicity, data=student_equity
  , min_moe=0.02) \%>\%
  as.data.frame
# use_prop_in_moe
di_ppg(success=Transfer, group=Ethnicity, data=student_equity
  , min_moe=0.02
  , use_prop_in_moe=TRUE) \%>\%
  as.data.frame
}
\references{
California Community Colleges Chancellor's Office (2017).  \href{https://www.cccco.edu/-/media/CCCCO-Website/About-Us/Divisions/Digital-Innovation-and-Infrastructure/Research/Files/PercentagePointGapMethod2017.ashx}{Percentage Point Gap Method}.
}
