Monday, October 20, 2014
\[ \boldsymbol{Y} = \boldsymbol{X}\boldsymbol{\beta} + \boldsymbol{\epsilon} \]
\[ \begin{bmatrix} Y_1 \\ Y_2 \\ Y_3 \\ \vdots \\ Y_n \end{bmatrix} = \begin{bmatrix} 1 & X_{11} & X_{12} & \dots & X_{1p} \\ 1 & X_{21} & X_{22} & \dots & X_{2p} \\ \vdots & \vdots & \vdots & \ddots & \vdots \\ 1 & X_{n1} & X_{n2} & \dots & X_{np} \end{bmatrix} \begin{bmatrix} \beta_0 \\ \beta_1 \\ \beta_2 \\ \vdots \\ \beta_p \end{bmatrix} + \begin{bmatrix} \epsilon_1 \\ \epsilon_2 \\ \epsilon_3 \\ \vdots \\ \epsilon_n \end{bmatrix} \]
\[ \boldsymbol{\hat{\beta}} = (\boldsymbol{X}^T\boldsymbol{X})^{-1}\boldsymbol{X}^T\boldsymbol{Y} \]
Acres | FamilyIncome | FamilyType | NumBedrooms | NumChildren | NumPeople |
---|---|---|---|---|---|
1-10 | 150 | Married | 4 | 1 | 3 |
1-10 | 180 | Female Head | 3 | 2 | 4 |
1-10 | 280 | Female Head | 4 | 0 | 2 |
1-10 | 330 | Female Head | 2 | 1 | 2 |
1-10 | 330 | Male Head | 3 | 1 | 2 |
1-10 | 480 | Male Head | 0 | 3 | 4 |
\[ {p \choose 1} + {p \choose 2} + \dots + {p \choose p-1} + {p \choose p} \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 \right] \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda \sum_{j=1}^p \beta_j^2 \right] \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[\sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda \sum_{j=1}^p |\beta_j| \right] \]
\[ \text{Ridge: } \beta_j^2 \text{ Lasso: } |\beta_j| \]
\[ \min_{\beta_{0},\beta \in \mathbb{R}^{p+1}} \left[ \frac{1}{2N} \sum_{i=1}^N \left( y_i - \beta_0 -x_i^T\beta \right)^2 + \lambda P_{\alpha} \left(\beta \right) \right] \] where \[ P_{\alpha} \left(\beta \right) = \left(1 - \alpha \right) \frac{1}{2}||\Gamma\beta||_{\mathit{l}_2}^2 + \alpha ||\Gamma\beta||_{\mathit{l}_1} \]
# Model formula shared by the design matrix and the response, so the two
# builds cannot drift apart. The trailing `- 1` drops the intercept term.
acsFormula <- FamilyIncome ~ NumBedrooms + NumChildren + NumPeople +
  NumRooms + NumUnits + NumVehicles + NumWorkers + OwnRent + YearBuilt +
  ElectricBill + FoodStamp + HeatingFuel + Insurance + Language - 1

# build X
# NOTE(review): contrasts = FALSE presumably keeps an indicator column for
# every factor level (no baseline dropped) -- confirm against build.x docs.
acsX <- build.x(acsFormula, data = acs, contrasts = FALSE)

# build Y
acsY <- build.y(acsFormula, data = acs)

# Fix the RNG seed so the cross-validation run below is reproducible.
set.seed(1863561)

# run the cross-validated glmnet
# `nfolds` is spelled out in full; the previous `nfold` only worked through
# partial argument matching.
acsCV1 <- cv.glmnet(x = acsX, y = acsY, family = "gaussian", nfolds = 5)
\[ \tilde{\beta}_j = \hat{\beta}_j I\left( |\hat{\beta}_j| \ge a_n \right) \]
\[ r_i = y_i - x_i^T\tilde{\boldsymbol{\beta}} \]
\[ e_i = r_i - \bar{r} \]
\[ y_i^* = x_i^T\tilde{\boldsymbol{\beta}} + e_i^* \]
\[ \hat{\boldsymbol{\beta}}^* = \underset{u \in \mathbb{R}^p}{\operatorname{argmin}} \left[ \sum_{i=1}^N (y_i^* - x_i^Tu)^2 + \hat{\lambda}\sum_{j=1}^p|u_j| \right] \]
\[ \left[ \hat{\beta} + \tilde{\beta} - \hat{\beta}_{1-\alpha/2}^*, \hat{\beta} + \tilde{\beta} - \hat{\beta}_{\alpha/2}^* \right] \]
\[ \tilde{\beta}_A(\lambda_{k+1}) = \underset{\beta_A \in \mathbb{R}^{|A|}}{\operatorname{argmin}} \left[\frac{1}{2} ||y - \boldsymbol{X}_A\beta_A||_2^2 + \lambda_{k+1}||\beta_A||_1 \right] \]
\[ T_k = \left( \langle y,\boldsymbol{X}\hat{\beta}(\lambda_{k+1}) \rangle - \langle y,\boldsymbol{X}_A\tilde{\beta}_A(\lambda_{k+1}) \rangle \right) / \sigma^2 \]
\[ T_k \overset{d}{\rightarrow} \text{Exp}(1) \]
# Fix the RNG seed, as in the original snippet, so reruns start from the
# same state.
set.seed(1863561)

# Fit the regularization path on the design matrix and response with
# lars() (its default `type` is the lasso).
acsLasso <- lars(x = acsX, y = acsY)

# Run the covariance test on the fitted path; the result reports, per step,
# the predictor number, the drop in covariance and a p-value.
acsTest <- covTest(acsLasso, x = acsX, y = acsY)
Predictor_Number | Drop_in_covariance | P-value |
---|---|---|
39 | 2270.8716 | 0.000 |
4 | 55.6591 | 0.000 |
9 | 311.0317 | 0.000 |
28 | 45.0275 | 0.000 |
8 | 5.1157 | 0.006 |
Predictor_Number | Drop_in_covariance | P-value |
---|---|---|
12 | 0 | 1 |
-15 | NA | NA |
7 | 0 | 1 |
-35 | NA | NA |
-30 | NA | NA |
\[ \hat{\beta}^{en} = \underset{\beta \in \mathbb{R}^{p}}{\operatorname{argmin}} \left[\frac{1}{2} ||y - \boldsymbol{X}\beta||_2^2 + \lambda||\beta||_1 + \frac{\gamma}{2}||\beta||_2^2 \right] \]
\[ T_k = \left( \langle y,\boldsymbol{X}\hat{\beta}^{en}(\lambda_{k+1},\gamma) \rangle - \langle y,\boldsymbol{X}_A\tilde{\beta}_A^{en}(\lambda_{k+1},\gamma) \rangle \right) / \sigma^2 \]
\[ (1 + \gamma) \cdot T_k \overset{d}{\rightarrow} \text{Exp}(1) \]