Visualizing Models

Jared P. Lander

February 6, 2014

The Problem

Coefficients

                Coefficient     house1     house2     house3     house4
1               (Intercept)  4.329e+01  4.445e+01  4.023e+01  4.293e+01
2                     Units -1.881e-01 -1.478e-01 -1.054e-01 -8.337e-02
3                      SqFt  2.103e-04  2.078e-04  9.630e-05 -1.135e-05
4              BoroBrooklyn  3.456e+01  3.231e+01  2.844e+01  2.760e+01
5             BoroManhattan  1.310e+02  1.271e+02  1.157e+02  1.144e+02
6                BoroQueens  3.299e+01  2.980e+01  2.855e+01  2.707e+01
7         BoroStaten Island -3.630e+00 -7.543e+00 -1.564e+01 -1.594e+01
8                Units:SqFt         NA -2.256e-08         NA         NA
9       ClassR4-CONDOMINIUM         NA         NA  1.919e+01  1.677e+01
10      ClassR9-CONDOMINIUM         NA         NA  7.023e-01  9.370e+00
11      ClassRR-CONDOMINIUM         NA         NA -1.451e+01 -2.958e+01
12        SqFt:BoroBrooklyn         NA         NA -4.099e-05 -3.164e-05
13       SqFt:BoroManhattan         NA         NA  8.441e-05  9.979e-05
14          SqFt:BoroQueens         NA         NA -6.385e-05 -3.998e-05
15   SqFt:BoroStaten Island         NA         NA -1.739e-05 -1.628e-05
16 SqFt:ClassR4-CONDOMINIUM         NA         NA         NA  8.537e-05
17 SqFt:ClassR9-CONDOMINIUM         NA         NA         NA -8.415e-06
18 SqFt:ClassRR-CONDOMINIUM         NA         NA         NA  1.632e-04

The Data

  Neighborhood          Class Units YearBuilt   SqFt   Income IncomePerSqFt
1    FINANCIAL R9-CONDOMINIUM    42      1920  36500  1332615         36.51
2    FINANCIAL R4-CONDOMINIUM    78      1985 126420  6633257         52.47
3    FINANCIAL RR-CONDOMINIUM   500        NA 554174 17310000         31.24
4    FINANCIAL R4-CONDOMINIUM   282      1930 249076 11776313         47.28
5      TRIBECA R4-CONDOMINIUM   239      1985 219495 10004582         45.58
6      TRIBECA R4-CONDOMINIUM   133      1986 139719  5127687         36.70
  Expense ExpensePerSqFt NetIncome    Value ValuePerSqFt      Boro
1  342005           9.37    990610  7300000        200.0 Manhattan
2 1762295          13.94   4870962 30690000        242.8 Manhattan
3 3543000           6.39  13767000 90970000        164.2 Manhattan
4 2784670          11.18   8991643 67556006        271.2 Manhattan
5 2783197          12.68   7221385 54320996        247.5 Manhattan
6 1497788          10.72   3629899 26737996        191.4 Manhattan

Fit the models

# fit models
# Four linear models of ValuePerSqFt on the housing data, each with more terms.
# house1: main effects of Units, SqFt and Boro only.
house1 <- lm(ValuePerSqFt ~ Units + SqFt + Boro, data = housing)
# house2: `Units * SqFt` expands to both main effects plus Units:SqFt.
house2 <- lm(ValuePerSqFt ~ Units * SqFt + Boro, data = housing)
# house3: SqFt interacts with Boro; Class enters as a main effect.
house3 <- lm(ValuePerSqFt ~ Units + SqFt * Boro + Class, data = housing)
# house4: SqFt interacts with both Boro and Class.
house4 <- lm(ValuePerSqFt ~ Units + SqFt * Boro + SqFt * Class, data = housing)
# Print the coefficient table and fit statistics of the first model.
summary(house1)

Call:
lm(formula = ValuePerSqFt ~ Units + SqFt + Boro, data = housing)

Residuals:
    Min      1Q  Median      3Q     Max 
-164.42  -22.69    1.42   26.97  261.12 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        4.33e+01   5.33e+00    8.12  7.0e-16 ***
Units             -1.88e-01   2.21e-02   -8.51  < 2e-16 ***
SqFt               2.10e-04   2.09e-05   10.08  < 2e-16 ***
BoroBrooklyn       3.46e+01   5.54e+00    6.24  5.0e-10 ***
BoroManhattan      1.31e+02   5.38e+00   24.33  < 2e-16 ***
BoroQueens         3.30e+01   5.66e+00    5.83  6.3e-09 ***
BoroStaten Island -3.63e+00   9.99e+00   -0.36     0.72    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 43.4 on 2619 degrees of freedom
Multiple R-squared:  0.601, Adjusted R-squared:   0.6 
F-statistic:  657 on 6 and 2619 DF,  p-value: <2e-16
summary(house2)

Call:
lm(formula = ValuePerSqFt ~ Units * SqFt + Boro, data = housing)

Residuals:
    Min      1Q  Median      3Q     Max 
-163.98  -22.67    1.52   26.29  261.71 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        4.45e+01   5.32e+00    8.36  < 2e-16 ***
Units             -1.48e-01   2.38e-02   -6.22  5.9e-10 ***
SqFt               2.08e-04   2.08e-05    9.99  < 2e-16 ***
BoroBrooklyn       3.23e+01   5.54e+00    5.84  6.0e-09 ***
BoroManhattan      1.27e+02   5.43e+00   23.39  < 2e-16 ***
BoroQueens         2.98e+01   5.69e+00    5.24  1.7e-07 ***
BoroStaten Island -7.54e+00   9.99e+00   -0.75     0.45    
Units:SqFt        -2.26e-08   5.03e-09   -4.48  7.6e-06 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 43.2 on 2618 degrees of freedom
Multiple R-squared:  0.604, Adjusted R-squared:  0.603 
F-statistic:  570 on 7 and 2618 DF,  p-value: <2e-16
summary(house3)

Call:
lm(formula = ValuePerSqFt ~ Units + SqFt * Boro + Class, data = housing)

Residuals:
    Min      1Q  Median      3Q     Max 
-151.93  -22.16    0.26   25.18  254.79 

Coefficients:
                        Estimate Std. Error t value Pr(>|t|)    
(Intercept)             4.02e+01   5.49e+00    7.33  3.1e-13 ***
Units                  -1.05e-01   2.36e-02   -4.47  8.0e-06 ***
SqFt                    9.63e-05   2.57e-05    3.75  0.00018 ***
BoroBrooklyn            2.84e+01   5.64e+00    5.04  4.9e-07 ***
BoroManhattan           1.16e+02   5.56e+00   20.80  < 2e-16 ***
BoroQueens              2.86e+01   5.92e+00    4.83  1.5e-06 ***
BoroStaten Island      -1.56e+01   1.55e+01   -1.01  0.31448    
ClassR4-CONDOMINIUM     1.92e+01   2.38e+00    8.07  1.1e-15 ***
ClassR9-CONDOMINIUM     7.02e-01   3.63e+00    0.19  0.84643    
ClassRR-CONDOMINIUM    -1.45e+01   5.80e+00   -2.50  0.01246 *  
SqFt:BoroBrooklyn      -4.10e-05   2.85e-05   -1.44  0.15000    
SqFt:BoroManhattan      8.44e-05   1.34e-05    6.29  3.6e-10 ***
SqFt:BoroQueens        -6.39e-05   2.87e-05   -2.22  0.02636 *  
SqFt:BoroStaten Island -1.74e-05   1.49e-04   -0.12  0.90703    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 42.1 on 2612 degrees of freedom
Multiple R-squared:  0.625, Adjusted R-squared:  0.624 
F-statistic:  335 on 13 and 2612 DF,  p-value: <2e-16
summary(house4)

Call:
lm(formula = ValuePerSqFt ~ Units + SqFt * Boro + SqFt * Class, 
    data = housing)

Residuals:
    Min      1Q  Median      3Q     Max 
-156.27  -21.98    0.18   25.06  255.04 

Coefficients:
                          Estimate Std. Error t value Pr(>|t|)    
(Intercept)               4.29e+01   5.78e+00    7.42  1.6e-13 ***
Units                    -8.34e-02   2.41e-02   -3.46  0.00055 ***
SqFt                     -1.13e-05   8.14e-05   -0.14  0.88912    
BoroBrooklyn              2.76e+01   5.62e+00    4.91  9.8e-07 ***
BoroManhattan             1.14e+02   5.55e+00   20.61  < 2e-16 ***
BoroQueens                2.71e+01   5.90e+00    4.59  4.8e-06 ***
BoroStaten Island        -1.59e+01   1.55e+01   -1.03  0.30329    
ClassR4-CONDOMINIUM       1.68e+01   3.00e+00    5.59  2.5e-08 ***
ClassR9-CONDOMINIUM       9.37e+00   4.74e+00    1.98  0.04796 *  
ClassRR-CONDOMINIUM      -2.96e+01   8.14e+00   -3.64  0.00028 ***
SqFt:BoroBrooklyn        -3.16e-05   2.85e-05   -1.11  0.26691    
SqFt:BoroManhattan        9.98e-05   1.43e-05    6.98  3.8e-12 ***
SqFt:BoroQueens          -4.00e-05   2.96e-05   -1.35  0.17698    
SqFt:BoroStaten Island   -1.63e-05   1.48e-04   -0.11  0.91262    
SqFt:ClassR4-CONDOMINIUM  8.54e-05   7.69e-05    1.11  0.26700    
SqFt:ClassR9-CONDOMINIUM -8.41e-06   7.91e-05   -0.11  0.91528    
SqFt:ClassRR-CONDOMINIUM  1.63e-04   8.55e-05    1.91  0.05646 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 41.9 on 2609 degrees of freedom
Multiple R-squared:  0.629, Adjusted R-squared:  0.627 
F-statistic:  276 on 16 and 2609 DF,  p-value: <2e-16
# Gather the coefficient estimates of all four models into one wide table.
# library() stops with an error when a package is missing, whereas require()
# only returns FALSE and lets the script fail later with a confusing message.
library(coefplot) # multiplot()
library(reshape2) # dcast()

# plot = FALSE asks multiplot() for its underlying data instead of a drawing.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
houseCoef <- multiplot(house1, house2, house3, house4, plot = FALSE)

# Keep only the estimate, the term name and the model label.
houseCoef <- houseCoef[, c("Value", "Coefficient", "Model")]

# One row per coefficient, one column per model; a term that a model does
# not contain shows up as NA.
houseCoefs <- dcast(houseCoef, Coefficient ~ Model, value.var = "Value")
houseCoefs
                Coefficient     house1     house2     house3     house4
1               (Intercept)  4.329e+01  4.445e+01  4.023e+01  4.293e+01
2                     Units -1.881e-01 -1.478e-01 -1.054e-01 -8.337e-02
3                      SqFt  2.103e-04  2.078e-04  9.630e-05 -1.135e-05
4              BoroBrooklyn  3.456e+01  3.231e+01  2.844e+01  2.760e+01
5             BoroManhattan  1.310e+02  1.271e+02  1.157e+02  1.144e+02
6                BoroQueens  3.299e+01  2.980e+01  2.855e+01  2.707e+01
7         BoroStaten Island -3.630e+00 -7.543e+00 -1.564e+01 -1.594e+01
8                Units:SqFt         NA -2.256e-08         NA         NA
9       ClassR4-CONDOMINIUM         NA         NA  1.919e+01  1.677e+01
10      ClassR9-CONDOMINIUM         NA         NA  7.023e-01  9.370e+00
11      ClassRR-CONDOMINIUM         NA         NA -1.451e+01 -2.958e+01
12        SqFt:BoroBrooklyn         NA         NA -4.099e-05 -3.164e-05
13       SqFt:BoroManhattan         NA         NA  8.441e-05  9.979e-05
14          SqFt:BoroQueens         NA         NA -6.385e-05 -3.998e-05
15   SqFt:BoroStaten Island         NA         NA -1.739e-05 -1.628e-05
16 SqFt:ClassR4-CONDOMINIUM         NA         NA         NA  8.537e-05
17 SqFt:ClassR9-CONDOMINIUM         NA         NA         NA -8.415e-06
18 SqFt:ClassRR-CONDOMINIUM         NA         NA         NA  1.632e-04

Clustering

  Alcohol Malic.acid   Ash Alcalinity.of.ash Magnesium Total.phenols Flavanoids
1   12.93      2.504 2.408             19.89    103.60         2.111      1.584
2   13.80      1.883 2.426             17.02    105.51         2.867      3.014
3   12.52      2.494 2.289             20.82     92.35         2.071      1.758
  Nonflavanoid.phenols Proanthocyanins Color.intensity    Hue
1               0.3884           1.503           5.650 0.8840
2               0.2853           1.910           5.703 1.0783
3               0.3901           1.452           4.087 0.9412
  OD280.OD315.of.diluted.wines Proline
1                        2.365   728.3
2                        3.114  1195.1
3                        2.491   458.2
  [1] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 2 2 1 2 2 2 2 2 2 1 1
 [38] 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 1 3 1 3 3 1 3 3 1 1 1 3 3 2
 [75] 1 3 3 3 1 3 3 1 1 3 3 3 3 3 1 1 3 3 3 3 3 1 1 3 1 3 1 3 3 3 1 3 3 3 3 1 3
[112] 3 1 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 1 3 3 1 1 1 1 3 3 3 1 1 3 3 1 1 3 1
[149] 1 3 3 3 3 1 1 1 3 1 1 1 3 1 3 1 1 3 1 1 1 1 3 3 1 1 1 1 1 3

The Data

  Cultivar Alcohol Malic.acid  Ash Alcalinity.of.ash Magnesium Total.phenols
1        1   14.23       1.71 2.43              15.6       127          2.80
2        1   13.20       1.78 2.14              11.2       100          2.65
3        1   13.16       2.36 2.67              18.6       101          2.80
4        1   14.37       1.95 2.50              16.8       113          3.85
5        1   13.24       2.59 2.87              21.0       118          2.80
6        1   14.20       1.76 2.45              15.2       112          3.27
  Flavanoids Nonflavanoid.phenols Proanthocyanins Color.intensity  Hue
1       3.06                 0.28            2.29            5.64 1.04
2       2.76                 0.26            1.28            4.38 1.05
3       3.24                 0.30            2.81            5.68 1.03
4       3.49                 0.24            2.18            7.80 0.86
5       2.69                 0.39            1.82            4.32 1.04
6       3.39                 0.34            1.97            6.75 1.05
  OD280.OD315.of.diluted.wines Proline
1                         3.92    1065
2                         3.40    1050
3                         3.17    1185
4                         3.45    1480
5                         2.93     735
6                         2.85    1450

Fit K-means

# Fix the RNG seed so the randomly chosen starting centers, and therefore the
# resulting cluster labels, are reproducible.
set.seed(278613)
# Partition the rows of wineTrain into 3 clusters with k-means.
wineK3 <- kmeans(x = wineTrain, centers = 3)
K-means clustering with 3 clusters of sizes 62, 47, 69

Cluster means:
  Alcohol Malic.acid   Ash Alcalinity.of.ash Magnesium Total.phenols Flavanoids
1   12.93      2.504 2.408             19.89    103.60         2.111      1.584
2   13.80      1.883 2.426             17.02    105.51         2.867      3.014
3   12.52      2.494 2.289             20.82     92.35         2.071      1.758
  Nonflavanoid.phenols Proanthocyanins Color.intensity    Hue
1               0.3884           1.503           5.650 0.8840
2               0.2853           1.910           5.703 1.0783
3               0.3901           1.452           4.087 0.9412
  OD280.OD315.of.diluted.wines Proline
1                        2.365   728.3
2                        3.114  1195.1
3                        2.491   458.2

Clustering vector:
  [1] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 2 2 1 2 2 2 2 2 2 1 1
 [38] 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 1 3 1 3 3 1 3 3 1 1 1 3 3 2
 [75] 1 3 3 3 1 3 3 1 1 3 3 3 3 3 1 1 3 3 3 3 3 1 1 3 1 3 1 3 3 3 1 3 3 3 3 1 3
[112] 3 1 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 1 3 3 1 1 1 1 3 3 3 1 1 3 3 1 1 3 1
[149] 1 3 3 3 3 1 1 1 3 1 1 1 3 1 3 1 1 3 1 1 1 1 3 3 1 1 1 1 1 3

Within cluster sum of squares by cluster:
[1]  566573 1360950  443167
 (between_SS / total_SS =  86.5 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
[6] "betweenss"    "size"         "iter"         "ifault"      

Correlation

             pce  psavert uempmed unemploy
pce       1.0000 -0.92712  0.5146  0.32442
psavert  -0.9271  1.00000 -0.3615 -0.07642
uempmed   0.5146 -0.36153  1.0000  0.78428
unemploy  0.3244 -0.07642  0.7843  1.00000

The Data

        date   pce    pop psavert uempmed unemploy
1 1967-06-30 507.8 198712     9.8     4.5     2944
2 1967-07-31 510.9 198911     9.8     4.7     2945
3 1967-08-31 516.7 199113     9.0     4.6     2958
4 1967-09-30 513.3 199311     9.8     4.9     3143
5 1967-10-31 518.5 199498     9.7     4.7     3066
6 1967-11-30 526.2 199657     9.4     4.8     3018
# Correlation matrix of the four economic indicators.
# Columns are picked by name instead of by position (2, 4:6) so the result
# does not silently change if the column order of `economics` changes.
econVars <- c("pce", "psavert", "uempmed", "unemploy")
econCor <- cor(economics[, econVars])
econCor
             pce  psavert uempmed unemploy
pce       1.0000 -0.92712  0.5146  0.32442
psavert  -0.9271  1.00000 -0.3615 -0.07642
uempmed   0.5146 -0.36153  1.0000  0.78428
unemploy  0.3244 -0.07642  0.7843  1.00000

The Solution

Visualization

Coefficients


Call:
lm(formula = ValuePerSqFt ~ Units + SqFt + Boro, data = housing)

Residuals:
    Min      1Q  Median      3Q     Max 
-164.42  -22.69    1.42   26.97  261.12 

Coefficients:
                   Estimate Std. Error t value Pr(>|t|)    
(Intercept)        4.33e+01   5.33e+00    8.12  7.0e-16 ***
Units             -1.88e-01   2.21e-02   -8.51  < 2e-16 ***
SqFt               2.10e-04   2.09e-05   10.08  < 2e-16 ***
BoroBrooklyn       3.46e+01   5.54e+00    6.24  5.0e-10 ***
BoroManhattan      1.31e+02   5.38e+00   24.33  < 2e-16 ***
BoroQueens         3.30e+01   5.66e+00    5.83  6.3e-09 ***
BoroStaten Island -3.63e+00   9.99e+00   -0.36     0.72    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 43.4 on 2619 degrees of freedom
Multiple R-squared:  0.601, Adjusted R-squared:   0.6 
F-statistic:  657 on 6 and 2619 DF,  p-value: <2e-16

coefplot

as.matrix(coef(house1))
                        [,1]
(Intercept)        4.329e+01
Units             -1.881e-01
SqFt               2.103e-04
BoroBrooklyn       3.456e+01
BoroManhattan      1.310e+02
BoroQueens         3.299e+01
BoroStaten Island -3.630e+00
# Coefficient plot for house1, with terms ordered by magnitude.
coefplot(house1, title = "Coefficient Plot: House 1", sort = "magnitude")

as.matrix(coef(house2))
                        [,1]
(Intercept)        4.445e+01
Units             -1.478e-01
SqFt               2.078e-04
BoroBrooklyn       3.231e+01
BoroManhattan      1.271e+02
BoroQueens         2.980e+01
BoroStaten Island -7.543e+00
Units:SqFt        -2.256e-08
# Coefficient plot for house2, with terms ordered by magnitude.
coefplot(house2, title = "Coefficient Plot: House 2", sort = "magnitude")

as.matrix(coef(house3))
                             [,1]
(Intercept)             4.023e+01
Units                  -1.054e-01
SqFt                    9.630e-05
BoroBrooklyn            2.844e+01
BoroManhattan           1.157e+02
BoroQueens              2.855e+01
BoroStaten Island      -1.564e+01
ClassR4-CONDOMINIUM     1.919e+01
ClassR9-CONDOMINIUM     7.023e-01
ClassRR-CONDOMINIUM    -1.451e+01
SqFt:BoroBrooklyn      -4.099e-05
SqFt:BoroManhattan      8.441e-05
SqFt:BoroQueens        -6.385e-05
SqFt:BoroStaten Island -1.739e-05
# Coefficient plot for house3, with terms ordered by magnitude.
coefplot(house3, title = "Coefficient Plot: House 3", sort = "magnitude")

as.matrix(coef(house4))
                               [,1]
(Intercept)               4.293e+01
Units                    -8.337e-02
SqFt                     -1.135e-05
BoroBrooklyn              2.760e+01
BoroManhattan             1.144e+02
BoroQueens                2.707e+01
BoroStaten Island        -1.594e+01
ClassR4-CONDOMINIUM       1.677e+01
ClassR9-CONDOMINIUM       9.370e+00
ClassRR-CONDOMINIUM      -2.958e+01
SqFt:BoroBrooklyn        -3.164e-05
SqFt:BoroManhattan        9.979e-05
SqFt:BoroQueens          -3.998e-05
SqFt:BoroStaten Island   -1.628e-05
SqFt:ClassR4-CONDOMINIUM  8.537e-05
SqFt:ClassR9-CONDOMINIUM -8.415e-06
SqFt:ClassRR-CONDOMINIUM  1.632e-04
# Coefficient plot for house4, with terms ordered by magnitude.
coefplot(house4, title = "Coefficient Plot: House 4", sort = "magnitude")

Numerous Models

multiplot

multiplot(house1, house2, house3, house4)
houseCoefs
                Coefficient     house1     house2     house3     house4
1               (Intercept)  4.329e+01  4.445e+01  4.023e+01  4.293e+01
2                     Units -1.881e-01 -1.478e-01 -1.054e-01 -8.337e-02
3                      SqFt  2.103e-04  2.078e-04  9.630e-05 -1.135e-05
4              BoroBrooklyn  3.456e+01  3.231e+01  2.844e+01  2.760e+01
5             BoroManhattan  1.310e+02  1.271e+02  1.157e+02  1.144e+02
6                BoroQueens  3.299e+01  2.980e+01  2.855e+01  2.707e+01
7         BoroStaten Island -3.630e+00 -7.543e+00 -1.564e+01 -1.594e+01
8                Units:SqFt         NA -2.256e-08         NA         NA
9       ClassR4-CONDOMINIUM         NA         NA  1.919e+01  1.677e+01
10      ClassR9-CONDOMINIUM         NA         NA  7.023e-01  9.370e+00
11      ClassRR-CONDOMINIUM         NA         NA -1.451e+01 -2.958e+01
12        SqFt:BoroBrooklyn         NA         NA -4.099e-05 -3.164e-05
13       SqFt:BoroManhattan         NA         NA  8.441e-05  9.979e-05
14          SqFt:BoroQueens         NA         NA -6.385e-05 -3.998e-05
15   SqFt:BoroStaten Island         NA         NA -1.739e-05 -1.628e-05
16 SqFt:ClassR4-CONDOMINIUM         NA         NA         NA  8.537e-05
17 SqFt:ClassR9-CONDOMINIUM         NA         NA         NA -8.415e-06
18 SqFt:ClassRR-CONDOMINIUM         NA         NA         NA  1.632e-04

Clustering

  Alcohol Malic.acid   Ash Alcalinity.of.ash Magnesium Total.phenols Flavanoids
1   12.93      2.504 2.408             19.89    103.60         2.111      1.584
2   13.80      1.883 2.426             17.02    105.51         2.867      3.014
3   12.52      2.494 2.289             20.82     92.35         2.071      1.758
  Nonflavanoid.phenols Proanthocyanins Color.intensity    Hue
1               0.3884           1.503           5.650 0.8840
2               0.2853           1.910           5.703 1.0783
3               0.3901           1.452           4.087 0.9412
  OD280.OD315.of.diluted.wines Proline
1                        2.365   728.3
2                        3.114  1195.1
3                        2.491   458.2
  [1] 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 2 2 1 1 2 2 1 2 2 2 2 2 2 1 1
 [38] 2 2 1 1 2 2 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 1 3 1 3 3 1 3 3 1 1 1 3 3 2
 [75] 1 3 3 3 1 3 3 1 1 3 3 3 3 3 1 1 3 3 3 3 3 1 1 3 1 3 1 3 3 3 1 3 3 3 3 1 3
[112] 3 1 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 1 3 3 1 1 1 1 3 3 3 1 1 3 3 1 1 3 1
[149] 1 3 3 3 3 1 1 1 3 1 1 1 3 1 3 1 1 3 1 1 1 1 3 3 1 1 1 1 1 3

Multidimensional Scaling

# library() fails immediately if the package is not installed; require()
# would only return FALSE and let the plot() call below misbehave.
# NOTE(review): the kmeans plot method is presumably supplied by `useful` --
# confirm against the package documentation.
library(useful)
# Plot the fitted clustering against the data it was fitted on.
plot(wineK3, data = wineTrain)

Silhouette Plot

# library() errors out when the package is missing, unlike require().
library(cluster) # daisy(), silhouette()
# Squared pairwise dissimilarities between the rows of wineTrain.
wineDist <- daisy(x = wineTrain)^2
# Silhouette of the k-means assignment under those dissimilarities.
plot(silhouette(wineK3$cluster, wineDist), main = "Silhouette Plot")

Hierarchical Clustering

Dendrogram

# Hierarchical clustering of wineTrain under four linkage methods.
# The pairwise distance matrix is the same for every linkage, so it is
# computed once instead of once per hclust() call.
wineHDist <- dist(wineTrain)

wineH1 <- hclust(wineHDist, method = "single")
plot(wineH1, labels = FALSE, main = "Single")

wineH2 <- hclust(wineHDist, method = "complete")
plot(wineH2, labels = FALSE, main = "Complete")

wineH3 <- hclust(wineHDist, method = "average")
plot(wineH3, labels = FALSE, main = "Average")

wineH4 <- hclust(wineHDist, method = "centroid")
plot(wineH4, labels = FALSE, main = "Centroid")

Correlation

             pce  psavert uempmed unemploy
pce       1.0000 -0.92712  0.5146  0.32442
psavert  -0.9271  1.00000 -0.3615 -0.07642
uempmed   0.5146 -0.36153  1.0000  0.78428
unemploy  0.3244 -0.07642  0.7843  1.00000

Heatmap

# Heatmap of the correlation matrix econCor, drawn two ways.
# library() errors out when a package is missing, unlike require().
library(scales) # muted()
# ggplot() and friends are used below; attach ggplot2 explicitly rather than
# relying on another package having attached it as a side effect.
library(ggplot2)

# Reshape the matrix to long form: one row per (x, y) pair with its value.
# melt() here is the reshape2 generic (assumed attached earlier in the script).
econMelt <- melt(econCor,
  varnames = c("x", "y"),
  value.name = "Correlation"
)
econMelt <- econMelt[order(econMelt$Correlation), ]

# Tile plot with a diverging fill pinned to the full [-1, 1] range.
ggplot(econMelt, aes(x = x, y = y)) +
  geom_tile(aes(fill = Correlation)) +
  scale_fill_gradient2(
    low = muted("red"), mid = "white",
    high = "steelblue",
    guide = guide_colorbar(ticks = FALSE, barheight = 10),
    limits = c(-1, 1)
  ) +
  theme_minimal() +
  labs(x = NULL, y = NULL)

# Base-graphics alternative drawn from the matrix itself.
heatmap(econCor)

Conclusions

Plots instead of tables

Serge Belongie:

Getting information out of a table is like getting sunshine out of a cucumber.

  • Linear Models: coefplot/multiplot
  • Clustering: multidimensional scaling, silhouette plot, dendrogram
  • Correlation: heatmap

R Packages

Jared P. Lander

The Tools