Giants

Compressed Data Online

Create Directory

# See if directory exists
dir.exists('FootballTemp')
[1] FALSE
# create it
dir.create('FootballTemp')
# check again
dir.exists('FootballTemp')
[1] TRUE

Download Files

# Fetch the compressed football archive into the FootballTemp directory
# created above. method='curl' selects the curl download method
# (presumably requires curl to be available on this machine -- confirm).
download.file('http://www.jaredlander.com/data/Football1415.tar.gz', 
              destfile='FootballTemp/football.tar.gz', 
              method='curl')

Untar

getXKCD('1168')

Untar the File

untar('FootballTemp/football.tar.gz', exdir='FootballFiles')

Did They Extract?

dir('FootballFiles')
[1] "pbp-2014.csv" "pbp-2015.csv"

Delete Tar

unlink('FootballTemp/football.tar.gz')
dir('FootballTemp')
character(0)

Inspect One File

file.info('FootballFiles/pbp-2014.csv')
                               size isdir mode               mtime
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23
                                         ctime               atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no

Inspect All Files

dir('FootballFiles') %>% file.info
             size isdir mode mtime ctime atime  exe
pbp-2014.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
pbp-2015.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>

Inspect All Files

dir('FootballFiles', full.names=TRUE) %>% file.info
                               size isdir mode               mtime
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23
FootballFiles/pbp-2015.csv 10671016 FALSE  666 2016-03-25 00:14:23
                                         ctime               atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
FootballFiles/pbp-2015.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no

Better Names

# Rename the extracted files to FootballNN.csv. `from` and `to` are paired by
# position, so this relies on dir() listing pbp-2014.csv before pbp-2015.csv
# (as the listing above shows).
file.rename(from=dir('FootballFiles', full.names=TRUE), 
            to=sprintf('FootballFiles/Football%s.csv', 14:15))
[1] TRUE TRUE

Better Names

dir('FootballFiles')
[1] "Football14.csv" "Football15.csv"

Make Copies

# Back up the renamed csv files into a Backup subdirectory, keeping the same
# FootballNN.csv names as the originals
dir.create('FootballFiles/Backup')
file.copy(dir('FootballFiles', full.names=TRUE, pattern='\\.csv'), 
          sprintf('FootballFiles/Backup/Football%s.csv', 14:15))
[1] TRUE TRUE

Make Copies

dir('FootballFiles', recursive=TRUE)
[1] "Backup/Football14.csv" "Backup/Football15.csv" "Football14.csv"       
[4] "Football15.csv"       

Count Columns

count.fields('FootballFiles/Football14.csv', sep=',') %>% head(15)
 [1] 45 45 45 45 15 45 45 45 45 45 45 45 45 45 45
count.fields('FootballFiles/Football15.csv', sep=',') %>% head(15)
 [1] 45 45 45 45 45 45 45 45 45 NA 15 45 45 45 45

Line Count

system('wc -l FootballFiles/Football14.csv')
45696 FootballFiles/Football14.csv
system('wc -l FootballFiles/Football15.csv')
46278 FootballFiles/Football15.csv

Reference Files

dataPath <- 'FootballFiles'
file.path(dataPath, 'Football14.csv')
[1] "FootballFiles/Football14.csv"
file.path(dataPath, 'Football15.csv')
[1] "FootballFiles/Football15.csv"

Read Data

# Collect the csv files directly inside dataPath (not subdirectories)
theFiles <- dir(dataPath, pattern='\\.csv', full.names=TRUE)
# Read each file and row-bind the results into one data.frame.
# read.csv is the reader for comma-separated files; read.csv2 defaults to
# dec=',' so any column holding values such as 1.5 would come back as character.
games <- theFiles %>% map_df(read.csv, stringsAsFactors=FALSE)

See the Data

# Show an interactive table of 500 rows of `games`, sampled without
# replacement, with column filters placed above the table.
# NOTE(review): no seed is set here, so the sampled rows differ between runs.
DT::datatable(data=games%>% slice(sample(nrow(games), size=500, replace=FALSE)), 
              rownames=FALSE,
              options = list(
                  dom = "rtiS",
                  scrollY = 400, scrollX=TRUE,
                  scrollCollapse = TRUE),
              filter=list(position='top')
)

See the Data

Pass vs Rush

Focus on One Team's Offense

# Keep only the rows where NYG is the offense and the play is a pass or a
# rush, then convert the modelling variables to factors.
oneOff <- games %>%
    filter(OffenseTeam == 'NYG', PlayType %in% c('PASS', 'RUSH')) %>%
    # RUSH is listed first so it is the reference level; with a binomial glm
    # the remaining level (PASS) is the outcome being modelled
    mutate(PlayType=factor(PlayType, levels=c('RUSH', 'PASS')), 
           Down=factor(Down, levels=c(1, 2, 3, 4)))

Probability of a Pass

# Logistic regression of PlayType on Down and ToGo. The `- 1` drops the
# intercept so every level of Down gets its own coefficient.
passRushMod <- glm(PlayType ~ Down + ToGo - 1, data=oneOff, family=binomial)
# Plot the coefficients after passing them through the inverse logit
coefplot(passRushMod, trans=arm::invlogit, title='Probability of Pass')

Scenarios

# make grid of scenarios
# (every combination of ToGo 1-15 and Down 1-4; Down is converted to a factor
# with the same levels used when building the model data)
scenarios <- expand.grid(ToGo=1:15, Down=1:4) %>% as.tbl %>% 
    mutate(Down=factor(Down, levels=c(1, 2, 3, 4)))
# make prediction based on model
# (type='response' returns fitted probabilities; se.fit=TRUE adds their
# standard errors)
scenarioPredict <- predict(passRushMod, 
                           newdata=scenarios, type='response', se.fit=TRUE)
# build confidence intervals
# NOTE(review): these are +/- 2 standard errors on the response (probability)
# scale, so the bounds are not constrained to [0, 1]; building them on the
# link scale and back-transforming would avoid that -- confirm if it matters
scenarios <- scenarios %>% mutate(Prediction=scenarioPredict$fit, 
                                  Lower=Prediction - 2*scenarioPredict$se.fit,
                                  Upper=Prediction + 2*scenarioPredict$se.fit)

Scenarios

knitr::kable(head(scenarios))
ToGo Down Prediction Lower Upper
1 1 0.2754536 0.2135514 0.3373558
2 1 0.2959441 0.2371832 0.3547051
3 1 0.3172914 0.2621339 0.3724488
4 1 0.3394361 0.2882498 0.3906223
5 1 0.3623061 0.3153154 0.4092968
6 1 0.3858171 0.3430379 0.4285962

Probability of Pass

ggplot(scenarios, aes(x=ToGo)) + scale_y_continuous(label=scales::percent) +
    geom_ribbon(aes(ymin=Lower, ymax=Upper), fill='lightgrey') +
    geom_line(aes(y=Prediction)) + facet_wrap(~Down, nrow=2)

Get Eli's Stats

# Download and parse the player page
eliPage <- read_html('http://www.pro-football-reference.com/players/M/MannEl00.htm')
# Select the node with id "passing", convert it to a table using its first
# row as the header, and take the first element of the resulting list
eliStats <- eliPage %>% html_nodes("#passing") %>% 
    html_table(header=TRUE) %>% getElement(1)
# Show the top-left corner of the table: 8 rows by 7 columns
useful::topleft(eliStats, c=7, r=8)
   Year Age  Tm Pos No.  G GS
1  2004  23 NYG  qb  10  9  7
2  2005  24 NYG  QB  10 16 16
3  2006  25 NYG  QB  10 16 16
4  2007  26 NYG  QB  10 16 16
5 2008*  27 NYG  QB  10 16 16
6  2009  28 NYG  QB  10 16 16
7  2010  29 NYG  QB  10 16 16
8 2011*  30 NYG  QB  10 16 16

Save Them

# Create the output directory for the plot and the scraped table
dir.create('results')
# No plot object is passed, so this presumably saves the most recently
# displayed ggplot (ggsave's default) -- confirm the pass-probability plot
# was the last one drawn
ggsave('results/EliPass.png')
# Write the scraped table as a comma-separated file without row names
write.table(eliStats, file='results/eliStats.csv', sep=',', row.names=FALSE)
[1] TRUE
[1] TRUE

Commit Them

# Open the git repository rooted at the current working directory
repo <- repository(getwd())
# Stage the saved plot and csv. The names must match the files on disk
# exactly: the plot was saved as 'EliPass.png', and the lowercase spelling
# would not be found on a case-sensitive filesystem.
add(repo, file.path('results', c('EliPass.png', 'eliStats.csv')))
# Record the staged files and publish the commit
commit(repo, message='Tracking plot and csv')
push(repo)

Email Them

# Compose an email with the saved plot and csv attached, then send it.
# NOTE(review): mime/attach_file/send_message look like the gmailr API, but no
# library() call is visible here -- confirm which package provides them and
# that authentication is set up before running.
footballResults <- mime(
    To = "jared@landeranalytics.com",
    From = "jared@jaredlander.com",
    Subject = "Eli Results",
    body = "See the attached graph and data.") %>% 
    attach_file('results/EliPass.png') %>% 
    attach_file('results/eliStats.csv')
send_message(footballResults)

Things We've Done

  • Create Directories
  • Query Directories
  • Untar Files
  • Read XKCD
  • Delete Files
  • Get File Info
  • Move Files
  • Copy Files
  • Count Columns
  • Run System Commands
  • Build File Paths
  • Read Data
  • Munge Data
  • Fit a GLM
  • Make Predictions
  • Generate Plots
  • Save Files
  • Scrape a Website
  • Commit and Push to Git
  • Send an Email

Jared P. Lander

Packages

The Tools