# See if directory exists
dir.exists('FootballTemp')
[1] FALSE
# create it
dir.create('FootballTemp')
# check again
dir.exists('FootballTemp')
[1] TRUE
# Fetch the compressed archive into the temporary directory.
download.file(
    url = "http://www.jaredlander.com/data/Football1415.tar.gz",
    destfile = "FootballTemp/football.tar.gz",
    method = "curl"
)
# Show XKCD comic number 1168 (getXKCD is provided by a package loaded elsewhere).
getXKCD("1168")
# Unpack the archive into its own directory, then list what was extracted.
untar(tarfile = "FootballTemp/football.tar.gz", exdir = "FootballFiles")
list.files("FootballFiles")
[1] "pbp-2014.csv" "pbp-2015.csv"
# Remove the downloaded archive now that it has been extracted
unlink('FootballTemp/football.tar.gz')
# Confirm the temporary directory is empty
dir('FootballTemp')
character(0)
# Size, type, permissions and timestamps for one extracted file.
file.info(file.path("FootballFiles", "pbp-2014.csv"))
                               size isdir mode               mtime
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23
                                         ctime               atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
# Bare file names (no directory prefix) are passed here, so file.info()
# cannot locate them from the working directory and reports NA.
file.info(list.files("FootballFiles"))
             size isdir mode mtime ctime atime  exe
pbp-2014.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
pbp-2015.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
# With full.names = TRUE the paths include the directory, so the lookup succeeds.
file.info(list.files("FootballFiles", full.names = TRUE))
                               size isdir mode               mtime
FootballFiles/pbp-2014.csv 10280324 FALSE  666 2016-03-25 00:14:23
FootballFiles/pbp-2015.csv 10671016 FALSE  666 2016-03-25 00:14:23
                                         ctime               atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
FootballFiles/pbp-2015.csv 2016-04-04 22:48:47 2016-04-04 22:48:47  no
# Rename the extracted files to Football14.csv / Football15.csv.
# The source and target vectors are paired element by element.
file.rename(
    to = sprintf("FootballFiles/Football%s.csv", 14:15),
    from = list.files("FootballFiles", full.names = TRUE)
)
[1] TRUE TRUE
# List the directory again to confirm the new names.
list.files("FootballFiles")
[1] "Football14.csv" "Football15.csv"
# Create a backup directory alongside the data files
dir.create('FootballFiles/Backup')
# Copy every CSV into the backup directory.
# NOTE(review): the target names are spelled 'Footballl' (three l's); this is
# kept as-is because the recorded output below lists the files under that name.
file.copy(dir('FootballFiles', full.names=TRUE, pattern='\\.csv'),
          sprintf('FootballFiles/Backup/Footballl%s.csv', 14:15))
[1] TRUE TRUE
# List everything under FootballFiles, descending into subdirectories.
list.files("FootballFiles", recursive = TRUE)
[1] "Backup/Footballl14.csv" "Backup/Footballl15.csv" "Football14.csv"
[4] "Football15.csv"
# Number of comma-separated fields on each of the first 15 lines of the 2014 file.
head(count.fields("FootballFiles/Football14.csv", sep = ","), 15)
[1] 45 45 45 45 15 45 45 45 45 45 45 45 45 45 45
# Number of comma-separated fields on each of the first 15 lines of the 2015 file.
head(count.fields("FootballFiles/Football15.csv", sep = ","), 15)
[1] 45 45 45 45 45 45 45 45 45 NA 15 45 45 45 45
# Count lines in the 2014 file with the shell's wc utility.
system(command = "wc -l FootballFiles/Football14.csv")
45696 FootballFiles/Football14.csv
# Count lines in the 2015 file with the shell's wc utility.
system(command = "wc -l FootballFiles/Football15.csv")
46278 FootballFiles/Football15.csv
# Directory holding the data files; paths below are built from it
dataPath <- 'FootballFiles'
# Build the path to the 2014 file
file.path(dataPath, 'Football14.csv')
[1] "FootballFiles/Football14.csv"
# Build the path to the 2015 file from the same base directory.
file.path(dataPath, "Football15.csv")
[1] "FootballFiles/Football15.csv"
# Full paths of every CSV file in the data directory
theFiles <- dir(dataPath, pattern='\\.csv', full.names=TRUE)

# Read each file and row-bind the results into a single data frame.
# read.csv is used instead of read.csv2: read.csv2 defaults to dec=',', which
# stays in force even when sep is overridden, so values written with a decimal
# point would not be parsed as numbers.
games <- theFiles %>%
    map_df(read.csv, sep=',', header=TRUE, stringsAsFactors=FALSE)
# Interactive, filterable table of 500 rows sampled without replacement.
games %>%
    slice(sample(nrow(games), size = 500, replace = FALSE)) %>%
    DT::datatable(
        rownames = FALSE,
        options = list(
            dom = "rtiS",
            scrollY = 400,
            scrollX = TRUE,
            scrollCollapse = TRUE
        ),
        filter = list(position = "top")
    )
# Keep only plays where the offense is 'NYG' and the play is a pass or a rush,
# then turn PlayType and Down into factors with an explicit level order.
oneOff <- mutate(
    filter(games, OffenseTeam == "NYG", PlayType %in% c("PASS", "RUSH")),
    PlayType = factor(PlayType, levels = c("RUSH", "PASS")),
    Down = factor(Down, levels = c(1, 2, 3, 4))
)
# Logistic regression of play type on down and yards to go.
# The '- 1' drops the intercept so every level of Down gets its own coefficient.
passRushMod <- glm(PlayType ~ Down + ToGo - 1, data=oneOff, family=binomial)

# Plot the coefficients, transformed through the inverse logit
coefplot(passRushMod, trans=arm::invlogit, title='Probability of Pass')
# make grid of scenarios
# NOTE(review): as.tbl() is deprecated as of dplyr 1.0.0; as_tibble() is its
# replacement if the installed dplyr is recent enough.
scenarios <- expand.grid(ToGo=1:15, Down=1:4) %>%
    as.tbl %>%
    mutate(Down=factor(Down, levels=c(1, 2, 3, 4)))

# make prediction based on model
scenarioPredict <- predict(passRushMod, newdata=scenarios,
                           type='response', se.fit=TRUE)

# build confidence intervals
# (fitted value +/- 2 standard errors, computed on the response scale, so the
# bounds are not guaranteed to stay inside [0, 1])
scenarios <- scenarios %>%
    mutate(Prediction=scenarioPredict$fit,
           Lower=Prediction - 2*scenarioPredict$se.fit,
           Upper=Prediction + 2*scenarioPredict$se.fit)
# Render the first rows of the scenario grid as a table.
scenarios %>%
    head() %>%
    knitr::kable()
ToGo | Down | Prediction | Lower | Upper |
---|---|---|---|---|
1 | 1 | 0.2754536 | 0.2135514 | 0.3373558 |
2 | 1 | 0.2959441 | 0.2371832 | 0.3547051 |
3 | 1 | 0.3172914 | 0.2621339 | 0.3724488 |
4 | 1 | 0.3394361 | 0.2882498 | 0.3906223 |
5 | 1 | 0.3623061 | 0.3153154 | 0.4092968 |
6 | 1 | 0.3858171 | 0.3430379 | 0.4285962 |
# Predicted values against yards to go, one panel per down,
# with the Lower/Upper band drawn behind the prediction line.
# 'labels' is spelled out in full rather than relying on partial matching of 'label'.
ggplot(scenarios, aes(x=ToGo)) +
    scale_y_continuous(labels=scales::percent) +
    geom_ribbon(aes(ymin=Lower, ymax=Upper), fill='lightgrey') +
    geom_line(aes(y=Prediction)) +
    facet_wrap(~Down, nrow=2)
# Download and parse the player's page (requires network access).
eliPage <- read_html(
    "http://www.pro-football-reference.com/players/M/MannEl00.htm"
)
# Select the node with id 'passing', parse it as a table using its header row,
# and take the first element of the resulting list.
eliStats <- eliPage %>%
    html_nodes("#passing") %>%
    html_table(header=TRUE) %>%
    getElement(1)

# Show the top-left corner of the table: first 8 rows, first 7 columns
useful::topleft(eliStats, c=7, r=8)
   Year Age  Tm Pos No.  G GS
1  2004  23 NYG  qb  10  9  7
2  2005  24 NYG  QB  10 16 16
3  2006  25 NYG  QB  10 16 16
4  2007  26 NYG  QB  10 16 16
5 2008*  27 NYG  QB  10 16 16
6  2009  28 NYG  QB  10 16 16
7  2010  29 NYG  QB  10 16 16
8 2011*  30 NYG  QB  10 16 16
# Directory for the saved outputs
dir.create('results')

# No plot is passed, so ggsave() writes the most recently displayed ggplot
ggsave('results/EliPass.png')

# Write the scraped table as a comma-separated file without row names
write.table(eliStats, file='results/eliStats.csv', sep=',', row.names=FALSE)
[1] TRUE
[1] TRUE
# Open the git repository rooted at the working directory
repo <- repository(getwd())

# Stage the saved plot and CSV. The plot was written as 'EliPass.png', so the
# same capitalisation is used here; 'eliPass.png' would not be found on a
# case-sensitive filesystem.
add(repo, file.path('results', c('EliPass.png', 'eliStats.csv')))

# Commit and push the staged files
commit(repo, message='Tracking plot and csv')
push(repo)
# Compose an email and attach the saved plot and CSV
footballResults <- mime(
    To = "jared@landeranalytics.com",
    From = "jared@jaredlander.com",
    Subject = "Eli Results",
    body = "See the attached graph and data.") %>%
    attach_file('results/EliPass.png') %>%
    attach_file('results/eliStats.csv')

# Send the composed message
send_message(footballResults)