# Check whether the scratch directory is already present
dir.exists("FootballTemp")
[1] FALSE
# Make the scratch directory that will receive the download
dir.create("FootballTemp")
# Ask again now that the directory has been created
dir.exists("FootballTemp")
[1] TRUE
# Fetch the compressed archive into the scratch directory using the
# "curl" download method
download.file(
  url = "http://www.jaredlander.com/data/Football1415.tar.gz",
  destfile = "FootballTemp/football.tar.gz",
  method = "curl"
)
# Look up XKCD comic "1168" (getXKCD is presumably RXKCD's — TODO confirm)
getXKCD("1168")
# Unpack the archive into its own directory, then list what came out
untar(tarfile = "FootballTemp/football.tar.gz", exdir = "FootballFiles")
list.files("FootballFiles")
[1] "pbp-2014.csv" "pbp-2015.csv"
# Delete the downloaded archive, then list what remains in the scratch
# directory
unlink("FootballTemp/football.tar.gz")
list.files("FootballTemp")
character(0)
# Inspect the metadata (size, mode, timestamps) of one extracted file
file.info("FootballFiles/pbp-2014.csv")
size isdir mode mtime
FootballFiles/pbp-2014.csv 10280324 FALSE 666 2016-03-25 00:14:23
ctime atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47 no
# Bare file names (no directory prefix) are looked up relative to the
# working directory, so file.info() cannot find them and reports NA
file.info(list.files("FootballFiles"))
             size isdir mode mtime ctime atime  exe
pbp-2014.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
pbp-2015.csv   NA    NA <NA>  <NA>  <NA>  <NA> <NA>
# With full.names = TRUE the paths include the directory, so the
# metadata lookup succeeds
file.info(list.files("FootballFiles", full.names = TRUE))
size isdir mode mtime
FootballFiles/pbp-2014.csv 10280324 FALSE 666 2016-03-25 00:14:23
FootballFiles/pbp-2015.csv 10671016 FALSE 666 2016-03-25 00:14:23
ctime atime exe
FootballFiles/pbp-2014.csv 2016-04-04 22:48:47 2016-04-04 22:48:47 no
FootballFiles/pbp-2015.csv 2016-04-04 22:48:47 2016-04-04 22:48:47 no
# Rename the extracted files to FootballNN.csv; the i-th listed file
# receives the i-th generated name (14, then 15)
file.rename(
  from = list.files("FootballFiles", full.names = TRUE),
  to = sprintf("FootballFiles/Football%s.csv", 14:15)
)
[1] TRUE TRUE
# List the directory to see the new file names
list.files("FootballFiles")
[1] "Football14.csv" "Football15.csv"
# Create a Backup subdirectory and copy each CSV into it.
# NOTE(review): "Footballl" has three l's — likely a typo; kept as-is because
# the recorded directory listing that follows shows these exact names.
dir.create("FootballFiles/Backup")
file.copy(
  from = list.files("FootballFiles", pattern = "\\.csv", full.names = TRUE),
  to = sprintf("FootballFiles/Backup/Footballl%s.csv", 14:15)
)
[1] TRUE TRUE
# List the directory again, this time descending into subdirectories
list.files("FootballFiles", recursive = TRUE)
[1] "Backup/Footballl14.csv" "Backup/Footballl15.csv" "Football14.csv"
[4] "Football15.csv"
# Count the comma-separated fields on each line of the 2014 file and show
# the first 15 counts
head(count.fields("FootballFiles/Football14.csv", sep = ","), n = 15)
[1] 45 45 45 45 15 45 45 45 45 45 45 45 45 45 45
# Count the comma-separated fields on each line of the 2015 file and show
# the first 15 counts
head(count.fields("FootballFiles/Football15.csv", sep = ","), n = 15)
[1] 45 45 45 45 45 45 45 45 45 NA 15 45 45 45 45
# Count the lines of the 2014 file by shelling out to `wc -l`
# (assumes a system that provides wc — TODO confirm)
system("wc -l FootballFiles/Football14.csv")
45696 FootballFiles/Football14.csv
# Count the lines of the 2015 file by shelling out to `wc -l`
# (assumes a system that provides wc — TODO confirm)
system("wc -l FootballFiles/Football15.csv")
46278 FootballFiles/Football15.csv
# Keep the data directory in one variable and build file paths from it with
# file.path() instead of pasting separators by hand.
# (The assignment and the call are separate statements and must sit on
# separate lines to parse.)
dataPath <- "FootballFiles"
file.path(dataPath, "Football14.csv")
[1] "FootballFiles/Football14.csv"
# Build the path to the 2015 file from the same base directory
file.path(dataPath, "Football15.csv")
[1] "FootballFiles/Football15.csv"
# Collect the full paths of the CSV files sitting directly in dataPath
theFiles <- dir(dataPath, pattern = "\\.csv", full.names = TRUE)

# Read every file and combine the results into a single data frame.
# NOTE(review): read.csv2() defaults to dec = ","; only sep is overridden
# here, so columns holding "." decimals may be read as character — confirm
# whether read.csv() was intended.
games <- theFiles %>%
  map_df(read.csv2, sep = ",", header = TRUE, stringsAsFactors = FALSE)
# Show an interactive table of 500 rows drawn at random, without
# replacement, from games
DT::datatable(
  data = slice(games, sample(nrow(games), size = 500, replace = FALSE)),
  rownames = FALSE,
  options = list(
    dom = "rtiS",
    scrollY = 400,
    scrollX = TRUE,
    scrollCollapse = TRUE
  ),
  filter = list(position = "top")
)
# Keep only NYG offensive plays that were a pass or a rush, then turn the
# play type and the down into factors with an explicit level order
oneOff <- games %>%
  filter(OffenseTeam == "NYG") %>%
  filter(PlayType %in% c("PASS", "RUSH")) %>%
  mutate(
    PlayType = factor(PlayType, levels = c("RUSH", "PASS")),
    Down = factor(Down, levels = 1:4)
  )
# Binomial GLM of play type on down and yards to go. The "- 1" removes the
# intercept from the formula.
# (The model fit and the plot call are separate statements and must sit on
# separate lines to parse.)
passRushMod <- glm(PlayType ~ Down + ToGo - 1, data = oneOff, family = binomial)

# Plot the coefficients, transformed with the inverse logit
coefplot(passRushMod, trans = arm::invlogit, title = "Probability of Pass")
# Build a grid of every combination of yards to go (1-15) and down (1-4).
# as_tibble() replaces the deprecated dplyr::as.tbl(); Down is converted to a
# factor with levels 1-4.
scenarios <- expand.grid(ToGo = 1:15, Down = 1:4) %>%
  as_tibble() %>%
  mutate(Down = factor(Down, levels = c(1, 2, 3, 4)))
# Score every scenario with the fitted model on the response scale,
# requesting standard errors alongside the fitted values
scenarioPredict <- predict(
  passRushMod,
  newdata = scenarios,
  type = "response",
  se.fit = TRUE
)
# Attach the prediction and a band of two standard errors either side.
# NOTE(review): the band is computed on the response scale, so nothing
# constrains Lower/Upper to stay within [0, 1].
scenarios <- mutate(
  scenarios,
  Prediction = scenarioPredict$fit,
  Lower = Prediction - 2 * scenarioPredict$se.fit,
  Upper = Prediction + 2 * scenarioPredict$se.fit
)
# Render the first rows of the scenario table
scenarios %>%
  head() %>%
  knitr::kable()
| ToGo | Down | Prediction | Lower | Upper |
|---|---|---|---|---|
| 1 | 1 | 0.2754536 | 0.2135514 | 0.3373558 |
| 2 | 1 | 0.2959441 | 0.2371832 | 0.3547051 |
| 3 | 1 | 0.3172914 | 0.2621339 | 0.3724488 |
| 4 | 1 | 0.3394361 | 0.2882498 | 0.3906223 |
| 5 | 1 | 0.3623061 | 0.3153154 | 0.4092968 |
| 6 | 1 | 0.3858171 | 0.3430379 | 0.4285962 |
# Plot the prediction against yards to go, with the Lower/Upper band as a
# grey ribbon, one panel per down. The argument is spelled `labels` in full
# rather than relying on partial matching of `label`.
ggplot(scenarios, aes(x = ToGo)) +
  scale_y_continuous(labels = scales::percent) +
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "lightgrey") +
  geom_line(aes(y = Prediction)) +
  facet_wrap(~Down, nrow = 2)
# Download and parse the player page
eliPage <- read_html(
  "http://www.pro-football-reference.com/players/M/MannEl00.htm"
)

# Select the nodes matching "#passing", convert them to tables and keep the
# first one
eliStats <- eliPage %>%
  html_nodes("#passing") %>%
  html_table(header = TRUE) %>%
  getElement(1)

# Peek at the top-left corner: 8 rows by 7 columns
useful::topleft(eliStats, c = 7, r = 8)
   Year Age  Tm Pos No.  G GS
1  2004  23 NYG  qb  10  9  7
2  2005  24 NYG  QB  10 16 16
3  2006  25 NYG  QB  10 16 16
4  2007  26 NYG  QB  10 16 16
5 2008*  27 NYG  QB  10 16 16
6  2009  28 NYG  QB  10 16 16
7  2010  29 NYG  QB  10 16 16
8 2011*  30 NYG  QB  10 16 16
# Create an output directory, save a plot image into it, and write the
# scraped table there as comma-separated text without row names
dir.create("results")
ggsave(filename = "results/EliPass.png")
write.table(
  eliStats,
  file = "results/eliStats.csv",
  sep = ",",
  row.names = FALSE
)
[1] TRUE
[1] TRUE
# Stage the saved plot and CSV in the repository at the working directory,
# then commit and push.
# The plot is staged as "EliPass.png" (capital E), matching the name passed
# to ggsave(); a differently-cased name does not resolve on case-sensitive
# file systems.
repo <- repository(getwd())
add(repo, file.path("results", c("EliPass.png", "eliStats.csv")))
commit(repo, message = "Tracking plot and csv")
push(repo)
# Compose a message and attach the saved plot and the CSV
footballResults <- mime(
  To = "jared@landeranalytics.com",
  From = "jared@jaredlander.com",
  Subject = "Eli Results",
  body = "See the attached graph and data."
) %>%
  attach_file("results/EliPass.png") %>%
  attach_file("results/eliStats.csv")

# Send the composed message
send_message(footballResults)