Jared P. Lander
Python in Finance
April 5, 2013
# Pandas
import pandas as pd
import pandas.rpy.common as rpy
# make R interaction easy
%load_ext rmagic
att = pd.read_csv("../../data/att.csv")
print(att.head())
## T.Open T.High T.Low T.Close T.Volume T.Adjusted Date
## 1 35.67 35.78 34.78 34.95 33694300 25.06 2007-01-03
## 2 34.95 35.24 34.07 34.50 44285400 24.74 2007-01-04
## 3 34.40 34.54 33.95 33.96 36561800 24.35 2007-01-05
## 4 33.40 34.01 33.21 33.81 40237400 24.50 2007-01-08
## 5 33.85 34.41 33.66 33.94 40082600 24.59 2007-01-09
## 6 34.20 35.00 31.94 34.03 29964300 24.66 2007-01-10
# function to load pandas DataFrame into R
# written by Wes McKinney
def pandas_to_r(df, name):
from rpy2.robjects import r, globalenv
r_df = rpy.convert_to_r_dataframe(df)
globalenv[name] = r_df
pandas_to_r(att, "att")
%%R
print(head(att))
## T.Open T.High T.Low T.Close T.Volume T.Adjusted Date
## 1 35.67 35.78 34.78 34.95 33694300 25.06 2007-01-03
## 2 34.95 35.24 34.07 34.50 44285400 24.74 2007-01-04
## 3 34.40 34.54 33.95 33.96 36561800 24.35 2007-01-05
## 4 33.40 34.01 33.21 33.81 40237400 24.50 2007-01-08
## 5 33.85 34.41 33.66 33.94 40082600 24.59 2007-01-09
## 6 34.20 35.00 31.94 34.03 29964300 24.66 2007-01-10
%%R
require(quantmod)
require(xts)
require(forecast)
%%R
# convert to date
att$Date <- as.Date(as.character(att$Date))
att$T.Close <- as.numeric(att$T.Close)
attClose <- xts(att$T.Close, order.by=att$Date)
%%R
require(ggplot2)
p <- ggplot(att, aes(x=Date, y=T.Close)) + geom_line()
print(p + geom_smooth())
%%R
attFit <- auto.arima(ts(attClose))
attFit
## Series: ts(attClose)
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -0.069 -0.113
## s.e. 0.025 0.026
##
## sigma^2 estimated as 0.236: log likelihood=-1093
## AIC=2193 AICc=2193 BIC=2209
Generalized AutoRegressive Conditional Heteroskedasticity
%%R
require(rugarch)
attSpec <- ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1, 1)),
mean.model=list(armaOrder=c(1, 1)), distribution.model="std")
attGarch <- ugarchfit(spec=attSpec, data=attClose)
print(attGarch@fit$matcoef)
print(infocriteria(attGarch))
## Estimate Std. Error t value Pr(>|t|)
## mu 35.159848 1.3282100 26.4716 0.000e+00
## ar1 0.997009 0.0013019 765.8227 0.000e+00
## ma1 -0.009937 0.0268014 -0.3708 7.108e-01
## omega 0.001335 0.0006916 1.9297 5.365e-02
## alpha1 0.069952 0.0149684 4.6733 2.964e-06
## beta1 0.925012 0.0153999 60.0662 0.000e+00
## shape 7.581676 1.4048336 5.3968 6.782e-08
##
## Akaike 0.9975
## Bayes 1.0214
## Shibata 0.9975
## Hannan-Quinn 1.0064
%%R
# ARMA(1,1)
attSpec1 <- ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1, 1)),
mean.model=list(armaOrder=c(1, 1)), distribution.model="std")
# ARMA(0,0)
attSpec2 <- ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1, 1)),
mean.model=list(armaOrder=c(0, 0)), distribution.model="std")
# ARMA(0,2)
attSpec3 <- ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1, 1)),
mean.model=list(armaOrder=c(0, 2)), distribution.model="std")
# ARMA(1,2)
attSpec4 <- ugarchspec(variance.model=list(model="sGARCH", garchOrder=c(1, 1)),
mean.model=list(armaOrder=c(1, 2)), distribution.model="std")
attGarch1 <- ugarchfit(spec=attSpec1, data=attClose)
attGarch2 <- ugarchfit(spec=attSpec2, data=attClose)
attGarch3 <- ugarchfit(spec=attSpec3, data=attClose)
attGarch4 <- ugarchfit(spec=attSpec4, data=attClose)
%%R
print(infocriteria(attGarch1)[1])
print(infocriteria(attGarch2)[1])
print(infocriteria(attGarch3)[1])
print(infocriteria(attGarch4)[1])
## [1] 0.9975
## [1] 5.109
## [1] 3.406
## [1] 0.9963
%%R
attPred <- ugarchboot(attGarch, n.ahead=50, method = c("Partial", "Full")[1])
plot(attPred, which=1)
acs = pd.read_csv("../../data/acs_ny.csv")
pandas_to_r(acs, "acs")
%%R
require(useful)
acs$Income <- with(acs, FamilyIncome >= 150000)
acsX <- build.x(Income ~ NumBedrooms + NumChildren + NumPeople + NumRooms + NumUnits +
NumVehicles + NumWorkers + OwnRent + YearBuilt + ElectricBill +
FoodStamp + HeatingFuel + Insurance + Language - 1, data=acs, contrasts=FALSE)
acsY <- build.y(Income ~ NumBedrooms + NumChildren + NumPeople + NumRooms + NumUnits +
NumVehicles + NumWorkers + OwnRent + YearBuilt + ElectricBill +
FoodStamp + HeatingFuel + Insurance + Language - 1, data=acs)
require(glmnet)
# run the cross-validated glmnet
acsCV1 <- cv.glmnet(x=acsX, y=acsY, family="binomial", nfold=5)
%%R
theCoef <- as.matrix(coef(acsCV1, s="lambda.1se"))
coefDF <- data.frame(Value=theCoef, Coefficient=rownames(theCoef))
coefDF <- coefDF[nonzeroCoef(coef(acsCV1, s="lambda.1se")), ]
g <- ggplot(coefDF, aes(x=X1, y=reorder(Coefficient, X1))) +
geom_vline(xintercept=0, color="grey", linetype=2) +
geom_point(color="blue") +
labs(x="Value", y="Coefficient", title="Coefficient Plot")
Owner of JP Lander Consulting
Adjunct Professor at Columbia University
Organizer of New York Open Statistical Programming Meetup
Author of R for Non-Statisticans (August 2013)
Twitter: @jaredlander
Website: http://www.jaredlander.com