#Reading in data (p. 3-7)
splus<-read.table(file="h:/splus/splus.txt",na.strings='.',col.names=c('id',"age","tol","male","noff","ttof"))

#Basic operations (p. 8-10)
c(1,2,5)

c(0:10)

rep(1,10)

matrix(c(1,1,2,2),2,2)

array(rep(c(1,1,2,2,),2),dim=c(2,2,2))

splus[1:2,]

splus[1:2,3:4]

tf<-(c(1,2,5)<=c(2,4,1))


#Common statistical procedures (p. 11-16)
summary(splus$age)
hist(splus$age)
stem(splus$age)

summary(splus$tol)
t.test(splus$tol,mu=6)
t.test(splus[c(1:1000)*splus$male,3],splus[c(1:1000)*(1-splus$male),3])
t.test(splus[c(1:1000)*splus$male,3],splus[c(1:1000)*(1-splus$male),3],mu=.5,alternative="greater")

t.test(splus[splus$male==TRUE,3],splus[splus$male==FALSE,3])

x1<-as.category(splus$male)
hist(splus$noff,breaks=c(0:20))
y1<-as.category(cut(splus$noff,breaks=c(-1,0,1,100)))
table(x1,y1)
chisq.test(x1,y1)

myreg<-lm(age~noff+tol,data=splus)
myreg$coefficients[1:2]
myreg$residuals
summary(myreg)
plot(splus$tol,myreg$residuals)
plot(splus$noff,myreg$residuals)

plot(splus$tol,splus$age,ylim=c(14,23),xlab="",ylab="")
par(new=T)
plot(splus$tol,myreg$fitted.values,col=2,ylim=c(14,23),xlab="TOL",ylab="Age at TOL")

summary(splus$ttof)
ttof<-c(splus[is.na(splus[,6])==FALSE,6],splus[is.na(splus[,6])==TRUE,3])
summary(ttof)

cens<-c(rep(1,sum(is.na(splus[,6])==FALSE)),rep(0,sum(is.na(splus[,6])==TRUE))) ###0 = alive and 1 = dead###
kM<-survfit(Surv(ttof,cens)~rep(1,1000))
plot.survfit(kM)
male<-c(splus[is.na(splus[,6])==FALSE,4],splus[is.na(splus[,6])==TRUE,4])
kM<-survfit(Surv(ttof,cens)~male)
plot.survfit(kM,lty=c(1,2))
survdiff(Surv(ttof,cens)~male,rho=0)
survdiff(Surv(ttof,cens)~male,rho=1)

coxmale<-coxph(Surv(ttof,cens)~male)
summary(coxmale)

#Graphics (p. 17-18)
tol<-splus[,2]
ttof<-splus[,6]
plot(tol,ttof,xlab="Age at Time of License",ylab="Time to First Offense")
plot(tol,ttof,xlab="Age at Time of License",ylab="Time to First Offense",pch='.')

plot(tol[splus$male==TRUE],ttof[splus$male==TRUE],xlab="Age at Time of License",ylab="Time to First Offense",pch=25)
plot(tol[splus$male==FALSE],ttof[splus$male==FALSE],xlab="Age at Time of License",ylab="Time to First Offense",pch=26)

plot(tol[splus$male==TRUE],ttof[splus$male==TRUE],xlab="",ylab="",pch=25,xlim=c(15,21),ylim=c(0,8),cex=1.5,col=3)
par(new=T)
plot(tol[splus$male==FALSE],ttof[splus$male==FALSE],xlab="",ylab="",pch=26, xlim=c(15,21),ylim=c(0,8),cex=1.5,col=2)
legend(18.5,6,legend=c("Female","Male"),pch="\032\031",cex=1.5)
mtext("Time to First Offense",side=2,line=3,cex=1.25)
mtext("Age at Time of License",side=1,line=3,cex=1.25)
title("Age at TOL vs. Time to First Offense")

#Matrix operations (p. 19-24)
c(1,2,3)*c(4,5,6)

A<-matrix(c(1,.5,.5,3),2,2)
B<-matrix(c(2,4,3,5),2,2)
A%*%B

t(A%*%B)


solve(B)
solve(B,c(1.5,2.5))
B%*%solve(B)
B%*%solve(B,c(1.5,2.5))

chol(A)
t(chol(A))%*%chol(A)

qr.Q(qr(A))
qr.R(qr(A))
qr.Q(qr(A))%*%qr.R(qr(A))

eigen(A)
eigen(A)$values[1]*eigen(A)$vector[,1]%*%t(eigen(A)$vector[,1])+
eigen(A)$values[2]*eigen(A)$vector[,2]%*%t(eigen(A)$vector[,2])

#User-defined functions (p. 25)
mydet<-function(A,B){
detA<-Re(prod(eigen(A)$values))
detB<-Re(prod(eigen(B)$values))
detAB<-detA*detB
return(detA,detB,detAB)
}

A<-matrix(c(1,2,3,4),2,2)
B<-matrix(c(2,7,4,5),2,2)
testdet<-mydet(A,B)
testdet$detA
testdet$detB
testdet$detAB

#Robust statistics (p. 26-30)
myreg<-lm(age~noff+tol,data=splus)
myreg$coefficients
myreg2<-l1fit(cbind(splus$noff,splus$tol),splus$age)
myreg2$coefficients

hist(splus[,2],probability=T,xlim=c(14,23),ylim=c(0,1),xlab="",ylab="")
par(new=T)
plot(ksmooth(splus[,2],kernel="normal",bandwidth=1)$x,
ksmooth(splus[,2],kernel="normal",bandwidth=1)$y,type='l',xlim=c(14,23),ylim=c(0,1),ylab="",xlab="Age at TOL")

age<-splus[is.na(ttof)==0,2]
nttof<-ttof[is.na(ttof)==0]
plot(age,nttof,ylim=c(0,10),xlab="",ylab="")
par(new=T)
plot.loess(loess(nttof~age),ylim=c(0,10),xlab="",ylab="")
par(new=T)
plot.loess(loess(nttof~age,degree=1),ylim=c(0,10),lty=2,xlab="",ylab="")
par(new=T)
plot(smooth.spline(age,nttof)$x,smooth.spline(age,nttof)$y,type='l',ylim=c(0,10),lty=3,xlab="",ylab="")
par(new=T)
plot(smooth.spline(age,nttof,df=4)$x,smooth.spline(age,nttof,df=4)$y,type='l',ylim=c(0,10),lty=4,xlab="Time to First Offense",ylab="Age at TOL")

#Simulations (p. 31-33)
betalb<-rep(0,200)
betaub<-rep(0,200)
count<-0
for(i in 1:200){
x<-runif(30,0,10)
y<-1+2*x+rnorm(30)
myreg<-lm(y~x)
betalb[i]<-myreg$coefficients[2]-qt(.975,28)*sqrt(deviance(myreg)/28)/sqrt(29*var(x))
betaub[i]<-myreg$coefficients[2]+qt(.975,28)*sqrt(deviance(myreg)/28)/sqrt(29*var(x))
if(betalb[i]<2&&betaub[i]>2) count<-count+1
print(i)
}
print(count)

100-(sum(betalb>2)+sum(betaub<2))/2

y<-c(3,5,4,2,8,6,5,6,7,2)
cumsum(y)[c(4,7,10)]-c(0,cumsum(y)[c(4,7)])