R Chapter 8


#Data

x1=c(0.794, 0.629, 0.597, 0.57, 0.524, 0.891, 0.707, 0.405, 0.808, 0.733,
0.616, 0.922, 0.649, 0.522, 0.988, 0.489, 0.398, 0.412, 0.423, 0.73,
0.603, 0.481, 0.952, 0.563, 0.986, 0.861, 0.633, 1.002, 0.973, 0.894,
0.958, 0.478, 0.669, 1.305, 0.494, 0.484, 0.878, 0.794, 0.591, 0.532,
0.685, 0.694, 0.672, 0.511, 0.776, 0.93, 0.508, 0.459, 0.816, 0.595)

x2=c(0.503, 0.5, 0.868, 0.54, 0.818, 0.608, 0.389, 0.48, 1.153, 0.838,
0.526, 0.81, 0.584, 0.422, 0.427, 0.39, 0.53, 0.411, 0.567, 0.806,
0.739, 0.655, 0.54, 0.418, 0.445, 0.46, 0.537, 0.53, 0.499, 0.512,
0.444, 0.611, 0.713, 0.653, 0.727, 0.649, 0.547, 0.463, 0.35, 0.689,
0.444, 0.431, 0.505, 0.676, 0.495, 0.652, 0.566, 0.629, 0.493, 0.428)

#Graphing Distributions

boxplot(x1,x2,names=c("Condition 1","Condition 2"),ylab="Response Time (sec)",col=c('lightblue','yellow'))

#histograms
#problems: 1 per plot
par(mfrow=c(2,1))
hist(x1,prob=T,col='lightblue',xlab="Response Time (sec)",main="Condition 1")
hist(x2,prob=T,col='lightblue',xlab="Response Time (sec)",main="Condition 2")

#check this out! kind of useless
dev.off()
hist(x1,prob=T,col=rgb(1,0,0,.2),xlab="Response Time (sec)",main="",ylim=c(0,3.5))
hist(x2,prob=T,col=rgb(1,1,0,.2),xlab="Response Time (sec)",main="",ylim=c(0,3.5),add=T)

#other problem bin size:
#ok
hist(x1,prob=T,col='lightblue',xlab="Response Time (sec)",main="Condition 1")

#too fine
hist(x1,prob=T,col='lightblue',xlab="Response Time (sec)",main="Condition 1",breaks=200)

#too coarse
hist(x1,prob=T,col='lightblue',xlab="Response Time (sec)",main="Condition 1",breaks=2)

#Roger's surefire histogram bins, use quantiles
#I think they are ugly and dont recommend them
hist(x1,prob=T,main="",xlab="Response Time",xlim=c(.3,1.5),
breaks=quantile(x1,seq(0,1,.2)),col='lightblue')

####Smoothed Histograms
plot(density(x2),xlim=c(.2,1.5),lty=2,lwd=2,main="",xlab="Response Time (sec)")
lines(density(x1),lwd=2)

#####ECDFs
plot(ecdf(x1),col.points='blue',main="",xlab="Response Time (sec)",
ylab="Cumulative Probability")
lines(ecdf(x2))
legend(1,.4,legend=c("Condition 1","Condition 2"), fill=c('blue','black'))

#####Stripchart with overlap
stripchart(list(x1,x2),xlim=c(.2,1.5),ylim=c(.5,2.5),
group.names=c("Condition 1","Condition 2"), pch=20,col=c(rgb(1,0,0,.2),rgb(0,0,1,.2)),cex=2,
xlab="Response Time (sec)")

#####Moments and Quantiles:

mat=cbind(x1,x2)
apply(mat,2,mean)
apply(mat,2,sd)

skewness <- function(x) {
m3 <- mean((x-mean(x))^3)
m3/(sd(x)^3)
}

kurtosis <- function(x) {
m4 = mean((x-mean(x))^4)
m4/(sd(x)^4)-3
}

apply(mat,2,skewness)
apply(mat,2,kurtosis)

QQ Plots

#sort data is always good

range=c(.2,1.5)
plot(sort(x1),sort(x2),xlim=range,ylim=range)
abline(0,1)
plot(sort(x2),sort(x1),xlim=range,ylim=range)
abline(0,1)

#suppose not the same numbers
p=seq(.1,.9,.2)
q1=quantile(x1,p)
q2=quantile(x2,p)

range=c(.2,1.5)
plot(q2,q1,xlim=range,ylim=range)
abline(0,1)

qqplot(x2,x1)
abline(0,1)

a1=rnorm(1000)
b1=rnorm(10)
qqplot(a1,b1)

a1=rnorm(1000)
b1=rnorm(1000)
qqplot(a1,b1)

##################################

#QQPlots for location changes

a=rnorm(1000,0,1)
b=rnorm(1000,1,1)
qqplot(a,b)
abline(0,1)

a=rweibull(1000,shape=1.5,scale=1)+.5
b=rweibull(1000,shape=1.5,scale=1)+1.5
qqplot(a,b)
abline(0,1)

###################################

#QQplots for scale changes

a=rnorm(1000,0,1)
b=rnorm(1000,0,2)
qqplot(a,b)
abline(0,1)

a=rweibull(1000,shape=1.5,scale=1)+1
b=rweibull(1000,shape=1.5,scale=2)+1
qqplot(a,b)
abline(0,1)

#####################################

#Shape Changes

a=rnorm(1000,0,1)
b=rt(1000,3)
qqplot(a,b)
abline(0,1)

a=rweibull(1000,shape=1.2,scale=1)
b=rweibull(1000,shape=2,scale=1)
qqplot(a,b)
abline(0,1)

#####################################3
#Delta Plots Rock
p=seq(.1,.9,.1)
df=quantile(x1,p)-quantile(x2,p)
av=(quantile(x1,p)+quantile(x2,p))/2
plot(av,df,ylim=c(-.05,.25),
ylab="RT Difference (sec)",xlab="RT Average (sec)")
abline(h=0)

############################################################
#Parametric:
#Goal, learn about shift, scale, and shape of distributions
#I like the 3-parameter Weibull best, doesnt fit the best, but is easiest to estimate.

x=rweibull(200,shape=1.5,scale=1)+1
stripplot(x,pch=20,cex=2,col=rgb(1,0,0,.1))
hist(x,breaks=20)

#use ML to estimate parameters
#par = psi,theta,beta
nll.wei=function(par,dat)
return(-sum(dweibull(dat-par[1],shape=par[3],scale=par[2],log=T)))

par=c(min(x)-.05,.3,2)
optim(par,nll.wei,dat=x)

#It is impotant to use the term 3-parameter or shifted Weibull, the standard Weibull is two-parameters, without shift.

########################################################
#Log-Normal
#lets play with parameters

y=rlnorm(200,0,.4)+.5
hist(y)

#interpretation
Log(y-shift)=Normal(mu,sigma^2)

#mu exp(mu) is a multiplier.

nll.lnorm=function(par,dat)
-sum(dlnorm(dat-par[1],meanlog=par[2],sdlog=par[3],log=T))

par=c(min(y)-.05,-.5,.2)
nll.lnorm(par,y)
optim(par,nll.lnorm,dat=y)

#lognormal account of x1 and x2

#x1
par=c(min(x1)-.05,-.5,.2)
nll.lnorm(par,x1)
g1=optim(par,nll.lnorm,dat=x1)
par=c(min(x2)-.05,-.5,.2)
nll.lnorm(par,x2)
g2=optim(par,nll.lnorm,dat=x2)

#Interpretation of scale here
exp(-0.7837941+1.3374341)

#Distrubiton 1 is 74% bigger than Distibution 2
#Log just means you have multiplicative effects, and this seems to be the norm in many measures, like RT and voltage.

##########################################################
##########################################################
#Mulitple Subjects:
x1=y
x2=y+1

plot(density(x1,xlim=c(0,6)))

lines(density(x2))

#pool all data
all=c(x1,x2)

lines(density(all),col='red')

#yuck

#alternative view!
plot(ecdf(all),col='red')
lines(ecdf(x1))
lines(ecdf(x2))

#quantile average
decile.p=seq(.1,.9,.1)
x1.d=quantile(x1,decile.p)
x2.d=quantile(x2,decile.p)
group.d=(x1.d+x2.d)/2

#make ECDF from quantiles
plot(x1.d,decile.p,typ='l',ylim=c(0,1),xlim=c(0,4))
points(x1.d,decile.p)
lines(x2.d,decile.p)
points(x2.d,decile.p)
lines(group.d,decile.p,col='blue')
points(group.d,decile.p,col='blue')

z1=(x1-mean(x1))/sd(x1)
z2=(x2-mean(x2))/sd(x2)

mean.mean=mean(c(mean(x1),mean(x2)))
mean.sd=mean(c(sd(x1),sd(x2)))
s=mean.sd*(c(z1,z2))+mean.mean
hist(s)

#alternative view!
plot(ecdf(s),col='green',xlim=c(0,6))
lines(ecdf(x1))
lines(ecdf(x2))

#######################################

dat=read.table(url("http://pcl.missouri.edu/jeff/sites/pcl.missouri.edu.jeff/files/example_0.dat"),header=T)
sub=dat$sub
pos=dat$pos
rt=dat$rt

ind.m=tapply(rt,list(sub,pos),mean)
I=nrow(ind.m)
matplot(t(ind.m),typ='l',col='grey',lty=1,axes=F,xlim=c(.7,2.3), ylab='Response Time (sec)')
axis(1,label=c('Nouns','Verbs'),at=c(1,2))
axis(2)

t.test(ind.m[,1],ind.m[,2],pair=T)

m.diff=ind.m[,2]-ind.m[,1]
m.ave=(ind.m[,2]+ind.m[,1])/2

plot(m.ave,m.diff,typ='h',xlab="Individual's Grand mean (sec)",ylab="Individuals POS Effect (sec)")
points(m.ave,m.diff,pch=20)
abline(h=0,lty=2)

#less helpful graph
grp.m=apply(ind.m,2,mean)
barplot(grp.m-.5,offset=.5,names.arg=c("Nouns","Verbs"))

##################################################################
#DELTA PLOTS

decile.p=seq(.1,.9,.1)
noun=matrix(ncol=length(decile.p),nrow=I)
verb=matrix(ncol=length(decile.p),nrow=I)
for (i in 1:I)
{
noun[i,]=quantile(rt[sub==i & pos==1],p=decile.p)
verb[i,]=quantile(rt[sub==i & pos==2],p=decile.p)
}

ave=(verb+noun)/2
diff=verb-noun

matplot(t(ave),t(diff),typ='l', lty=1,col='blue',ylab="Difference in Deciles (sec)",
xlab="Average of Deciles (sec)")
abline(0,0)

grp.noun=apply(noun,2,mean)
grp.verb=apply(verb,2,mean)

dif=grp.verb-grp.noun
ave=(grp.verb+grp.noun)/2
plot(ave,dif,typ='l',ylim=c(0,.05),
ylab="Difference of Deciles",xlab="Average of Deciles")
points(ave,dif,pch=21,bg='red')
abline(0,0)

AttachmentSize
QQplot.pdf5.05 KB
johnson2.pdf8.97 KB
pratte-deltas.pdf174.82 KB
chapter8.dat52.68 KB
Rouderetal2005PBR.pdf1.23 MB
big-example.dat223.97 KB
oyrt.R822 bytes

Research Methods 3020