Eff and Dow 2009

From InterSciWiki
Jump to: navigation, search

Anthon Eff - EduR-1.0‎

Contents

Download SCCS for Eff-Dow

http://intersci.ss.uci.edu/wiki/Eff_Dow_SCCS/SCCS.zip

download to your setwd(location)

Program2009 for PC

###setwd("/Users/drwhite/Documents/sccs-latest") #Mac
#Program 1
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
###setwd("C:/My Documents/sccs") #PC
setwd("c:/My Documents/MI")  # works for DRW DELL, should work at SocSciLab 155
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
 ### 20
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
cultints=SCCS$v232,roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204,
fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890,
pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1,
bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1,
foodtrade=SCCS$v819,foodscarc=SCCS$v1685,
ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260,
CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855,
AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196,
exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80,
settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237,
moralgods=SCCS$v238,fempower=SCCS$v663,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
war=SCCS$v1648,himilexp=(SCCS$v899==1)*1,
money=SCCS$v155,wagelabor=SCCS$v1732,
migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
nimp<-10
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
 ### 21
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
 ### 22
### Program 2
#MI--estimate model with network-lagged dependent variables, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("c:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
#--can replace "sum" with "max"
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum)
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"child value"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at frequencies and quartiles for the dep. varb.--
summary(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
 ### 23
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods",
"fempower","sexratio","war","himilexp","wagelabor","famsize","settype",
"localjh","money","cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich","popdens","pathstress","CVrain","rain",
"temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
nimp<-10
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--create spatially lagged dep. varbs. in stage 1 OLS--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+dateobs+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
+ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor+
 ### 24
migr+brideprice+nuclearfam+pctFemPolyg
,data=m9)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of restricted model--
xR<-lm(depvar ~ fyll + cultints + roots + fish +
exogamy + settype + femsubs, data = m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim","dateobs",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"fydd","wagelabor","war","himilexp","tree","foodtrade")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
 ### 25
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3))
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
bbb
r2
ccc
#--write results to csv file for perusal in spreadsheet--
#--these dont work but you can copy and paste from screen bb, r2, ccc 
#write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv",
append=FALSE)
#write.csv(bbb,file="OLSresults.csv",append=TRUE)
#write.csv(r2,file="OLSresults.csv",append=TRUE)
#write.csv(ccc,file="OLSresults.csv",append=TRUE)

Results: Normal output on PC

>  bbb
               coef Fstat        ddf pvalue   VIF
(Intercept) -10.347 0.851    2308485  0.356    NA
fyll          1.415 8.212    2277866  0.004 1.323
cultints      0.792 5.638 1622735889  0.018 1.897
roots        -2.285 3.968  809114236  0.046 1.210
fish          0.577 5.281  510986005  0.022 1.239
exogamy      -0.981 6.633  719826586  0.010 1.134
settype      -0.452 4.047  145046890  0.044 1.685
femsubs       0.630 4.037  362994936  0.045 1.241
>  r2
 R2:final model R2:IV(distance) R2:IV(language) 
      0.1061179       0.9247054       0.9666616 
>  ccc
                Fstat           df pvalue
RESET           0.705 7.405816e+06  0.401
Wald on restrs. 0.283 3.851307e+03  0.595
NCV             1.104 5.502358e+06  0.293
SWnormal        0.669 7.826620e+08  0.413
lagll           1.744 4.538706e+06  0.187
lagdd           3.449 2.529761e+07  0.063
>  #--write results to csv file for perusal in spreadsheet--
write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv")
works but not the following with ,
+  append=FALSE)


>  write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv",
+  append=FALSE)
Warning message:
In write.csv(paste("2SLS model for ", depvarname, sep = ""), file = "OLSresults.csv",  :
  attempt to set 'append' ignored
>  write.csv(bbb,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(bbb, file = "OLSresults.csv", append = TRUE) :
  attempt to set 'append' ignored
>  write.csv(r2,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(r2, file = "OLSresults.csv", append = TRUE) :
  attempt to set 'append' ignored
>  write.csv(ccc,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(ccc, file = "OLSresults.csv", append = TRUE) :
  attempt to set 'append' ignored
==Background==

Possible problems

Scott noted a problem -- ask him

Program2009 for MAC

WHAT CHANGES IS THE setwd(  ) which must have the SCCS.Rdata installed
NOTE: SCCS.Rdata will be used only for Eff and Dow if possible
setwd("/Users/drwhite/Documents/Eff_Dow/SCCS") #Mac for Eff and Dow only
#Program 1
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
###setwd("C:/My Documents/sccs") #PC
###setwd("c:/My Documents/MI")  # works for DRW DELL, should work at SocSciLab 155
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
 ### 20
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
cultints=SCCS$v232,roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204,
fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890,
pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1,
bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1,
foodtrade=SCCS$v819,foodscarc=SCCS$v1685,
ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260,
CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855,
AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196,
exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80,
settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237,
moralgods=SCCS$v238,fempower=SCCS$v663,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
war=SCCS$v1648,himilexp=(SCCS$v899==1)*1,
money=SCCS$v155,wagelabor=SCCS$v1732,
migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
nimp<-10
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
 ### 21
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
 ### 22
### Program 2
#MI--estimate model with network-lagged dependent variables, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("c:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
#--can replace "sum" with "max"
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum)
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"child value"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at frequencies and quartiles for the dep. varb.--
summary(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
 ### 23
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods",
"fempower","sexratio","war","himilexp","wagelabor","famsize","settype",
"localjh","money","cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich","popdens","pathstress","CVrain","rain",
"temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
nimp<-10
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--create spatially lagged dep. varbs. in stage 1 OLS--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+dateobs+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
+ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor+
 ### 24
migr+brideprice+nuclearfam+pctFemPolyg
,data=m9)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of restricted model--
xR<-lm(depvar ~ fyll + cultints + roots + fish +
exogamy + settype + femsubs, data = m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim","dateobs",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"fydd","wagelabor","war","himilexp","tree","foodtrade")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
 ### 25
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3))
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
bbb
r2
ccc
#--write results to csv file for perusal in spreadsheet--
#--these dont work but you can copy and paste from screen bb, r2, ccc 
#write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv",
append=FALSE)
#write.csv(bbb,file="OLSresults.csv",append=TRUE)
#write.csv(r2,file="OLSresults.csv",append=TRUE)
#write.csv(ccc,file="OLSresults.csv",append=TRUE)

Result of Program2009 for MAC

> bbb

             coef Fstat        ddf pvalue   VIF

(Intercept) -9.916 0.782 1421212 0.376 NA fyll 1.395 7.997 1403568 0.005 1.323 cultints 0.793 5.655 243354780 0.017 1.898 roots -2.295 4.009 981300686 0.045 1.209 fish 0.577 5.299 1180044472 0.021 1.239 exogamy -0.975 6.560 69827050 0.010 1.133 settype -0.449 4.005 672810071 0.045 1.684 femsubs 0.631 4.054 5417365901 0.044 1.242 > r2

R2:final model R2:IV(distance) R2:IV(language) 
     0.1068288       0.9234100       0.9670623 

> ccc

               Fstat           df pvalue

RESET 0.707 1.096526e+06 0.400 Wald on restrs. 0.228 1.307465e+03 0.633 NCV 1.103 7.638396e+06 0.294 SWnormal 0.684 1.816702e+09 0.408 lagll 1.658 2.850809e+06 0.198 lagdd 3.360 2.544041e+07 0.067

Personal tools