Eff and Dow 2009
From InterSciWiki
Contents |
Download SCCS for Eff-Dow
http://intersci.ss.uci.edu/wiki/Eff_Dow_SCCS/SCCS.zip
- download to your setwd(location)
Program2009 for PC
###setwd("/Users/drwhite/Documents/sccs-latest") #Mac
#Program 1
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
###setwd("C:/My Documents/sccs") #PC
setwd("c:/My Documents/MI") # works for DRW DELL, should work at SocSciLab 155
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
### 20
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
cultints=SCCS$v232,roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204,
fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890,
pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1,
bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1,
foodtrade=SCCS$v819,foodscarc=SCCS$v1685,
ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260,
CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855,
AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196,
exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80,
settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237,
moralgods=SCCS$v238,fempower=SCCS$v663,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
war=SCCS$v1648,himilexp=(SCCS$v899==1)*1,
money=SCCS$v155,wagelabor=SCCS$v1732,
migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
nimp<-10
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
### 21
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
### 22
### Program 2
#MI--estimate model with network-lagged dependent variables, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("c:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
#--can replace "sum" with "max"
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum)
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"child value"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at frequencies and quartiles for the dep. varb.--
summary(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
### 23
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods",
"fempower","sexratio","war","himilexp","wagelabor","famsize","settype",
"localjh","money","cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich","popdens","pathstress","CVrain","rain",
"temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
nimp<-10
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--create spatially lagged dep. varbs. in stage 1 OLS--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+dateobs+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
+ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor+
### 24
migr+brideprice+nuclearfam+pctFemPolyg
,data=m9)
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--Stage 2 OLS estimate of restricted model--
xR<-lm(depvar ~ fyll + cultints + roots + fish +
exogamy + settype + femsubs, data = m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim","dateobs",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"fydd","wagelabor","war","himilexp","tree","foodtrade")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
### 25
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3))
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
bbb
r2
ccc
#--write results to csv file for perusal in spreadsheet--
#--these dont work but you can copy and paste from screen bb, r2, ccc
#write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv",
append=FALSE)
#write.csv(bbb,file="OLSresults.csv",append=TRUE)
#write.csv(r2,file="OLSresults.csv",append=TRUE)
#write.csv(ccc,file="OLSresults.csv",append=TRUE)
Results: Normal output on PC
> bbb
coef Fstat ddf pvalue VIF
(Intercept) -10.347 0.851 2308485 0.356 NA
fyll 1.415 8.212 2277866 0.004 1.323
cultints 0.792 5.638 1622735889 0.018 1.897
roots -2.285 3.968 809114236 0.046 1.210
fish 0.577 5.281 510986005 0.022 1.239
exogamy -0.981 6.633 719826586 0.010 1.134
settype -0.452 4.047 145046890 0.044 1.685
femsubs 0.630 4.037 362994936 0.045 1.241
> r2
R2:final model R2:IV(distance) R2:IV(language)
0.1061179 0.9247054 0.9666616
> ccc
Fstat df pvalue
RESET 0.705 7.405816e+06 0.401
Wald on restrs. 0.283 3.851307e+03 0.595
NCV 1.104 5.502358e+06 0.293
SWnormal 0.669 7.826620e+08 0.413
lagll 1.744 4.538706e+06 0.187
lagdd 3.449 2.529761e+07 0.063
> #--write results to csv file for perusal in spreadsheet--
write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv")
works but not the following with ,
+ append=FALSE)
> write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv",
+ append=FALSE)
Warning message:
In write.csv(paste("2SLS model for ", depvarname, sep = ""), file = "OLSresults.csv", :
attempt to set 'append' ignored
> write.csv(bbb,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(bbb, file = "OLSresults.csv", append = TRUE) :
attempt to set 'append' ignored
> write.csv(r2,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(r2, file = "OLSresults.csv", append = TRUE) :
attempt to set 'append' ignored
> write.csv(ccc,file="OLSresults.csv",append=TRUE)
Warning message:
In write.csv(ccc, file = "OLSresults.csv", append = TRUE) :
attempt to set 'append' ignored
==Background==
Possible problems
Scott noted a problem -- ask him
Program2009 for MAC
WHAT CHANGES IS THE setwd( ) which must have the SCCS.Rdata installed NOTE: SCCS.Rdata will be used only for Eff and Dow if possible setwd("/Users/drwhite/Documents/Eff_Dow/SCCS") #Mac for Eff and Dow only #Program 1 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- ###setwd("C:/My Documents/sccs") #PC ###setwd("c:/My Documents/MI") # works for DRW DELL, should work at SocSciLab 155 rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } ### 20 row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872 ) #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- ### 21 zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") ### 22 ### Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("c:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED depvarname<-"child value" #--can add additional SCCS variable, but only if it has no missing values--- dateobs<-SCCS$v838 dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) ### 23 #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","famsize","settype", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg") #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ ### 24 migr+brideprice+nuclearfam+pctFemPolyg ,data=m9) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- xR<-lm(depvar ~ fyll + cultints + roots + fish + exogamy + settype + femsubs, data = m9) #--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich", "popdens","pathstress","ncmallow","famsize","localjh", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } ### 25 #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc #--write results to csv file for perusal in spreadsheet-- #--these dont work but you can copy and paste from screen bb, r2, ccc #write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) #write.csv(bbb,file="OLSresults.csv",append=TRUE) #write.csv(r2,file="OLSresults.csv",append=TRUE) #write.csv(ccc,file="OLSresults.csv",append=TRUE)
Result of Program2009 for MAC
> bbb
coef Fstat ddf pvalue VIF
(Intercept) -9.916 0.782 1421212 0.376 NA fyll 1.395 7.997 1403568 0.005 1.323 cultints 0.793 5.655 243354780 0.017 1.898 roots -2.295 4.009 981300686 0.045 1.209 fish 0.577 5.299 1180044472 0.021 1.239 exogamy -0.975 6.560 69827050 0.010 1.133 settype -0.449 4.005 672810071 0.045 1.684 femsubs 0.631 4.054 5417365901 0.044 1.242 > r2
R2:final model R2:IV(distance) R2:IV(language)
0.1068288 0.9234100 0.9670623
> ccc
Fstat df pvalue
RESET 0.707 1.096526e+06 0.400 Wald on restrs. 0.228 1.307465e+03 0.633 NCV 1.103 7.638396e+06 0.294 SWnormal 0.684 1.816702e+09 0.408 lagll 1.658 2.850809e+06 0.198 lagdd 3.360 2.544041e+07 0.067