EduMod-65: Imputation and Regression
From InterSciWiki
User:roblesn - 2SLS checklist Nick your 7.1 results are great. Go on (copy program further down and strip out deletes) to further edit your xR Restricted variable Doug 15:36, 10 February 2010 (PST) Doing group with your restricted variables. I can see you're looking for the variable to log but its not fempower. Might be interesting to see WHICH fempower variable between and v657 and v662 is significant rather than trace logged variables. Even with the RESET being significant this is already enough for a powerpoint and final paper.
- EduMod-64: Imputation and Regression
- EduMod-66: Imputation and Regression
- Remember: When you save your ==B| results== first open TextPad, add one space, then paste your results from R, then copy and paste the TextPad page back to your new ==B| results==. What the extra space to the left of each line of text does is to make the results READABLE in the wikipage.
A| Unrestricted Model PROGRAM copied ==1A} to==5|A change your 5|A depvar to begin YOUR project<-
#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP
nimp<-3 ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+ pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+###climate+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor ,data=m9) #--OLS estimate of restricted model-- xR<-lm(depvar~fyll+fydd+ pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+###climate+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor ,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
UNRESTRICTED FIRST RESULTS
a<-c(table(depvar), NROW(depvar),depvarname)
> imp<-"number of imputations nimp="
> impute=c(imp,nimp)
>
> bbb
coef Fstat ddf pvalue VIF
(Intercept) 2.83859309 1.77654032 13.751251 0.20423841 NA
fyll -0.73933579 1.55506311 11255.497860 0.21241549 3.131
fydd 0.23894265 0.57927490 24.871640 0.45375289 3.057
pre_mar_sex 0.08268219 0.98148845 12351.249670 0.32185102 1.464
money 0.05498930 0.36932623 60.096189 0.54566152 2.423
foodstress -0.11160830 0.51786113 22.847765 0.47905260 1.575
femctrldwellg 0.14722998 1.48024194 7.281069 0.26170012 1.590
wealthy 0.11232801 0.59486855 12.320275 0.45508119 1.647
fratgrpstr -0.01164086 0.00823256 8.905256 0.92971245 2.679--->REMOVE
marrcaptives -0.08736195 0.49367361 174.500066 0.48322939 1.842
plunder 0.13066740 0.32206116 3533.641393 0.57040764 1.690
cultints 0.02841499 0.06215400 227.167878 0.80334856 5.083--->REMOVE
roots -0.58155692 1.42368560 3988.357396 0.23286869 5.372
cereals -0.06945798 0.02075642 427.511151 0.88551237 7.691--->REMOVE
gath -0.13233361 1.40621471 56.104173 0.24068382 3.488
plow 0.80795839 3.93478798 23115.580139 0.04730891 3.149-->SIGNIFICANT
hunt -0.20101703 2.92836455 189.598909 0.08867100 5.430--> CLOSE TO SIGNIFICANT
fish -0.18685404 4.68062983 312.586813 0.03126203 3.285-->SIGNIFICANT
anim -0.28353130 7.37496453 190.006159 0.00722372 5.607--> VERY SIGNIFICANT
pigs 0.18452362 0.26286257 471.202983 0.60840057 2.388
milk -0.05626051 0.02111088 689.949479 0.88451974 4.113--->REMOVE
bovines -0.39386190 1.00818449 47.425751 0.32043389 4.254
tree -0.57118929 0.85297576 1866.878756 0.35583122 3.463
foodtrade 0.01483561 2.03961338 628.440183 0.15374536 1.533
foodscarc -0.06647825 0.58594131 26.408558 0.45078037 1.445
ecorich 0.04898173 0.27029247 10956.923177 0.60314615 1.902
popdens 0.00076760 0.00004683 73.924945 0.99455815 3.877--->REMOVE
pathstress -0.02364615 0.41810605 77.463190 0.51979492 2.344
exogamy -0.17353391 3.92848696 73.024604 0.05123835 1.374-->SIGNIFICANT
ncmallow -0.02208772 0.25085817 96.254375 0.61761487 1.622
famsize 0.03544177 0.14760321 727.407900 0.70094870 1.726
settype -0.00306719 0.00141764 13.967401 0.97049837 4.089--->REMOVE
localjh 0.04824870 0.06048991 265.766374 0.80591312 1.808--->REMOVE
superjh 0.50785110 16.23402310 26.422558 0.00042299 2.811-->VERY SIGNIFICANT
moralgods 0.22287240 3.94165396 63.443780 0.05143076 2.165-->SIGNIFICANT
fempower 0.07170493 1.63680142 31.371593 0.21014675 1.378
femsubs -0.08207492 1.12927392 4083.838784 0.28799310 1.861
sexratio -0.11457958 0.29746583 8.577658 0.59936831 1.495
war -0.00787549 0.24008264 105.380693 0.62516498 1.537
himilexp 0.03726522 0.02546467 52.019930 0.87383265 1.626--->REMOVE
wagelabor 0.08367274 0.29330317 9.313493 0.60082546 1.684
> r2
R2:final model R2:IV(distance) R2:IV(language)
0.6550117 0.9776988 0.9763240
> ccc
Fstat df pvalue
RESET 24.188 1120.860 0.000
Wald on restrs. 16.821 23.321 0.000
NCV 5.531 369.771 0.019
SWnormal 15.729 23258.134 0.000
lagll 2.092 262372.804 0.148
lagdd 3.766 62134.683 0.052
> bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
> bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
> bbb
mnb fst v pval VIF
(Intercept) 2.839 1.777 13.751 0.204 NA
fyll -0.739 1.555 11255.498 0.212 3.131
fydd 0.239 0.579 24.872 0.454 3.057
pre_mar_sex 0.083 0.981 12351.250 0.322 1.464
money 0.055 0.369 60.096 0.546 2.423
foodstress -0.112 0.518 22.848 0.479 1.575
femctrldwellg 0.147 1.480 7.281 0.262 1.590
wealthy 0.112 0.595 12.320 0.455 1.647
fratgrpstr -0.012 0.008 8.905 0.930 2.679
marrcaptives -0.087 0.494 174.500 0.483 1.842
plunder 0.131 0.322 3533.641 0.570 1.690
cultints 0.028 0.062 227.168 0.803 5.083
roots -0.582 1.424 3988.357 0.233 5.372
cereals -0.069 0.021 427.511 0.886 7.691
gath -0.132 1.406 56.104 0.241 3.488
plow 0.808 3.935 23115.580 0.047 3.149
hunt -0.201 2.928 189.599 0.089 5.430
fish -0.187 4.681 312.587 0.031 3.285
anim -0.284 7.375 190.006 0.007 5.607
pigs 0.185 0.263 471.203 0.608 2.388
milk -0.056 0.021 689.949 0.885 4.113
bovines -0.394 1.008 47.426 0.320 4.254
tree -0.571 0.853 1866.879 0.356 3.463
foodtrade 0.015 2.040 628.440 0.154 1.533
foodscarc -0.066 0.586 26.409 0.451 1.445
ecorich 0.049 0.270 10956.923 0.603 1.902
popdens 0.001 0.000 73.925 0.995 3.877
pathstress -0.024 0.418 77.463 0.520 2.344
exogamy -0.174 3.928 73.025 0.051 1.374
ncmallow -0.022 0.251 96.254 0.618 1.622
famsize 0.035 0.148 727.408 0.701 1.726
settype -0.003 0.001 13.967 0.970 4.089
localjh 0.048 0.060 265.766 0.806 1.808
superjh 0.508 16.234 26.423 0.000 2.811
moralgods 0.223 3.942 63.444 0.051 2.165
fempower 0.072 1.637 31.372 0.210 1.378
femsubs -0.082 1.129 4083.839 0.288 1.861
sexratio -0.115 0.297 8.578 0.599 1.495
war -0.008 0.240 105.381 0.625 1.537
himilexp 0.037 0.025 52.020 0.874 1.626
wagelabor 0.084 0.293 9.313 0.601 1.684
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
> impute
[1] "number of imputations nimp=" "3"
SECOND RUNNING PROGRAM POLICE
- #Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
marrcaptives+plunder+###climate+
roots+gath+plow+
hunt+fish+anim+pigs+bovines+tree+foodtrade+foodscarc+
ecorich+pathstress+exogamy+ncmallow+famsize+
superjh+moralgods+fempower+femsubs+
sexratio+war+money+wagelabor
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
REsULTS FOR RESTRICTED POLICE#1
ccc
Fstat df pvalue
RESET 23.290 548003.236 0.000
Wald on restrs. 0.000 2129006.971 0.999
NCV 5.638 303.871 0.018
SWnormal 14.885 1180.552 0.000
lagll 2.033 315169.944 0.154
lagdd 3.656 122965.466 0.056
> bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
> bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
> bbb
mnb fst v pval VIF
(Intercept) 3.045 2.958 30.719 0.096 NA
fyll -0.709 1.775 50866.156 0.183 2.651
fydd 0.224 0.615 37.590 0.438 2.771
pre_mar_sex 0.078 0.975 184316.975 0.323 1.369
money 0.059 0.506 50.914 0.480 2.120
foodstress -0.114 0.622 27.174 0.437 1.461
femctrldwellg 0.135 1.459 8.189 0.261 1.488
wealthy 0.107 0.564 9.309 0.471 1.543
marrcaptives -0.079 0.484 493.925 0.487 1.632
plunder 0.136 0.407 37231.740 0.524 1.535-->REMOVE
roots -0.538 3.718 17679.586 0.054 1.859-->SIGNIFICANT
gath -0.138 2.566 343.917 0.110 2.332
plow 0.842 4.897 2356.748 0.027 2.850-->SIGNIFICANT
hunt -0.209 6.348 187.466 0.013 2.845-->SIGNIFICANT
fish -0.197 7.948 97.924 0.006 2.174-->SIGNIFICANT
anim -0.295 18.472 537.112 0.000 2.600-->SIGNIFICANT
pigs 0.195 0.368 13952.846 0.544 2.060-->REMOVE
bovines -0.422 1.716 231.605 0.191 3.235
tree -0.528 1.677 1980.096 0.196 1.576
foodtrade 0.015 2.350 1068.532 0.126 1.432
foodscarc -0.065 0.607 22.051 0.444 1.365
ecorich 0.042 0.221 4517.403 0.638 1.811-->REMOVE
pathstress -0.025 0.554 701.675 0.457 2.172
exogamy -0.178 4.860 56.192 0.032 1.213-->SIGNIFICANT
ncmallow -0.020 0.241 98.818 0.625 1.477-->REMOVE
famsize 0.038 0.229 111.293 0.633 1.288-->REMOVE
superjh 0.508 20.747 44.528 0.000 2.417-->SIGNIFICANT
moralgods 0.216 4.546 75.190 0.036 1.858-->SIGNIFICANT
fempower 0.071 2.000 153.559 0.159 1.282
femsubs -0.083 1.308 35042.207 0.253 1.715
sexratio -0.104 0.296 11.430 0.597 1.403-->REMOVE
war -0.008 0.301 294.199 0.583 1.469-->REMOVE
wagelabor 0.078 0.310 14.382 0.586 1.586-->REMOVE
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
> impute
[1] "number of imputations nimp=" "3"
>
MORE RESTRICTION POLICE #2
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
marrcaptives+###climate+
roots+gath+plow+
hunt+fish+anim+bovines+tree+foodtrade+foodscarc+
pathstress+exogamy+
superjh+moralgods+fempower+femsubs+
money
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
RESULTS FOR RUNNING POLICE #3
bbb
mnb fst v pval VIF
(Intercept) 3.427 6.077 38.168 0.018 NA
fyll -0.681 1.878 3977.175 0.171 2.358
fydd 0.211 0.628 48.977 0.432 2.527
pre_mar_sex 0.068 0.790 67609.164 0.374 1.318-->REMOVE
money 0.068 0.806 172.619 0.370 1.933-->REMOVE
foodstress -0.140 1.095 75.083 0.299 1.383
femctrldwellg 0.107 0.914 5.692 0.378 1.306-->REMOVE
wealthy 0.124 0.826 8.118 0.389 1.384-->REMOVE
marrcaptives -0.113 1.215 115.465 0.273 1.337
roots -0.510 3.612 1280.693 0.058 1.734
gath -0.140 3.185 364.497 0.075 1.980
plow 0.874 5.603 1670.710 0.018 2.746
hunt -0.221 8.572 391.287 0.004 2.465
fish -0.213 10.787 2158.402 0.001 2.037
anim -0.311 23.633 1058.503 0.000 2.343
bovines -0.466 2.415 106.403 0.123 2.795
tree -0.476 1.630 11554.295 0.202 1.365
foodtrade 0.015 2.499 668.122 0.114 1.350
foodscarc -0.060 0.560 23.940 0.461 1.328-->REMOVE
pathstress -0.020 0.439 9005.759 0.508 1.876-->REMOVE
exogamy -0.179 5.520 259.346 0.020 1.173
superjh 0.492 24.188 88.821 0.000 2.085
moralgods 0.237 6.931 2205.484 0.009 1.633
fempower 0.075 2.412 226.592 0.122 1.220
femsubs -0.091 1.858 2450.197 0.173 1.482
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
> impute
PROGRAM 4 POLICE
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
foodstress+
marrcaptives+###climate+
roots+gath+plow+
hunt+fish+anim+bovines+tree+foodtrade+exogamy+
superjh+moralgods+fempower+femsubs+
money
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
RESULTS Number4 POLICE
R2:final model R2:IV(distance) R2:IV(language)
0.6263966 0.9804302 0.9774232
> ccc
Fstat df pvalue
RESET 15.633 847.911 0.000
Wald on restrs. 0.017 38314.043 0.897
NCV 6.999 477.813 0.008
SWnormal 15.606 2474.842 0.000
lagll 1.947 2365117.298 0.163
lagdd 2.904 67718.358 0.088
coef Fstat ddf pvalue VIF
(Intercept) 3.70011184 11.0154646 1276.4727 0.00092903 NA
fyll -0.66783568 1.8552932 15716.5314 0.17318854 2.306
fydd 0.15448664 0.3817471 693.9556 0.53687240 2.431
foodstress -0.16089011 1.7308093 496.9727 0.18891416 1.234
marrcaptives -0.11722691 1.4171140 757.6009 0.23425189 1.281-->REMOVE
roots -0.61326526 6.0752408 444623.8178 0.01370934 1.522
gath -0.14863869 4.0421256 4548667.8284 0.04437795 1.838
plow 0.91434708 7.0515642 1896.5955 0.00798590 2.392
hunt -0.18932768 7.2625282 8516813.2156 0.00704082 2.210
fish -0.17663726 9.3067383 4372.2665 0.00229682 1.639
anim -0.28955098 21.9845586 3084865.0118 0.00000275 2.231
bovines -0.45443949 2.8117317 1401568.7297 0.09357754 2.467
tree -0.48924272 1.7742966 4878.1506 0.18291346 1.322
foodtrade 0.01400120 2.3630956 29623.6994 0.12424612 1.300
exogamy -0.19291322 6.9463290 3479.4157 0.00843642 1.122
superjh 0.50964762 28.0026197 301.3825 0.00000023 2.013
moralgods 0.23244086 6.7780531 706.3835 0.00942233 1.588
fempower 0.07901164 2.7649007 128.4985 0.09878999 1.164
femsubs -0.08694324 1.7906148 167259.5635 0.18085309 1.431
money 0.08277161 1.2456803 649.6772 0.26479175 1.890-->REMOVE
REMOVING PART 5 POLICE
- Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
foodstress+
###climate+
roots+gath+plow+
hunt+fish+anim+bovines+tree+foodtrade+exogamy+
superjh+moralgods+fempower+femsubs
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
RESULTS for 5 POLICE
> bbb
coef Fstat ddf pvalue VIF
(Intercept) 3.42374475 10.1077508 5.380461e+03 0.00148484 NA
fyll -0.71217011 2.0947152 1.481020e+05 0.14781137 2.301
fydd 0.26904560 1.2493208 6.406217e+07 0.26368224 2.289
foodstress -0.08884800 0.5717965 5.314196e+01 0.45288505 1.138
roots -0.60986177 5.8171387 2.163610e+03 0.01595351 1.529
gath -0.13281740 3.2343330 3.053356e+03 0.07220840 1.787
plow 1.10322496 11.2651460 7.573897e+04 0.00079015 2.183
hunt -0.22435180 10.7358470 3.137899e+03 0.00106224 2.046
fish -0.18234717 9.8496219 1.408321e+03 0.00173390 1.615
anim -0.29617747 22.7912372 8.904379e+07 0.00000181 2.223
bovines -0.50803248 3.5520290 1.944259e+06 0.05947283 2.410
tree -0.54151634 2.1709029 2.532914e+03 0.14076789 1.301
foodtrade 0.01597879 3.1090261 9.261224e+02 0.07818924 1.245
exogamy -0.19075125 6.8051884 5.306370e+04 0.00909188 1.116
superjh 0.52165185 31.8153967 1.388481e+04 0.00000002 1.898
moralgods 0.23352300 7.0451866 3.221730e+04 0.00795176 1.559
fempower 0.07871408 2.6001318 3.840855e+02 0.10767616 1.146
femsubs -0.09185884 1.9864149 3.017863e+04 0.15872654 1.404
> r2
R2:final model R2:IV(distance) R2:IV(language)
0.6214586 0.9785105 0.9762273
> ccc
Fstat df pvalue
RESET 15.704 7025.871 0.000
Wald on restrs. 0.005 748185.406 0.944
NCV 7.239 5779.972 0.007
SWnormal 15.511 5555.953 0.000
lagll 1.941 12755388.221 0.164
lagdd 3.063 1170781.121 0.080
> bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
> bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
> bbb
mnb fst v pval VIF
(Intercept) 3.424 10.108 5380.461 0.001 NA
fyll -0.712 2.095 148101.970 0.148 2.301
fydd 0.269 1.249 64062172.453 0.264 2.289
foodstress -0.089 0.572 53.142 0.453 1.138<--REMOVE
roots -0.610 5.817 2163.610 0.016 1.529
gath -0.133 3.234 3053.356 0.072 1.787
plow 1.103 11.265 75738.972 0.001 2.183
hunt -0.224 10.736 3137.899 0.001 2.046
fish -0.182 9.850 1408.321 0.002 1.615
anim -0.296 22.791 89043790.579 0.000 2.223
bovines -0.508 3.552 1944258.774 0.059 2.410
tree -0.542 2.171 2532.914 0.141 1.301
foodtrade 0.016 3.109 926.122 0.078 1.245
exogamy -0.191 6.805 53063.696 0.009 1.116
superjh 0.522 31.815 13884.808 0.000 1.898
moralgods 0.234 7.045 32217.299 0.008 1.559
fempower 0.079 2.600 384.086 0.108 1.146
femsubs -0.092 1.986 30178.626 0.159 1.404
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
> impute
PROGRAM TRY 6 POLICE
- Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
###climate+
roots+gath+plow+
hunt+fish+anim+bovines+tree+foodtrade+exogamy+
superjh+moralgods+fempower+femsubs
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
RESULTS FOR TRY 6 POLICE
> bbb
coef Fstat ddf pvalue VIF
(Intercept) 3.25427570 9.567070 3027458.394 0.00198100 NA
fyll -0.73021880 2.213351 1666610.147 0.13682134 2.293
fydd 0.28803539 1.445810 308139.745 0.22920233 2.265
roots -0.63847818 6.532789 6667.207 0.01061236 1.503
gath -0.13577770 3.388819 3039.494 0.06573736 1.783
plow 1.09145863 11.082106 139292.164 0.00087187 2.174
hunt -0.23026318 11.542893 36803.637 0.00068081 2.023
fish -0.18351592 10.057378 2284.608 0.00153751 1.609
anim -0.29891406 23.313203 81713813.011 0.00000138 2.214
bovines -0.51605399 3.671025 1023194.199 0.05536642 2.406
tree -0.57855931 2.525783 3199.529 0.11209800 1.279
foodtrade 0.01538888 2.955691 8663.814 0.08561145 1.235
exogamy -0.18974129 6.735902 10744.056 0.00946199 1.111
superjh 0.52644946 32.628401 36233.827 0.00000001 1.889
moralgods 0.22229832 6.578593 161938.596 0.01032214 1.517
fempower 0.08341096 3.020582 1672.169 0.08239806 1.130
femsubs -0.08611704 1.769603 419222.521 0.18343200 1.390<--Remove
> r2
R2:final model R2:IV(distance) R2:IV(language)
0.6192190 0.9785105 0.9762273
> ccc
Fstat df pvalue
RESET 15.767 53481.22 0.000
Wald on restrs. 0.003 1285431.35 0.953
NCV 7.974 200939.52 0.005
SWnormal 15.093 22092.63 0.000
lagll 1.928 47527983.83 0.165
lagdd 3.080 641362.16 0.079
> bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
> bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
> bbb
mnb fst v pval VIF
(Intercept) 3.254 9.567 3027458.394 0.002 NA
fyll -0.730 2.213 1666610.147 0.137 2.293
fydd 0.288 1.446 308139.745 0.229 2.265
roots -0.638 6.533 6667.207 0.011 1.503
gath -0.136 3.389 3039.494 0.066 1.783
plow 1.091 11.082 139292.164 0.001 2.174
hunt -0.230 11.543 36803.637 0.001 2.023
fish -0.184 10.057 2284.608 0.002 1.609
anim -0.299 23.313 81713813.011 0.000 2.214
bovines -0.516 3.671 1023194.199 0.055 2.406
tree -0.579 2.526 3199.529 0.112 1.279
foodtrade 0.015 2.956 8663.814 0.086 1.235
exogamy -0.190 6.736 10744.056 0.009 1.111
superjh 0.526 32.628 36233.827 0.000 1.889
moralgods 0.222 6.579 161938.596 0.010 1.517
fempower 0.083 3.021 1672.169 0.082 1.130
femsubs -0.086 1.770 419222.521 0.183 1.390
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
> impute
TRY 7 POLICE
PROGRAM TRY 7 POLICE
- Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
###climate+
roots+gath+plow+
hunt+fish+anim+bovines+tree+foodtrade+exogamy+
superjh+moralgods+fempower
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
FINAL RESULTS POLICE - Great! - DW
R2:final model R2:IV(distance) R2:IV(language)
0.6136340 0.9785105 0.9762273
> ccc
Fstat df pvalue
RESET 16.023 21631.82 0.000 some var needs to be logged
Wald on restrs. 0.007 315422.37 0.935
NCV 8.296 1098628.80 0.004 some bunched regression errors
SWnormal 14.759 12545.33 0.000
lagll 1.765 38638259.70 0.184
lagdd 2.853 813154.45 0.091
mnb fst v pval VIF
(Intercept) 2.836 7.910 1279771.482 0.005 NA
fyll -0.753 2.339 1731257.732 0.126 2.290
fydd 0.342 2.087 429659.552 0.149 2.199
roots -0.702 8.138 5412.672 0.004 1.447
gath -0.136 3.367 3447.013 0.067 1.783
plow 1.144 12.242 206455.960 0.000 2.143
hunt -0.211 10.059 56840.914 0.002 1.926
fish -0.164 8.519 2025.391 0.004 1.506
anim -0.295 22.624 8006206.522 0.000 2.210
bovines -0.506 3.497 558082.836 0.061 2.404
tree -0.582 2.537 3702.789 0.111 1.279 <-- delete
foodtrade 0.017 3.547 15757.721 0.060 1.218
exogamy -0.204 7.880 11219.211 0.005 1.087
superjh 0.536 33.696 30755.531 0.000 1.878
moralgods 0.227 6.851 1028135.869 0.009 1.513
fempower 0.080 2.791 5024.209 0.095 1.126 <-- see what happens to RESET if you <-log(1+thisSCCS$v... var)
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
TRY 8 W/O TREE
- Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
###climate+
roots+gath+plow+
hunt+fish+anim+bovines+foodtrade+exogamy+
superjh+moralgods+fempower
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
TRY 8 RESULTS good results: DRW
r2
R2:final model R2:IV(distance) R2:IV(language)
0.6034978 0.9804443 0.9769738
Fstat df pvalue
RESET 18.480 62533.836 0.000 not obvious what might need to be logged
Wald on restrs. 0.026 9358.091 0.872
NCV 8.514 1048.692 0.004
SWnormal 10.557 2909.647 0.001
lagll 1.460 20689.417 0.227
lagdd 2.530 174679.029 0.112
> bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
> bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
> bbb
mnb fst v pval VIF
(Intercept) 2.627 6.848 1877.457 0.009 NA
fyll -0.839 2.845 1801161.406 0.092 2.293
fydd 0.413 3.004 1472.513 0.083 2.142
roots -0.590 6.336 11669490.860 0.012 1.297
gath -0.117 2.541 11936.945 0.111 1.735
plow 1.179 12.849 6290.599 0.000 2.111
hunt -0.181 7.748 663.008 0.006 1.758
fish -0.164 8.475 25207.953 0.004 1.505
anim -0.278 19.576 1622.539 0.000 2.177
bovines -0.452 2.722 3386.390 0.099 2.395
foodtrade 0.016 3.341 21731.622 0.068 1.209
exogamy -0.193 6.900 2636.639 0.009 1.084
superjh 0.562 37.618 6423402.278 0.000 1.829
moralgods 0.210 5.669 5815.113 0.017 1.513
fempower 0.082 2.849 336.038 0.092 1.116
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
TRY 9 RESULTS also good
r2
R2:final model R2:IV(distance) R2:IV(language)
0.5967023 0.9804443 0.9769738
Fstat df pvalue
RESET 17.173 27726.603 0.000 not obvious what might need to be logged
Wald on restrs. 0.054 2077.838 0.816
NCV 7.740 924.262 0.006
SWnormal 12.059 5199.759 0.001
lagll 1.456 35016.080 0.228
lagdd 2.423 501654.808 0.120
mnb fst v pval VIF
(Intercept) 2.468 6.003 1.031631e+03 0.014 NA
fyll -0.952 3.691 8.544249e+05 0.055 2.247
fydd 0.484 4.203 7.945520e+02 0.041 2.066
roots -0.541 5.367 3.159518e+07 0.021 1.275
plow 1.154 12.201 4.778956e+03 0.000 2.106
hunt -0.202 10.119 8.283810e+02 0.002 1.678
fish -0.140 6.556 6.439615e+04 0.010 1.392
anim -0.252 17.191 3.537418e+03 0.000 2.033
bovines -0.359 1.771 1.890590e+03 0.183 2.286
foodtrade 0.017 3.538 1.586855e+04 0.060 1.208
exogamy -0.202 7.471 2.257439e+03 0.006 1.078
superjh 0.594 43.676 1.714590e+09 0.000 1.742
moralgods 0.206 5.442 1.595069e+04 0.020 1.511
fempower 0.080 2.617 1.470000e+02 0.108 1.114
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
TRY 10 PROGRAM
- Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
###climate+
roots+plow+
hunt+fish+anim+foodtrade+exogamy+
superjh+moralgods+fempower
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree","gath","bovines" )
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
TRY 10 RESULTS also good
r2
R2:final model R2:IV(distance) R2:IV(language)
0.5916506 0.9804443 0.9769738
Fstat df pvalue
RESET 16.814 133403.158 0.000
Wald on restrs. 0.078 2740.949 0.779
NCV 8.898 1343.490 0.003
SWnormal 11.206 6976.605 0.001
lagll 1.351 27149.436 0.245
lagdd 2.333 386538.873 0.127
mnb fst v pval VIF
(Intercept) 2.383 5.576 9.139790e+02 0.018 NA
fyll -0.974 3.847 3.495494e+05 0.050 2.244
fydd 0.507 4.614 7.438840e+02 0.032 2.054
roots -0.471 4.250 2.702116e+05 0.039 1.211
plow 0.968 10.449 5.414196e+04 0.001 1.731
hunt -0.181 8.665 1.856461e+03 0.003 1.569
fish -0.123 5.312 1.141325e+04 0.021 1.320
anim -0.264 19.268 1.153690e+04 0.000 1.989
foodtrade 0.018 3.773 1.563632e+04 0.052 1.205
exogamy -0.203 7.503 2.376409e+03 0.006 1.078
superjh 0.587 42.531 1.217651e+11 0.000 1.736
moralgods 0.193 4.806 1.591517e+04 0.028 1.493
fempower 0.073 2.160 7.069800e+01 0.146 1.100
> aaa
1 2 3 4 5
"124" "4" "4" "6" "42" "180" "police"
B| Unrestricted Model RESULTS: Dep Var change to: (yours)
> bbb
mnb fst v pval VIF
(Intercept) 1.499 0.625 2972.768 0.429 NA
fyll -0.456 0.759 4762.453 0.384 2.791
fydd 0.700 11.070 214.460 0.001 3.132 <--keep
fratgrpstr 0.171 3.638 157.715 0.058 2.588 <--keep
marrcaptives 0.289 6.221 21.859 0.021 1.737 <--keep
plunder -0.652 9.275 31.996 0.005 1.604 <--keep
cultints -0.013 0.016 462.894 0.899 5.097
roots 0.445 1.079 696.655 0.299 4.940
cereals 0.322 0.580 408.991 0.447 7.409
gath 0.095 1.048 783.198 0.306 3.189
plow -0.536 2.354 1612.849 0.125 3.034
hunt 0.128 1.671 8347.508 0.196 5.033
fish -0.018 0.056 2888.055 0.812 3.071
anim 0.137 2.357 96.348 0.128 5.725
pigs -0.513 2.296 311.485 0.131 2.584 <--keep
milk -0.847 5.551 69.154 0.021 4.314 <--keep
bovines 0.277 0.695 372.384 0.405 4.269
tree 0.339 0.378 186.900 0.539 3.251
foodtrade -0.017 3.145 111.055 0.079 1.475 <--keep
foodscarc -0.010 0.019 21.943 0.892 1.285
ecorich -0.042 0.257 27009.971 0.612 1.865
popdens 0.172 3.132 202.959 0.078 3.750 <--keep
pathstress 0.037 1.316 141786.377 0.251 2.505
exogamy 0.051 0.436 2793.326 0.509 1.422
ncmallow -0.009 0.059 35224.928 0.808 1.478
famsize -0.032 0.164 3278.637 0.686 1.617
settype -0.009 0.020 146.457 0.889 3.955
localjh -0.005 0.001 165.547 0.975 1.724
superjh -0.075 0.532 129.517 0.467 2.657
moralgods 0.022 0.053 34.070 0.820 1.908
fempower -0.076 1.684 8.888 0.227 1.392 <--keep
femsubs 0.089 1.853 116401.230 0.173 1.683
sexratio -0.102 0.412 4282.394 0.521 1.294
war 0.008 0.309 137401.687 0.579 1.555
himilexp 0.175 0.723 37.899 0.401 1.553
money 0.051 0.375 40.405 0.543 2.231
wagelabor -0.008 0.003 5.846 0.958 1.593
> aaa
1 2 3 4 5
"27" "32" "45" "34" "46" "184" "polygyny"
> impute
[1] "number of imputations nimp=" "3"
>
A| from -54 xR Restricted model for v90 Police. For Unrestricted model copy xUR< vars over xR< to let others use the new indep vars #### speedup
#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP
nimp<-3 ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v860
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"polygyny"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+ pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+###climate+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor ,data=m9) #--OLS estimate of restricted model-- xR<-lm(depvar~fyll+fydd+ ### fyll+ <--- ADDED fydd for distance clustering ##pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+ ###climate+fish+ plow+ ###cereals+gath+ ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+ ###settype+localjh+superjh+moralgods+ fempower+femsubs ###+ ###war+himilexp +money+wagelabor ###sexratio+ ,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
B| Restricted Model RESULTS
A| Unrestricted Model PROGRAM copied ==1A} to==5|A change your 5|A depvar to begin YOUR project<-
#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP
nimp<-3 ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v860
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"polygyny"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+ pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+###climate+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor ,data=m9) #--OLS estimate of restricted model-- xR<-lm(depvar~fyll+fydd+ pre_mar_sex+money+foodstress+femctrldwellg+wealthy+ fratgrpstr+marrcaptives+plunder+###climate+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor ,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
A| First program 1 with log variables WORKS
#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp
#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=log(1+SCCS$v203),
hunt=log(1+SCCS$v204),
fish=SCCS$v205), #log(1+SCCS$v205),
anim=log(1+SCCS$v206),
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=log(1+SCCS$v663), #<-log(1+thisSCCS$v... var)
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP
nimp<-3 ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")
#Program 2 (12-18-2009)==
#MI--estimate model, combine results
rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)
#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)
#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]
#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)
#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))
indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")
#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------
#--number of imputed datasets--
#nimp<-10
nimp<-3 ################################################################## speedup test
#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL
#--loop through the imputed datasets--
for (i in 1:nimp){
#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]
#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)
#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+
###climate+
roots+plow+
hunt+fish+anim+foodtrade+exogamy+
superjh+moralgods+fempower
,data=m9)
# fydd+ ### fyll+ <--- ADDED fydd for distance clustering
# fratgrpstr+marrcaptives+plunder+ ###climate+fish+
# plow+ ###cereals+gath+
# ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
# ###pathstress+ ecorich+popdens+exogamy+ncmallow+famsize+
# ###settype+localjh+superjh+moralgods+
# fempower+femsubs ###+
# ###war+himilexp +money+wagelabor ###sexratio+
# ,data=m9)
#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))
#--collect some model diagnostics--
dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize", "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree","gath","bovines" )
#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
}
#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------
#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),10)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")
#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)
aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute
Logging Fempower didnt remove RESET --DW having a look at your progress
not clear what should have been logged.
Interesting that fyll is negative, fydd positive.
mnb fst v pval VIF (Intercept) 2.295 4.278 91.203 0.041 NA fyll -0.940 3.485 1934.111 0.062 2.252 fydd 0.492 4.446 48995.164 0.035 2.034 roots -0.474 4.267 122280.038 0.039 1.214 plow 0.953 10.002 58471.995 0.002 1.741 hunt -0.178 8.454 5789.773 0.004 1.567 fish -0.125 5.471 562331.911 0.019 1.319 anim -0.267 18.954 995.664 0.0002 2.021 foodtrade 0.018 4.011 5133922.582 0.045 1.207 exogamy -0.204 7.654 26028.450 0.006 1.074 superjh 0.587 41.503 3449.695 0.0000000001 1.744 moralgods 0.196 4.741 1004.977 0.030 1.540 fempower 0.242 0.817 8.562 0.391 1.131
(you left off the R2 and diagnostics) DRW RERUN:
coef Fstat ddf pvalue VIF
(Intercept) 2.11549231 3.847829 9.798975e+02 0.05009324 NA
fyll -0.90891797 3.283457 6.787713e+03 0.07002553 2.244 <- negative on language
fydd 0.49596925 4.520688 1.917518e+07 0.03348737 2.034
roots -0.47381415 4.236001 1.149779e+08 0.03957528 1.215
plow 0.95749750 9.920426 4.269428e+04 0.00163567 1.760
hunt -0.18504478 9.023295 9.428143e+03 0.00267260 1.571
fish -0.12614962 5.536005 3.104774e+04 0.01863526 1.318
anim -0.28010379 19.939583 1.873217e+02 0.00001379 2.028
foodtrade 0.01873274 4.248579 5.189531e+03 0.03933290 1.199
exogamy -0.19025095 6.649897 1.259677e+04 0.00992739 1.067
superjh 0.56960390 36.268465 1.414129e+02 0.00000001 1.773
moralgods 0.21863101 5.333031 5.491904e+01 0.02471200 1.520
fempower 0.28426556 1.594840 2.331993e+02 0.20789726 1.085
R2:final model R2:IV(distance) R2:IV(language)
0.5865162 0.9811534 0.9772505 <-- 59% very high r2)
Fstat df pvalue
RESET 13.974 189.742 0.000 <- which variable to log?
Wald on restrs. 0.084 5328.628 0.773
NCV 8.438 53.792 0.005
SWnormal 10.732 1317.135 0.001
lagll 1.489 1279956.371 0.222
lagdd 2.543 32429.985 0.111