EduMod-65: Imputation and Regression

From InterSciWiki
Jump to: navigation, search

User:roblesn - 2SLS checklist Nick your 7.1 results are great. Go on (copy program further down and strip out deletes) to further edit your xR Restricted variable Doug 15:36, 10 February 2010 (PST) Doing group with your restricted variables. I can see you're looking for the variable to log but its not fempower. Might be interesting to see WHICH fempower variable between and v657 and v662 is significant rather than trace logged variables. Even with the RESET being significant this is already enough for a powerpoint and final paper.

  • EduMod-64: Imputation and Regression
  • EduMod-66: Imputation and Regression
  • Remember: When you save your ==B| results== first open TextPad, add one space, then paste your results from R, then copy and paste the TextPad page back to your new ==B| results==. What the extra space to the left of each line of text does is to make the results READABLE in the wikipage.

Contents

A| Unrestricted Model PROGRAM copied ==1A} to==5|A change your 5|A depvar to begin YOUR project<-

#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#--To find the citation for a package, use this function:---
citation("mice")

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)

#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)

#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton  Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'  
#'1 (Christianity)'  
#'2 (Islam)'  
#'3 (Hindu/Buddhist)'  

#--check to see number of missing values in vaux, 
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp

#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
) 

#--look at first 6 rows of fx--
head(fx)

#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp

#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
  



#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------

#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP  
nimp<-3  ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}

#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)

#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)

#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")




#Program 2 (12-18-2009)==
#MI--estimate model, combine results

rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)

#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)

#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v90
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"police"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]

#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)

#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))


indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")


#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------

#--number of imputed datasets--
#nimp<-10
nimp<-3  ################################################################## speedup test

#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL

#--loop through the imputed datasets--
for (i in 1:nimp){

#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]

#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx   
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)

#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)

#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+                
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
# fydd+                ### fyll+ <--- ADDED fydd for distance clustering
#  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
#  plow+                              ###cereals+gath+
#  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
#  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
#  ###settype+localjh+superjh+moralgods+
#  fempower+femsubs                   ###+
#  ###war+himilexp  +money+wagelabor  ###sexratio+
#  ,data=m9)

#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))

#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))

#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")


#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))

}


#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------

#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")

#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")

#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)

aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)

bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

UNRESTRICTED FIRST RESULTS

 a<-c(table(depvar), NROW(depvar),depvarname)
 >  imp<-"number of imputations nimp="
 >  impute=c(imp,nimp)
 >  
 >  bbb
                      coef       Fstat          ddf     pvalue   VIF
 (Intercept)    2.83859309  1.77654032    13.751251 0.20423841    NA
 fyll          -0.73933579  1.55506311 11255.497860 0.21241549 3.131 
 fydd           0.23894265  0.57927490    24.871640 0.45375289 3.057
 pre_mar_sex    0.08268219  0.98148845 12351.249670 0.32185102 1.464
 money          0.05498930  0.36932623    60.096189 0.54566152 2.423
 foodstress    -0.11160830  0.51786113    22.847765 0.47905260 1.575
 femctrldwellg  0.14722998  1.48024194     7.281069 0.26170012 1.590
 wealthy        0.11232801  0.59486855    12.320275 0.45508119 1.647
 fratgrpstr    -0.01164086  0.00823256     8.905256 0.92971245 2.679--->REMOVE
 marrcaptives  -0.08736195  0.49367361   174.500066 0.48322939 1.842
 plunder        0.13066740  0.32206116  3533.641393 0.57040764 1.690
 cultints       0.02841499  0.06215400   227.167878 0.80334856 5.083--->REMOVE
 roots         -0.58155692  1.42368560  3988.357396 0.23286869 5.372
 cereals       -0.06945798  0.02075642   427.511151 0.88551237 7.691--->REMOVE
 gath          -0.13233361  1.40621471    56.104173 0.24068382 3.488
 plow           0.80795839  3.93478798 23115.580139 0.04730891 3.149-->SIGNIFICANT
 hunt          -0.20101703  2.92836455   189.598909 0.08867100 5.430--> CLOSE TO SIGNIFICANT
 fish          -0.18685404  4.68062983   312.586813 0.03126203 3.285-->SIGNIFICANT
 anim          -0.28353130  7.37496453   190.006159 0.00722372 5.607--> VERY SIGNIFICANT
 pigs           0.18452362  0.26286257   471.202983 0.60840057 2.388
 milk          -0.05626051  0.02111088   689.949479 0.88451974 4.113--->REMOVE
 bovines       -0.39386190  1.00818449    47.425751 0.32043389 4.254
 tree          -0.57118929  0.85297576  1866.878756 0.35583122 3.463
 foodtrade      0.01483561  2.03961338   628.440183 0.15374536 1.533
 foodscarc     -0.06647825  0.58594131    26.408558 0.45078037 1.445
 ecorich        0.04898173  0.27029247 10956.923177 0.60314615 1.902
 popdens        0.00076760  0.00004683    73.924945 0.99455815 3.877--->REMOVE
 pathstress    -0.02364615  0.41810605    77.463190 0.51979492 2.344
 exogamy       -0.17353391  3.92848696    73.024604 0.05123835 1.374-->SIGNIFICANT
 ncmallow      -0.02208772  0.25085817    96.254375 0.61761487 1.622
 famsize        0.03544177  0.14760321   727.407900 0.70094870 1.726
 settype       -0.00306719  0.00141764    13.967401 0.97049837 4.089--->REMOVE
 localjh        0.04824870  0.06048991   265.766374 0.80591312 1.808--->REMOVE
 superjh        0.50785110 16.23402310    26.422558 0.00042299 2.811-->VERY SIGNIFICANT
 moralgods      0.22287240  3.94165396    63.443780 0.05143076 2.165-->SIGNIFICANT
 fempower       0.07170493  1.63680142    31.371593 0.21014675 1.378
 femsubs       -0.08207492  1.12927392  4083.838784 0.28799310 1.861
 sexratio      -0.11457958  0.29746583     8.577658 0.59936831 1.495
 war           -0.00787549  0.24008264   105.380693 0.62516498 1.537
 himilexp       0.03726522  0.02546467    52.019930 0.87383265 1.626--->REMOVE
 wagelabor      0.08367274  0.29330317     9.313493 0.60082546 1.684
 >  r2
  R2:final model R2:IV(distance) R2:IV(language) 
       0.6550117       0.9776988       0.9763240 
 >  ccc
                  Fstat         df pvalue
 RESET           24.188   1120.860  0.000
 Wald on restrs. 16.821     23.321  0.000
 NCV              5.531    369.771  0.019
 SWnormal        15.729  23258.134  0.000
 lagll            2.092 262372.804  0.148
 lagdd            3.766  62134.683  0.052
 >  bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 >  bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 >  bbb
                  mnb    fst         v  pval   VIF
 (Intercept)    2.839  1.777    13.751 0.204    NA
 fyll          -0.739  1.555 11255.498 0.212 3.131
 fydd           0.239  0.579    24.872 0.454 3.057
 pre_mar_sex    0.083  0.981 12351.250 0.322 1.464
 money          0.055  0.369    60.096 0.546 2.423
 foodstress    -0.112  0.518    22.848 0.479 1.575
 femctrldwellg  0.147  1.480     7.281 0.262 1.590
 wealthy        0.112  0.595    12.320 0.455 1.647
 fratgrpstr    -0.012  0.008     8.905 0.930 2.679
 marrcaptives  -0.087  0.494   174.500 0.483 1.842
 plunder        0.131  0.322  3533.641 0.570 1.690
 cultints       0.028  0.062   227.168 0.803 5.083
 roots         -0.582  1.424  3988.357 0.233 5.372
 cereals       -0.069  0.021   427.511 0.886 7.691
 gath          -0.132  1.406    56.104 0.241 3.488
 plow           0.808  3.935 23115.580 0.047 3.149
 hunt          -0.201  2.928   189.599 0.089 5.430
 fish          -0.187  4.681   312.587 0.031 3.285
 anim          -0.284  7.375   190.006 0.007 5.607
 pigs           0.185  0.263   471.203 0.608 2.388
 milk          -0.056  0.021   689.949 0.885 4.113
 bovines       -0.394  1.008    47.426 0.320 4.254
 tree          -0.571  0.853  1866.879 0.356 3.463
 foodtrade      0.015  2.040   628.440 0.154 1.533
 foodscarc     -0.066  0.586    26.409 0.451 1.445
 ecorich        0.049  0.270 10956.923 0.603 1.902
 popdens        0.001  0.000    73.925 0.995 3.877
 pathstress    -0.024  0.418    77.463 0.520 2.344
 exogamy       -0.174  3.928    73.025 0.051 1.374
 ncmallow      -0.022  0.251    96.254 0.618 1.622
 famsize        0.035  0.148   727.408 0.701 1.726
 settype       -0.003  0.001    13.967 0.970 4.089
 localjh        0.048  0.060   265.766 0.806 1.808
 superjh        0.508 16.234    26.423 0.000 2.811
 moralgods      0.223  3.942    63.444 0.051 2.165
 fempower       0.072  1.637    31.372 0.210 1.378
 femsubs       -0.082  1.129  4083.839 0.288 1.861
 sexratio      -0.115  0.297     8.578 0.599 1.495
 war           -0.008  0.240   105.381 0.625 1.537
 himilexp       0.037  0.025    52.020 0.874 1.626
 wagelabor      0.084  0.293     9.313 0.601 1.684
 >  aaa
        1        2        3        4        5                   
    "124"      "4"      "4"      "6"     "42"    "180" "police" 
 >  impute

[1] "number of imputations nimp=" "3"

SECOND RUNNING PROGRAM POLICE

  1. #Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 marrcaptives+plunder+###climate+
 roots+gath+plow+
 hunt+fish+anim+pigs+bovines+tree+foodtrade+foodscarc+
 ecorich+pathstress+exogamy+ncmallow+famsize+
 superjh+moralgods+fempower+femsubs+
 sexratio+war+money+wagelabor
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
 dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
 bbb
 r2
 ccc
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 bbb
 aaa
 impute

REsULTS FOR RESTRICTED POLICE#1

  ccc
                 Fstat          df pvalue
RESET           23.290  548003.236  0.000
Wald on restrs.  0.000 2129006.971  0.999
NCV              5.638     303.871  0.018
SWnormal        14.885    1180.552  0.000
lagll            2.033  315169.944  0.154
lagdd            3.656  122965.466  0.056
>   bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
>   bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
>   bbb
                 mnb    fst          v  pval   VIF
(Intercept)    3.045  2.958     30.719 0.096    NA
fyll          -0.709  1.775  50866.156 0.183 2.651
fydd           0.224  0.615     37.590 0.438 2.771
pre_mar_sex    0.078  0.975 184316.975 0.323 1.369
money          0.059  0.506     50.914 0.480 2.120
foodstress    -0.114  0.622     27.174 0.437 1.461
femctrldwellg  0.135  1.459      8.189 0.261 1.488
wealthy        0.107  0.564      9.309 0.471 1.543
marrcaptives  -0.079  0.484    493.925 0.487 1.632
plunder        0.136  0.407  37231.740 0.524 1.535-->REMOVE
roots         -0.538  3.718  17679.586 0.054 1.859-->SIGNIFICANT
gath          -0.138  2.566    343.917 0.110 2.332
plow           0.842  4.897   2356.748 0.027 2.850-->SIGNIFICANT
hunt          -0.209  6.348    187.466 0.013 2.845-->SIGNIFICANT
fish          -0.197  7.948     97.924 0.006 2.174-->SIGNIFICANT
anim          -0.295 18.472    537.112 0.000 2.600-->SIGNIFICANT
pigs           0.195  0.368  13952.846 0.544 2.060-->REMOVE
bovines       -0.422  1.716    231.605 0.191 3.235
tree          -0.528  1.677   1980.096 0.196 1.576
foodtrade      0.015  2.350   1068.532 0.126 1.432
foodscarc     -0.065  0.607     22.051 0.444 1.365
ecorich        0.042  0.221   4517.403 0.638 1.811-->REMOVE
pathstress    -0.025  0.554    701.675 0.457 2.172
exogamy       -0.178  4.860     56.192 0.032 1.213-->SIGNIFICANT
ncmallow      -0.020  0.241     98.818 0.625 1.477-->REMOVE
famsize        0.038  0.229    111.293 0.633 1.288-->REMOVE
superjh        0.508 20.747     44.528 0.000 2.417-->SIGNIFICANT
moralgods      0.216  4.546     75.190 0.036 1.858-->SIGNIFICANT
fempower       0.071  2.000    153.559 0.159 1.282
femsubs       -0.083  1.308  35042.207 0.253 1.715
sexratio      -0.104  0.296     11.430 0.597 1.403-->REMOVE
war           -0.008  0.301    294.199 0.583 1.469-->REMOVE
wagelabor      0.078  0.310     14.382 0.586 1.586-->REMOVE
>   aaa
       1        2        3        4        5                   
   "124"      "4"      "4"      "6"     "42"    "180" "police" 
>   impute
[1] "number of imputations nimp=" "3"                          

>

MORE RESTRICTION POLICE #2

 #Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 marrcaptives+###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+tree+foodtrade+foodscarc+
 pathstress+exogamy+
 superjh+moralgods+fempower+femsubs+
 money
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
 bbb
 r2
 ccc
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 bbb
 aaa
 impute

RESULTS FOR RUNNING POLICE #3

   bbb
                 mnb    fst         v  pval   VIF
(Intercept)    3.427  6.077    38.168 0.018    NA
fyll          -0.681  1.878  3977.175 0.171 2.358
fydd           0.211  0.628    48.977 0.432 2.527
pre_mar_sex    0.068  0.790 67609.164 0.374 1.318-->REMOVE
money          0.068  0.806   172.619 0.370 1.933-->REMOVE
foodstress    -0.140  1.095    75.083 0.299 1.383
femctrldwellg  0.107  0.914     5.692 0.378 1.306-->REMOVE
wealthy        0.124  0.826     8.118 0.389 1.384-->REMOVE
marrcaptives  -0.113  1.215   115.465 0.273 1.337
roots         -0.510  3.612  1280.693 0.058 1.734
gath          -0.140  3.185   364.497 0.075 1.980
plow           0.874  5.603  1670.710 0.018 2.746
hunt          -0.221  8.572   391.287 0.004 2.465
fish          -0.213 10.787  2158.402 0.001 2.037
anim          -0.311 23.633  1058.503 0.000 2.343
bovines       -0.466  2.415   106.403 0.123 2.795
tree          -0.476  1.630 11554.295 0.202 1.365
foodtrade      0.015  2.499   668.122 0.114 1.350
foodscarc     -0.060  0.560    23.940 0.461 1.328-->REMOVE
pathstress    -0.020  0.439  9005.759 0.508 1.876-->REMOVE
exogamy       -0.179  5.520   259.346 0.020 1.173
superjh        0.492 24.188    88.821 0.000 2.085
moralgods      0.237  6.931  2205.484 0.009 1.633
fempower       0.075  2.412   226.592 0.122 1.220
femsubs       -0.091  1.858  2450.197 0.173 1.482
>   aaa
       1        2        3        4        5                   
   "124"      "4"      "4"      "6"     "42"    "180" "police" 
>   impute


PROGRAM 4 POLICE

#Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 foodstress+
 marrcaptives+###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+tree+foodtrade+exogamy+
 superjh+moralgods+fempower+femsubs+
 money
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute


RESULTS Number4 POLICE

  R2:final model R2:IV(distance) R2:IV(language) 
      0.6263966       0.9804302       0.9774232 
>  ccc
                 Fstat          df pvalue
RESET           15.633     847.911  0.000
Wald on restrs.  0.017   38314.043  0.897
NCV              6.999     477.813  0.008
SWnormal        15.606    2474.842  0.000
lagll            1.947 2365117.298  0.163
lagdd            2.904   67718.358  0.088
                    coef      Fstat          ddf     pvalue   VIF
(Intercept)   3.70011184 11.0154646    1276.4727 0.00092903    NA
fyll         -0.66783568  1.8552932   15716.5314 0.17318854 2.306
fydd          0.15448664  0.3817471     693.9556 0.53687240 2.431
foodstress   -0.16089011  1.7308093     496.9727 0.18891416 1.234
marrcaptives -0.11722691  1.4171140     757.6009 0.23425189 1.281-->REMOVE
roots        -0.61326526  6.0752408  444623.8178 0.01370934 1.522
gath         -0.14863869  4.0421256 4548667.8284 0.04437795 1.838
plow          0.91434708  7.0515642    1896.5955 0.00798590 2.392
hunt         -0.18932768  7.2625282 8516813.2156 0.00704082 2.210
fish         -0.17663726  9.3067383    4372.2665 0.00229682 1.639
anim         -0.28955098 21.9845586 3084865.0118 0.00000275 2.231
bovines      -0.45443949  2.8117317 1401568.7297 0.09357754 2.467
tree         -0.48924272  1.7742966    4878.1506 0.18291346 1.322
foodtrade     0.01400120  2.3630956   29623.6994 0.12424612 1.300
exogamy      -0.19291322  6.9463290    3479.4157 0.00843642 1.122
superjh       0.50964762 28.0026197     301.3825 0.00000023 2.013
moralgods     0.23244086  6.7780531     706.3835 0.00942233 1.588
fempower      0.07901164  2.7649007     128.4985 0.09878999 1.164
femsubs      -0.08694324  1.7906148  167259.5635 0.18085309 1.431
money         0.08277161  1.2456803     649.6772 0.26479175 1.890-->REMOVE


REMOVING PART 5 POLICE

  1. Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 foodstress+
 ###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+tree+foodtrade+exogamy+
 superjh+moralgods+fempower+femsubs
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute


RESULTS for 5 POLICE

 >  bbb
                    coef      Fstat          ddf     pvalue   VIF
 (Intercept)  3.42374475 10.1077508 5.380461e+03 0.00148484    NA
 fyll        -0.71217011  2.0947152 1.481020e+05 0.14781137 2.301
 fydd         0.26904560  1.2493208 6.406217e+07 0.26368224 2.289
 foodstress  -0.08884800  0.5717965 5.314196e+01 0.45288505 1.138
 roots       -0.60986177  5.8171387 2.163610e+03 0.01595351 1.529
 gath        -0.13281740  3.2343330 3.053356e+03 0.07220840 1.787
 plow         1.10322496 11.2651460 7.573897e+04 0.00079015 2.183
 hunt        -0.22435180 10.7358470 3.137899e+03 0.00106224 2.046
 fish        -0.18234717  9.8496219 1.408321e+03 0.00173390 1.615
 anim        -0.29617747 22.7912372 8.904379e+07 0.00000181 2.223
 bovines     -0.50803248  3.5520290 1.944259e+06 0.05947283 2.410
 tree        -0.54151634  2.1709029 2.532914e+03 0.14076789 1.301
 foodtrade    0.01597879  3.1090261 9.261224e+02 0.07818924 1.245
 exogamy     -0.19075125  6.8051884 5.306370e+04 0.00909188 1.116
 superjh      0.52165185 31.8153967 1.388481e+04 0.00000002 1.898
 moralgods    0.23352300  7.0451866 3.221730e+04 0.00795176 1.559
 fempower     0.07871408  2.6001318 3.840855e+02 0.10767616 1.146
 femsubs     -0.09185884  1.9864149 3.017863e+04 0.15872654 1.404
 >  r2
  R2:final model R2:IV(distance) R2:IV(language) 
       0.6214586       0.9785105       0.9762273 
 >  ccc
                  Fstat           df pvalue
 RESET           15.704     7025.871  0.000
 Wald on restrs.  0.005   748185.406  0.944
 NCV              7.239     5779.972  0.007
 SWnormal        15.511     5555.953  0.000
 lagll            1.941 12755388.221  0.164
 lagdd            3.063  1170781.121  0.080
 >  bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 >  bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 >  bbb
                mnb    fst            v  pval   VIF
 (Intercept)  3.424 10.108     5380.461 0.001    NA
 fyll        -0.712  2.095   148101.970 0.148 2.301
 fydd         0.269  1.249 64062172.453 0.264 2.289
 foodstress  -0.089  0.572       53.142 0.453 1.138<--REMOVE
 roots       -0.610  5.817     2163.610 0.016 1.529
 gath        -0.133  3.234     3053.356 0.072 1.787
 plow         1.103 11.265    75738.972 0.001 2.183
 hunt        -0.224 10.736     3137.899 0.001 2.046
 fish        -0.182  9.850     1408.321 0.002 1.615
 anim        -0.296 22.791 89043790.579 0.000 2.223
 bovines     -0.508  3.552  1944258.774 0.059 2.410
 tree        -0.542  2.171     2532.914 0.141 1.301
 foodtrade    0.016  3.109      926.122 0.078 1.245
 exogamy     -0.191  6.805    53063.696 0.009 1.116
 superjh      0.522 31.815    13884.808 0.000 1.898
 moralgods    0.234  7.045    32217.299 0.008 1.559
 fempower     0.079  2.600      384.086 0.108 1.146
 femsubs     -0.092  1.986    30178.626 0.159 1.404
 >  aaa
        1        2        3        4        5                   
    "124"      "4"      "4"      "6"     "42"    "180" "police" 
 >  impute

PROGRAM TRY 6 POLICE

  1. Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 ###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+tree+foodtrade+exogamy+
 superjh+moralgods+fempower+femsubs
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

RESULTS FOR TRY 6 POLICE

 >  bbb
                    coef     Fstat          ddf     pvalue   VIF
 (Intercept)  3.25427570  9.567070  3027458.394 0.00198100    NA
 fyll        -0.73021880  2.213351  1666610.147 0.13682134 2.293
 fydd         0.28803539  1.445810   308139.745 0.22920233 2.265
 roots       -0.63847818  6.532789     6667.207 0.01061236 1.503
 gath        -0.13577770  3.388819     3039.494 0.06573736 1.783
 plow         1.09145863 11.082106   139292.164 0.00087187 2.174
 hunt        -0.23026318 11.542893    36803.637 0.00068081 2.023
 fish        -0.18351592 10.057378     2284.608 0.00153751 1.609
 anim        -0.29891406 23.313203 81713813.011 0.00000138 2.214
 bovines     -0.51605399  3.671025  1023194.199 0.05536642 2.406
 tree        -0.57855931  2.525783     3199.529 0.11209800 1.279
 foodtrade    0.01538888  2.955691     8663.814 0.08561145 1.235
 exogamy     -0.18974129  6.735902    10744.056 0.00946199 1.111
 superjh      0.52644946 32.628401    36233.827 0.00000001 1.889
 moralgods    0.22229832  6.578593   161938.596 0.01032214 1.517
 fempower     0.08341096  3.020582     1672.169 0.08239806 1.130
 femsubs     -0.08611704  1.769603   419222.521 0.18343200 1.390<--Remove
 >  r2
  R2:final model R2:IV(distance) R2:IV(language) 
       0.6192190       0.9785105       0.9762273 
 >  ccc
                  Fstat          df pvalue
 RESET           15.767    53481.22  0.000
 Wald on restrs.  0.003  1285431.35  0.953
 NCV              7.974   200939.52  0.005
 SWnormal        15.093    22092.63  0.000
 lagll            1.928 47527983.83  0.165
 lagdd            3.080   641362.16  0.079
 >  bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 >  bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 >  bbb
                mnb    fst            v  pval   VIF
 (Intercept)  3.254  9.567  3027458.394 0.002    NA
 fyll        -0.730  2.213  1666610.147 0.137 2.293
 fydd         0.288  1.446   308139.745 0.229 2.265
 roots       -0.638  6.533     6667.207 0.011 1.503
 gath        -0.136  3.389     3039.494 0.066 1.783
 plow         1.091 11.082   139292.164 0.001 2.174
 hunt        -0.230 11.543    36803.637 0.001 2.023
 fish        -0.184 10.057     2284.608 0.002 1.609
 anim        -0.299 23.313 81713813.011 0.000 2.214
 bovines     -0.516  3.671  1023194.199 0.055 2.406
 tree        -0.579  2.526     3199.529 0.112 1.279
 foodtrade    0.015  2.956     8663.814 0.086 1.235
 exogamy     -0.190  6.736    10744.056 0.009 1.111
 superjh      0.526 32.628    36233.827 0.000 1.889
 moralgods    0.222  6.579   161938.596 0.010 1.517
 fempower     0.083  3.021     1672.169 0.082 1.130
 femsubs     -0.086  1.770   419222.521 0.183 1.390
 >  aaa
        1        2        3        4        5                   
    "124"      "4"      "4"      "6"     "42"    "180" "police" 
 >  impute

TRY 7 POLICE

PROGRAM TRY 7 POLICE

  1. Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 ###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+tree+foodtrade+exogamy+
 superjh+moralgods+fempower
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

FINAL RESULTS POLICE - Great! - DW

   R2:final model R2:IV(distance) R2:IV(language) 
        0.6136340       0.9785105       0.9762273 
  >  ccc
                   Fstat          df pvalue
  RESET           16.023    21631.82  0.000 some var needs to be logged
  Wald on restrs.  0.007   315422.37  0.935
  NCV              8.296  1098628.80  0.004 some bunched regression errors
  SWnormal        14.759    12545.33  0.000
  lagll            1.765 38638259.70  0.184
  lagdd            2.853   813154.45  0.091
                 mnb    fst           v  pval   VIF
  (Intercept)  2.836  7.910 1279771.482 0.005    NA
  fyll        -0.753  2.339 1731257.732 0.126 2.290
  fydd         0.342  2.087  429659.552 0.149 2.199
  roots       -0.702  8.138    5412.672 0.004 1.447
  gath        -0.136  3.367    3447.013 0.067 1.783
  plow         1.144 12.242  206455.960 0.000 2.143
  hunt        -0.211 10.059   56840.914 0.002 1.926
  fish        -0.164  8.519    2025.391 0.004 1.506
  anim        -0.295 22.624 8006206.522 0.000 2.210
  bovines     -0.506  3.497  558082.836 0.061 2.404
  tree        -0.582  2.537    3702.789 0.111 1.279 <-- delete
  foodtrade    0.017  3.547   15757.721 0.060 1.218
  exogamy     -0.204  7.880   11219.211 0.005 1.087
  superjh      0.536 33.696   30755.531 0.000 1.878
  moralgods    0.227  6.851 1028135.869 0.009 1.513
  fempower     0.080  2.791    5024.209 0.095 1.126 <-- see what happens to RESET if you <-log(1+thisSCCS$v... var)
         1        2        3        4        5                   
     "124"      "4"      "4"      "6"     "42"    "180" "police"

TRY 8 W/O TREE

  1. Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 ###climate+
 roots+gath+plow+
 hunt+fish+anim+bovines+foodtrade+exogamy+
 superjh+moralgods+fempower
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree")
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

TRY 8 RESULTS good results: DRW

   r2
  R2:final model R2:IV(distance) R2:IV(language) 
       0.6034978       0.9804443       0.9769738 
                  Fstat         df pvalue
 RESET           18.480  62533.836  0.000 not obvious what might need to be logged
 Wald on restrs.  0.026   9358.091  0.872
 NCV              8.514   1048.692  0.004
 SWnormal        10.557   2909.647  0.001
 lagll            1.460  20689.417  0.227
 lagdd            2.530 174679.029  0.112
 >  bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
 >  bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 >  bbb
                mnb    fst            v  pval   VIF
 (Intercept)  2.627  6.848     1877.457 0.009    NA
 fyll        -0.839  2.845  1801161.406 0.092 2.293
 fydd         0.413  3.004     1472.513 0.083 2.142
 roots       -0.590  6.336 11669490.860 0.012 1.297
 gath        -0.117  2.541    11936.945 0.111 1.735
 plow         1.179 12.849     6290.599 0.000 2.111
 hunt        -0.181  7.748      663.008 0.006 1.758
 fish        -0.164  8.475    25207.953 0.004 1.505
 anim        -0.278 19.576     1622.539 0.000 2.177
 bovines     -0.452  2.722     3386.390 0.099 2.395
 foodtrade    0.016  3.341    21731.622 0.068 1.209
 exogamy     -0.193  6.900     2636.639 0.009 1.084
 superjh      0.562 37.618  6423402.278 0.000 1.829
 moralgods    0.210  5.669     5815.113 0.017 1.513
 fempower     0.082  2.849      336.038 0.092 1.116
        1        2        3        4        5                   
    "124"      "4"      "4"      "6"     "42"    "180" "police" 

TRY 9 RESULTS also good

   r2
  R2:final model R2:IV(distance) R2:IV(language) 
       0.5967023       0.9804443       0.9769738 
                  Fstat         df pvalue
 RESET           17.173  27726.603  0.000 not obvious what might need to be logged
 Wald on restrs.  0.054   2077.838  0.816
 NCV              7.740    924.262  0.006
 SWnormal        12.059   5199.759  0.001
 lagll            1.456  35016.080  0.228
 lagdd            2.423 501654.808  0.120
                mnb    fst            v  pval   VIF
 (Intercept)  2.468  6.003 1.031631e+03 0.014    NA
 fyll        -0.952  3.691 8.544249e+05 0.055 2.247
 fydd         0.484  4.203 7.945520e+02 0.041 2.066
 roots       -0.541  5.367 3.159518e+07 0.021 1.275
 plow         1.154 12.201 4.778956e+03 0.000 2.106
 hunt        -0.202 10.119 8.283810e+02 0.002 1.678
 fish        -0.140  6.556 6.439615e+04 0.010 1.392
 anim        -0.252 17.191 3.537418e+03 0.000 2.033
 bovines     -0.359  1.771 1.890590e+03 0.183 2.286
 foodtrade    0.017  3.538 1.586855e+04 0.060 1.208
 exogamy     -0.202  7.471 2.257439e+03 0.006 1.078
 superjh      0.594 43.676 1.714590e+09 0.000 1.742
 moralgods    0.206  5.442 1.595069e+04 0.020 1.511
 fempower     0.080  2.617 1.470000e+02 0.108 1.114
 >  aaa
        1        2        3        4        5                   
    "124"      "4"      "4"      "6"     "42"    "180" "police" 

TRY 10 PROGRAM

  1. Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 ###climate+
 roots+plow+
 hunt+fish+anim+foodtrade+exogamy+
 superjh+moralgods+fempower
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree","gath","bovines" )
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

TRY 10 RESULTS also good

     r2
    R2:final model R2:IV(distance) R2:IV(language) 
         0.5916506       0.9804443       0.9769738 
                    Fstat         df pvalue
   RESET           16.814 133403.158  0.000
   Wald on restrs.  0.078   2740.949  0.779
   NCV              8.898   1343.490  0.003
   SWnormal        11.206   6976.605  0.001
   lagll            1.351  27149.436  0.245
   lagdd            2.333 386538.873  0.127
                  mnb    fst            v  pval   VIF
   (Intercept)  2.383  5.576 9.139790e+02 0.018    NA
   fyll        -0.974  3.847 3.495494e+05 0.050 2.244
   fydd         0.507  4.614 7.438840e+02 0.032 2.054
   roots       -0.471  4.250 2.702116e+05 0.039 1.211
   plow         0.968 10.449 5.414196e+04 0.001 1.731
   hunt        -0.181  8.665 1.856461e+03 0.003 1.569
   fish        -0.123  5.312 1.141325e+04 0.021 1.320
   anim        -0.264 19.268 1.153690e+04 0.000 1.989
   foodtrade    0.018  3.773 1.563632e+04 0.052 1.205
   exogamy     -0.203  7.503 2.376409e+03 0.006 1.078
   superjh      0.587 42.531 1.217651e+11 0.000 1.736
   moralgods    0.193  4.806 1.591517e+04 0.028 1.493
   fempower     0.073  2.160 7.069800e+01 0.146 1.100
   >  aaa
          1        2        3        4        5                   
      "124"      "4"      "4"      "6"     "42"    "180" "police"

B| Unrestricted Model RESULTS: Dep Var change to: (yours)

>  bbb
                mnb    fst          v  pval   VIF
(Intercept)   1.499  0.625   2972.768 0.429    NA
fyll         -0.456  0.759   4762.453 0.384 2.791
fydd          0.700 11.070    214.460 0.001 3.132 <--keep
fratgrpstr    0.171  3.638    157.715 0.058 2.588 <--keep
marrcaptives  0.289  6.221     21.859 0.021 1.737 <--keep
plunder      -0.652  9.275     31.996 0.005 1.604 <--keep
cultints     -0.013  0.016    462.894 0.899 5.097
roots         0.445  1.079    696.655 0.299 4.940
cereals       0.322  0.580    408.991 0.447 7.409
gath          0.095  1.048    783.198 0.306 3.189
plow         -0.536  2.354   1612.849 0.125 3.034
hunt          0.128  1.671   8347.508 0.196 5.033
fish         -0.018  0.056   2888.055 0.812 3.071
anim          0.137  2.357     96.348 0.128 5.725
pigs         -0.513  2.296    311.485 0.131 2.584 <--keep
milk         -0.847  5.551     69.154 0.021 4.314 <--keep
bovines       0.277  0.695    372.384 0.405 4.269
tree          0.339  0.378    186.900 0.539 3.251
foodtrade    -0.017  3.145    111.055 0.079 1.475 <--keep
foodscarc    -0.010  0.019     21.943 0.892 1.285
ecorich      -0.042  0.257  27009.971 0.612 1.865
popdens       0.172  3.132    202.959 0.078 3.750 <--keep
pathstress    0.037  1.316 141786.377 0.251 2.505
exogamy       0.051  0.436   2793.326 0.509 1.422
ncmallow     -0.009  0.059  35224.928 0.808 1.478
famsize      -0.032  0.164   3278.637 0.686 1.617
settype      -0.009  0.020    146.457 0.889 3.955
localjh      -0.005  0.001    165.547 0.975 1.724
superjh      -0.075  0.532    129.517 0.467 2.657
moralgods     0.022  0.053     34.070 0.820 1.908
fempower     -0.076  1.684      8.888 0.227 1.392 <--keep 
femsubs       0.089  1.853 116401.230 0.173 1.683
sexratio     -0.102  0.412   4282.394 0.521 1.294
war           0.008  0.309 137401.687 0.579 1.555
himilexp      0.175  0.723     37.899 0.401 1.553
money         0.051  0.375     40.405 0.543 2.231
wagelabor    -0.008  0.003      5.846 0.958 1.593
>  aaa
         1          2          3          4          5                       
      "27"       "32"       "45"       "34"       "46"      "184" "polygyny" 
>  impute
[1] "number of imputations nimp=" "3"                          
>

A| from -54 xR Restricted model for v90 Police. For Unrestricted model copy xUR< vars over xR< to let others use the new indep vars #### speedup

#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#--To find the citation for a package, use this function:---
citation("mice")

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)

#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)

#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton  Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'  
#'1 (Christianity)'  
#'2 (Islam)'  
#'3 (Hindu/Buddhist)'  

#--check to see number of missing values in vaux, 
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp

#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
) 

#--look at first 6 rows of fx--
head(fx)

#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp

#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
  



#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------

#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP  
nimp<-3  ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}

#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)

#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)

#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")




#Program 2 (12-18-2009)==
#MI--estimate model, combine results

rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)

#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)

#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v860
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"polygyny"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]

#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)

#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))


indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")


#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------

#--number of imputed datasets--
#nimp<-10
nimp<-3  ################################################################## speedup test

#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL

#--loop through the imputed datasets--
for (i in 1:nimp){

#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]

#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx   
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)

#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)

#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+                ### fyll+ <--- ADDED fydd for distance clustering
##pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+   ###climate+fish+
plow+                              ###cereals+gath+
###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
###settype+localjh+superjh+moralgods+
fempower+femsubs                   ###+
###war+himilexp  +money+wagelabor  ###sexratio+
,data=m9)
# fydd+                ### fyll+ <--- ADDED fydd for distance clustering
#  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
#  plow+                              ###cereals+gath+
#  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
#  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
#  ###settype+localjh+superjh+moralgods+
#  fempower+femsubs                   ###+
#  ###war+himilexp  +money+wagelabor  ###sexratio+
#  ,data=m9)

#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))

#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))

#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")


#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))

}


#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------

#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")

#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")

#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)

aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)

bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

B| Restricted Model RESULTS

A| Unrestricted Model PROGRAM copied ==1A} to==5|A change your 5|A depvar to begin YOUR project<-

#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#--To find the citation for a package, use this function:---
citation("mice")

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)

#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)

#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton  Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'  
#'1 (Christianity)'  
#'2 (Islam)'  
#'3 (Hindu/Buddhist)'  

#--check to see number of missing values in vaux, 
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp

#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=SCCS$v203,
hunt=SCCS$v204,
fish=SCCS$v205,
anim=SCCS$v206,
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=SCCS$v663,
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
) 

#--look at first 6 rows of fx--
head(fx)

#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp

#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
  



#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------

#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP  
nimp<-3  ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}

#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)

#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)

#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")




#Program 2 (12-18-2009)==
#MI--estimate model, combine results

rm(list=ls(all=TRUE))
#--Set path to your directory with data and program--
setwd("C:/My Documents/MI")
options(echo=TRUE)

#--need these packages for estimation and diagnostics--
library(foreign)
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in original SCCS data---
load("SCCS.Rdata",.GlobalEnv)
#--Read in two weight matrices--
ll<-as.matrix(read.dta("langwm.dta")[,-1])
dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
#--Read in the imputed dataset---
load("impdat.Rdata",.GlobalEnv)

#--create dep.varb. you wish to use from SCCS data--
#--Here we sum variables measuring how much a society values children--
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
depvar<-SCCS$v860
#--find obs. for which dep. varb. is non-missing--
zdv<-which(!is.na(depvar))
depvar<-depvar[zdv]
#HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
depvarname<-"polygyny"
#--can add additional SCCS variable, but only if it has no missing values---
dateobs<-SCCS$v838
dateobs<-dateobs[zdv]

#--look at histogram and frequencies for the dep. varb.--
hist(depvar)
table(depvar)

#--modify weight matrices---
#--set diagonal equal to zeros--
diag(ll)<-0
diag(dd)<-0
#--use only obs. where dep. varb. non-missing--
ll<-ll[zdv,zdv]
dd<-dd[zdv,zdv]
#--row standardize (rows sum to one)
ll<-ll/rowSums(ll)
dd<-dd/rowSums(dd)
#--check to see that rows sum to one
rowSums(ll)
rowSums(dd)
#--make weight matrix object for later autocorrelation test--
wmatll<-mat2listw(as.matrix(ll))
wmatdd<-mat2listw(as.matrix(dd))


indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
"femsubs","foodscarc",
"fratgrpstr","marrcaptives","plunder",###"climate",
"exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
"war","himilexp","wagelabor",
"famsize","settype","localjh","money",
"cultints","roots","cereals","gath","hunt","fish",
"anim","pigs","milk","plow","bovines","tree","foodtrade",
"ndrymonth","ecorich",
"popdens","pathstress","CVrain","rain","temp","AP1","AP2")


#-----------------------------------------------------
#---Estimate model on each imputed dataset------------
#-----------------------------------------------------

#--number of imputed datasets--
#nimp<-10
nimp<-3  ################################################################## speedup test

#--will append values to these empty objects--
vif<-NULL
ss<-NULL
beta<-NULL
dng<-NULL

#--loop through the imputed datasets--
for (i in 1:nimp){

#--select the ith imputed dataset--
m9<-impdat[which(impdat$.imp==i),]
#--retain only obs. for which dep. varb. is nonmissing--
m9<-m9[zdv,]

#--create spatially lagged dep. varbs.--
y<-as.matrix(depvar)
xx<-as.matrix(m9[,indpv])
#--for instruments we use the spatial lag of our indep. varbs.--
#--First, the spatially lagged varb. for distance--
xdy<-dd%*%xx
cyd<-dd%*%y
o<-lm(cyd~xdy)
#--the fitted value is our instrumental variable--
fydd<-fitted(o)
#--keep R2 from this regression--
dr2<-summary(o)$r.squared
#--Then, the spatially lagged varb. for language--
xly<-ll%*%xx   
cyl<-ll%*%y
o<-lm(cyl~xly)
#--the fitted value is our instrumental variable--
fyll<-fitted(o)
#--keep R2 from this regression--
lr2<-summary(o)$r.squared
m9<-cbind(m9,fydd,fyll)

#--OLS estimate of unrestricted model--
xUR<-lm(depvar~fyll+fydd+
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)

#--OLS estimate of restricted model--
xR<-lm(depvar~fyll+fydd+                
pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
fratgrpstr+marrcaptives+plunder+###climate+
cultints+roots+cereals+gath+plow+
hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
settype+localjh+superjh+moralgods+fempower+femsubs+
sexratio+war+himilexp+money+wagelabor
,data=m9)
# fydd+                ### fyll+ <--- ADDED fydd for distance clustering
#  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
#  plow+                              ###cereals+gath+
#  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
#  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
#  ###settype+localjh+superjh+moralgods+
#  fempower+femsubs                   ###+
#  ###war+himilexp  +money+wagelabor  ###sexratio+
#  ,data=m9)

#--corrected sigma2 and R2 for 2SLS--
qxx<-m9
qxx[,"fydd"]<-cyd
qxx[,"fyll"]<-cyl
b<-coef(xR)
incpt<-matrix(1,NROW(qxx),1)
x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
e<-y-x%*%as.matrix(b)
cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))

#--collect coefficients and their variances--
ov<-summary(xR)
vif<-rbind(vif,vif(xR))
ss<-rbind(ss,diag(ov$cov*cs2))
#--collect robust coef. variances when there is heteroskedasticity--
#eb<-e^2
#x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
#hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
#ss<-rbind(ss,diag(hcm))
beta<-rbind(beta,coef(xR))

#--collect some model diagnostics--
dropt<-c("cereals","gath","plow","hunt","anim",
"pigs","milk","bovines","foodscarc","ecorich",
"popdens","pathstress","ncmallow","famsize","localjh",
"superjh","moralgods","fempower","sexratio","money",
"wagelabor","war","himilexp","tree")


#--Ramsey RESET test--
p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
#--Wald test (H0: dropped variables have coefficient equal zero)--
o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
#--Heteroskedasticity test (H0: homoskedastic residuals)--
p3<-ncv.test(xR)$ChiSquare
#--Shapiro-Wilke normality test (H0: residuals normal)
p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
#--LaGrange Multiplier test for spatial autocorrelation: language--
o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
p5<-as.numeric(o$LMlag$statistic)
#--LaGrange Multiplier test for spatial autocorrelation: distance--
o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
p6<-as.numeric(o$LMlag$statistic)
#--model R2--
p7<-cr2
dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))

}


#--------------------------------------------
#--Rubin's formulas for combining estimates--
#--------------------------------------------

#--first find final regr. coefs. and p-values--
mnb<-apply(beta,2,mean)
####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
mnv<-apply(ss,2,mean)
vrT<-mnv+vrb*(1-nimp^(-1))
fst<-mnb^2/vrT
r<-(1+nimp^(-1))*vrb/mnv
v<-(nimp-1)*(1+r^(-1))^2
pval<-pf(fst,1,v,lower.tail=FALSE)
bbb<-data.frame(round(cbind(mnb,fst,v,pval),8)) #,3 changed to .8 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")

#--Then combine the diagnostics we collected--
dng<-data.frame(dng)
names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
"R2:final model","R2:IV(distance)","R2:IV(language)")
r2<-apply(dng[,7:9],2,mean)
adng<-dng[,1:6]
mdm<-apply(adng,2,mean)
vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
aa<-4*mdm^2-2*vrd
aa[which(aa<0)]<-0
rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
vd<-(nimp-1)*(1+rd^(-1))^2
Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
#-All chi-sq we collected have df=1-------
pvald<-pf(Dm,1,vd,lower.tail=FALSE)
ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
names(ccc)<-c("Fstat","df","pvalue")

#--write results to csv file for perusal in spreadsheet--
write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
write.csv(bbb,file="OLSresults.csv",append=TRUE)
write.csv(r2,file="OLSresults.csv",append=TRUE)
write.csv(ccc,file="OLSresults.csv",append=TRUE)

aaa<-c(table(depvar), NROW(depvar),depvarname)
imp<-"number of imputations nimp="
impute=c(imp,nimp)

bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

A| First program 1 with log variables WORKS

#Program 1 --> Program 2 below
#Program 1 Part A
#MI--make the imputed datasets
#--change the following path to the directory with your data and program--
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
library(tripak)
library(zoo)
library(sp)
library(maptools)
library(spam)
#--for program 2 below
library(spdep)
library(car)
library(lmtest)
library(sandwich)

#--To find the citation for a package, use this function:---
citation("mice")

#-----------------------------
#--Read in data, rearrange----
#-----------------------------

#--Read in auxiliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)

#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)

#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton  Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'  
#'1 (Christianity)'  
#'2 (Islam)'  
#'3 (Hindu/Buddhist)'  

#--check to see number of missing values in vaux, 
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp

#MODIFY THESE STATEMENTS FOR A NEW PROJECT
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
fratgrpstr=SCCS$v570,
marrcaptives=SCCS$v870,
plunder=SCCS$v912, ##2009 depvars
pre_mar_sex=SCCS$v167,
money=SCCS$v17,
foodstress=SCCS$v678,
femctrldwellg=SCCS$v591,
wealthy=SCCS$v1721,
war679=SCCS$v679,
###climate<-(SCCS$v857==1 | SCCS$v857==2 | SCCS$v857==6)*1+(SCCS$v857==3 | SCCS$v857==4)*2+(SCCS$v857==5)*3,
nonmatrel=SCCS$v52,
lrgfam=SCCS$v68,
exogamy=SCCS$v72,
famsize=SCCS$v80,
money=SCCS$v155,
popdens=SCCS$v156,
malesexag=SCCS$v175,
ndrymonth=SCCS$v196,
gath=log(1+SCCS$v203),
hunt=log(1+SCCS$v204),
fish=SCCS$v205), #log(1+SCCS$v205),
anim=log(1+SCCS$v206),
brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,
ncmallow=SCCS$v227,
cultints=SCCS$v232,
tree=(SCCS$v233==4)*1,
roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,
settype=SCCS$v234,
localjh=(SCCS$v236-1),
superjh=SCCS$v237,
moralgods=SCCS$v238,
segadlboys=SCCS$v242,
plow=(SCCS$v243>1)*1,
pigs=(SCCS$v244==2)*1,
bovines=(SCCS$v244==7)*1,
milk=(SCCS$v245>1)*1,
agrlateboy=SCCS$v300,
fempower=log(1+SCCS$v663),   #<-log(1+thisSCCS$v... var)
migr=(SCCS$v677==2)*1,
foodtrade=SCCS$v819,
dateobs=SCCS$v838,
rain=SCCS$v854,
temp=SCCS$v855,
ecorich=SCCS$v857,
#pctFemPolyg=SCCS$v872,
femsubs=SCCS$v890,
himilexp=(SCCS$v899==1)*1,
AP1=SCCS$v921,
AP2=SCCS$v928,
pathstress=SCCS$v1260,
war=SCCS$v1648,
foodscarc=SCCS$v1685,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
wagelabor=SCCS$v1732,
CVrain=SCCS$v1914/SCCS$v1913
) 

#--look at first 6 rows of fx--
head(fx)

#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp

#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
  



#Program 1 Part B
#-----------------------------
#----Multiple imputation------
#-----------------------------

#--number of imputed data sets to create--
#nimp<-10 #CHANGED TO TEST SPEEDUP  
nimp<-3  ################################################################## speedup test
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=20,m=nimp),action="long")
###aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}

#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)

#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)

#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")


 #Program 2 (12-18-2009)==
 #MI--estimate model, combine results
 
 rm(list=ls(all=TRUE))
 #--Set path to your directory with data and program--
 setwd("C:/My Documents/MI")
 options(echo=TRUE)
 
 #--need these packages for estimation and diagnostics--
 library(foreign)
 library(spdep)
 library(car)
 library(lmtest)
 library(sandwich)
 
 #-----------------------------
 #--Read in data, rearrange----
 #-----------------------------
 
 #--Read in original SCCS data---
 load("SCCS.Rdata",.GlobalEnv)
 #--Read in two weight matrices--
 ll<-as.matrix(read.dta("langwm.dta")[,-1])
 dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)])
 #--Read in the imputed dataset---
 load("impdat.Rdata",.GlobalEnv)
 
 #--create dep.varb. you wish to use from SCCS data--
 #--Here we sum variables measuring how much a society values children--
 depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #can replace "sum" with "max"
 depvar<-SCCS$v90
 #--find obs. for which dep. varb. is non-missing--
 zdv<-which(!is.na(depvar))
 depvar<-depvar[zdv]
 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED
 depvarname<-"police"
 #--can add additional SCCS variable, but only if it has no missing values---
 dateobs<-SCCS$v838
 dateobs<-dateobs[zdv]
 
 #--look at histogram and frequencies for the dep. varb.--
 hist(depvar)
 table(depvar)
 
 #--modify weight matrices---
 #--set diagonal equal to zeros--
 diag(ll)<-0
 diag(dd)<-0
 #--use only obs. where dep. varb. non-missing--
 ll<-ll[zdv,zdv]
 dd<-dd[zdv,zdv]
 #--row standardize (rows sum to one)
 ll<-ll/rowSums(ll)
 dd<-dd/rowSums(dd)
 #--check to see that rows sum to one
 rowSums(ll)
 rowSums(dd)
 #--make weight matrix object for later autocorrelation test--
 wmatll<-mat2listw(as.matrix(ll))
 wmatdd<-mat2listw(as.matrix(dd))
 
 
 indpv<-c("pre_mar_sex", "money", "foodstress", "femctrldwellg", "wealthy",
 "femsubs","foodscarc",
 "fratgrpstr","marrcaptives","plunder",###"climate",
 "exogamy","ncmallow","superjh","moralgods","fempower","sexratio",
 "war","himilexp","wagelabor",
 "famsize","settype","localjh","money",
 "cultints","roots","cereals","gath","hunt","fish",
 "anim","pigs","milk","plow","bovines","tree","foodtrade",
 "ndrymonth","ecorich",
 "popdens","pathstress","CVrain","rain","temp","AP1","AP2")
 
 
 #-----------------------------------------------------
 #---Estimate model on each imputed dataset------------
 #-----------------------------------------------------
 
 #--number of imputed datasets--
 #nimp<-10
 nimp<-3  ################################################################## speedup test
 
 #--will append values to these empty objects--
 vif<-NULL
 ss<-NULL
 beta<-NULL
 dng<-NULL
 
 #--loop through the imputed datasets--
 for (i in 1:nimp){
 
 #--select the ith imputed dataset--
 m9<-impdat[which(impdat$.imp==i),]
 #--retain only obs. for which dep. varb. is nonmissing--
 m9<-m9[zdv,]
 
 #--create spatially lagged dep. varbs.--
 y<-as.matrix(depvar)
 xx<-as.matrix(m9[,indpv])
 #--for instruments we use the spatial lag of our indep. varbs.--
 #--First, the spatially lagged varb. for distance--
 xdy<-dd%*%xx
 cyd<-dd%*%y
 o<-lm(cyd~xdy)
 #--the fitted value is our instrumental variable--
 fydd<-fitted(o)
 #--keep R2 from this regression--
 dr2<-summary(o)$r.squared
 #--Then, the spatially lagged varb. for language--
 xly<-ll%*%xx   
 cyl<-ll%*%y
 o<-lm(cyl~xly)
 #--the fitted value is our instrumental variable--
 fyll<-fitted(o)
 #--keep R2 from this regression--
 lr2<-summary(o)$r.squared
 m9<-cbind(m9,fydd,fyll)
 
 #--OLS estimate of unrestricted model--
 
 xUR<-lm(depvar~fyll+fydd+
 pre_mar_sex+money+foodstress+femctrldwellg+wealthy+
 fratgrpstr+marrcaptives+plunder+###climate+
 cultints+roots+cereals+gath+plow+
 hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 ecorich+popdens+pathstress+exogamy+ncmallow+famsize+
 settype+localjh+superjh+moralgods+fempower+femsubs+
 sexratio+war+himilexp+money+wagelabor
 ,data=m9)
 
 #--OLS estimate of restricted model--
 xR<-lm(depvar~fyll+fydd+                
 ###climate+
 roots+plow+
 hunt+fish+anim+foodtrade+exogamy+
 superjh+moralgods+fempower
 ,data=m9)
 
 # fydd+                ### fyll+ <--- ADDED fydd for distance clustering
 #  fratgrpstr+marrcaptives+plunder+   ###climate+fish+
 #  plow+                              ###cereals+gath+
 #  ###hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+
 #  ###pathstress+                             ecorich+popdens+exogamy+ncmallow+famsize+
 #  ###settype+localjh+superjh+moralgods+
 #  fempower+femsubs                   ###+
 #  ###war+himilexp  +money+wagelabor  ###sexratio+
 #  ,data=m9)
 
 #--corrected sigma2 and R2 for 2SLS--
 qxx<-m9
 qxx[,"fydd"]<-cyd
 qxx[,"fyll"]<-cyl
 b<-coef(xR)
 incpt<-matrix(1,NROW(qxx),1)
 x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]]))
 e<-y-x%*%as.matrix(b)
 cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x)))
 cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2))
 
 #--collect coefficients and their variances--
 ov<-summary(xR)
 vif<-rbind(vif,vif(xR))
 ss<-rbind(ss,diag(ov$cov*cs2))
 #--collect robust coef. variances when there is heteroskedasticity--
 #eb<-e^2
 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]]))
 #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x)
 #ss<-rbind(ss,diag(hcm))
 beta<-rbind(beta,coef(xR))
 
 #--collect some model diagnostics--
     dropt<-c("fratgrpstr","cultints","cereals","milk","popdens","settype","localjh","himilexp","wagelabor","war","sexratio","famsize",  "ncmallow","ecorich","pigs","plunder","pre_mar_sex","money","femctrldwellg","wealthy","foodscarc","pathstress","marrcaptives","foodstress","femsubs","tree","gath","bovines" )
 
 #--Ramsey RESET test--
 p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE)
 #--Wald test (H0: dropped variables have coefficient equal zero)--
 o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2]
 p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue
 #--Heteroskedasticity test (H0: homoskedastic residuals)--
 p3<-ncv.test(xR)$ChiSquare
 #--Shapiro-Wilke normality test (H0: residuals normal)
 p4<-qchisq(shapiro.test(residuals(xR))$p.value,1,lower.tail=FALSE)
 #--LaGrange Multiplier test for spatial autocorrelation: language--
 o<-lm.LMtests(xR, wmatll, test=c("LMlag"))
 p5<-as.numeric(o$LMlag$statistic)
 #--LaGrange Multiplier test for spatial autocorrelation: distance--
 o<-lm.LMtests(xR, wmatdd, test=c("LMlag"))
 p6<-as.numeric(o$LMlag$statistic)
 #--model R2--
 p7<-cr2
 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2))
 
 }
 
 
 #--------------------------------------------
 #--Rubin's formulas for combining estimates--
 #--------------------------------------------
 
 #--first find final regr. coefs. and p-values--
 mnb<-apply(beta,2,mean)
 ####################vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1)
 vrb<-colSums((beta-t(matrix(mnb,length(mnb),nimp)))^2)/(nimp-1)
 mnv<-apply(ss,2,mean)
 vrT<-mnv+vrb*(1-nimp^(-1))
 fst<-mnb^2/vrT
 r<-(1+nimp^(-1))*vrb/mnv
 v<-(nimp-1)*(1+r^(-1))^2
 pval<-pf(fst,1,v,lower.tail=FALSE)
 bbb<-data.frame(round(cbind(mnb,fst,v,pval),10)) #,3 changed to .8 for significance test
 bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
 names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF")
 
 #--Then combine the diagnostics we collected--
 dng<-data.frame(dng)
 names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd",
 "R2:final model","R2:IV(distance)","R2:IV(language)")
 r2<-apply(dng[,7:9],2,mean)
 adng<-dng[,1:6]
 mdm<-apply(adng,2,mean)
 vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1)
 aa<-4*mdm^2-2*vrd
 aa[which(aa<0)]<-0
 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5)
 vd<-(nimp-1)*(1+rd^(-1))^2
 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd)
 #-All chi-sq we collected have df=1-------
 pvald<-pf(Dm,1,vd,lower.tail=FALSE)
 ccc<-data.frame(round(cbind(Dm,vd,pvald),3))
 names(ccc)<-c("Fstat","df","pvalue")
 
 #--write results to csv file for perusal in spreadsheet--
 write.csv("==OLS model for depvar==",file="OLSresults.csv",append=FALSE)
 write.csv(bbb,file="OLSresults.csv",append=TRUE)
 write.csv(r2,file="OLSresults.csv",append=TRUE)
 write.csv(ccc,file="OLSresults.csv",append=TRUE)
 
 aaa<-c(table(depvar), NROW(depvar),depvarname)
 imp<-"number of imputations nimp="
 impute=c(imp,nimp)
 
bbb
r2
ccc
bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) #.8 changed to back to .3 for significance test
bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3)
bbb
aaa
impute

Logging Fempower didnt remove RESET --DW having a look at your progress

not clear what should have been logged.

Interesting that fyll is negative, fydd positive.

 mnb    fst           v  pval   VIF
(Intercept)  2.295  4.278      91.203 0.041    NA
fyll        -0.940  3.485    1934.111 0.062 2.252
fydd         0.492  4.446   48995.164 0.035 2.034
roots       -0.474  4.267  122280.038 0.039 1.214
plow         0.953 10.002   58471.995 0.002 1.741
hunt        -0.178  8.454    5789.773 0.004 1.567
fish        -0.125  5.471  562331.911 0.019 1.319
anim        -0.267 18.954     995.664 0.0002 2.021
foodtrade    0.018  4.011 5133922.582 0.045 1.207
exogamy     -0.204  7.654   26028.450 0.006 1.074
superjh      0.587 41.503    3449.695 0.0000000001 1.744
moralgods    0.196  4.741    1004.977 0.030 1.540
fempower     0.242  0.817       8.562 0.391 1.131

(you left off the R2 and diagnostics) DRW RERUN:

                   coef     Fstat          ddf     pvalue   VIF
(Intercept)  2.11549231  3.847829 9.798975e+02 0.05009324    NA
fyll        -0.90891797  3.283457 6.787713e+03 0.07002553 2.244 <- negative on language
fydd         0.49596925  4.520688 1.917518e+07 0.03348737 2.034
roots       -0.47381415  4.236001 1.149779e+08 0.03957528 1.215
plow         0.95749750  9.920426 4.269428e+04 0.00163567 1.760
hunt        -0.18504478  9.023295 9.428143e+03 0.00267260 1.571
fish        -0.12614962  5.536005 3.104774e+04 0.01863526 1.318
anim        -0.28010379 19.939583 1.873217e+02 0.00001379 2.028
foodtrade    0.01873274  4.248579 5.189531e+03 0.03933290 1.199
exogamy     -0.19025095  6.649897 1.259677e+04 0.00992739 1.067
superjh      0.56960390 36.268465 1.414129e+02 0.00000001 1.773
moralgods    0.21863101  5.333031 5.491904e+01 0.02471200 1.520
fempower     0.28426556  1.594840 2.331993e+02 0.20789726 1.085
 R2:final model R2:IV(distance) R2:IV(language) 
      0.5865162       0.9811534       0.9772505 <-- 59% very high r2)
                 Fstat          df pvalue
RESET           13.974     189.742  0.000 <- which variable to log?
Wald on restrs.  0.084    5328.628  0.773
NCV              8.438      53.792  0.005
SWnormal        10.732    1317.135  0.001
lagll            1.489 1279956.371  0.222
lagdd            2.543   32429.985  0.111

B| YOUR FIRST RESULTS Restricted depvar:________

Personal tools