Difference between revisions of "EduMod-26"
(→A|after removing non significant junk :)) |
(→A|after removing non significant junk :)) |
||
Line 2,632: | Line 2,632: | ||
==A|after removing non significant junk :) == | ==A|after removing non significant junk :) == | ||
+ | DRW: Here's your error | ||
+ | + #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | + #--Stage 2 OLS estimate of restricted model-- | ||
+ | + ###xR<-lm(depvar ~ fyll + cultints + roots + fish + | ||
+ | + ### exogamy + settype + femsubs, data = m9) | ||
+ | + xR<-lm(depvar~fyll+dateobs+ #1#fydd | ||
+ | + cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum | ||
+ | + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ | ||
+ | Error: unexpected symbol in: | ||
+ | " cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum | ||
+ | anim" | ||
+ | '''[[User:Douglas R. White|Doug]] 10:27, 3 November 2009 (PST) You forgot the + after cereals | ||
+ | NEXT ERROR: | ||
+ | > #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | > #--Stage 2 OLS estimate of restricted model-- | ||
+ | > ###xR<-lm(depvar ~ fyll + cultints + roots + fish + | ||
+ | > ### exogamy + settype + femsubs, data = m9) | ||
+ | > xR<-lm(depvar~fyll+dateobs+ #1#fydd | ||
+ | + cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum | ||
+ | + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ | ||
+ | + +ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ | ||
+ | + settype+localjh+superjh+moralgods+fempower+femsubs+ | ||
+ | + sexratio+war+himilexp+money+wagelabor+ | ||
+ | + migr+brideprice+nuclearfam+pctFemPolyg | ||
+ | + +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy | ||
+ | + , data = m9) ###ADDED | ||
+ | Error in inherits(x, "data.frame") : object "m9" not found | ||
+ | Error: (DRW: extra + before ecorich | ||
+ | RUN AGAIN AND LEARN HOW TO FIND AND COPY, SCAN, the first error | ||
+ | |||
+ | Program 1 --> Program 2 | ||
+ | #MI--make the imputed datasets | ||
+ | #--change the following path to the directory with your data and program-- | ||
+ | setwd("C:/My Documents/MI") | ||
+ | rm(list=ls(all=TRUE)) | ||
+ | options(echo=TRUE) | ||
+ | #--you need the following two packages--you must install them first-- | ||
+ | library(foreign) | ||
+ | library(mice) | ||
+ | library(tripak) | ||
+ | library(zoo) | ||
+ | library(sp) | ||
+ | library(maptools) | ||
+ | library(spam) | ||
+ | |||
+ | #--To find the citation for a package, use this function:--- | ||
+ | citation("mice") | ||
+ | |||
+ | #----------------------------- | ||
+ | #--Read in data, rearrange---- | ||
+ | #----------------------------- | ||
+ | |||
+ | #--Read in auxiliary variables--- | ||
+ | load("vaux.Rdata",.GlobalEnv) | ||
+ | row.names(vaux)<-NULL | ||
+ | #--Read in the SCCS dataset--- | ||
+ | load("SCCS.Rdata",.GlobalEnv) | ||
+ | |||
+ | #--look at first 6 rows of vaux-- | ||
+ | head(vaux) | ||
+ | #--look at field names of vaux-- | ||
+ | names(vaux) | ||
+ | #--check to see that rows are properly aligned in the two datasets-- | ||
+ | #--sum should equal 186--- | ||
+ | sum((SCCS$socname==vaux$socname)*1) | ||
+ | #--remove the society name field-- | ||
+ | vaux<-vaux[,-28] | ||
+ | names(vaux) | ||
+ | |||
+ | #--Two nominal variables: brg and rlg---- | ||
+ | #--brg: consolidated Burton Regions----- | ||
+ | #0 = (rest of world) circumpolar, South and Meso-America, west North America | ||
+ | #1 = Subsaharan Africa | ||
+ | #2 = Middle Old World | ||
+ | #3 = Southeast Asia, Insular Pacific, Sahul | ||
+ | #4 = Eastern Americas | ||
+ | #--rlg: Religion--- | ||
+ | #'0 (no world religion)' | ||
+ | #'1 (Christianity)' | ||
+ | #'2 (Islam)' | ||
+ | #'3 (Hindu/Buddhist)' | ||
+ | |||
+ | #--check to see number of missing values in vaux, | ||
+ | #--whether variables are numeric, | ||
+ | #--and number of discrete values for each variable--- | ||
+ | vvn<-names(vaux) | ||
+ | pp<-NULL | ||
+ | for (i in 1:length(vvn)){ | ||
+ | nmiss<-length(which(is.na(vaux[,vvn[i]]))) | ||
+ | numeric<-is.numeric(vaux[,vvn[i]]) | ||
+ | numDiscrVals<-length(table(vaux[,vvn[i]])) | ||
+ | pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) | ||
+ | } | ||
+ | row.names(pp)<-vvn | ||
+ | pp | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | #--extract variables to be used from SCCS, put in dataframe fx-- | ||
+ | fx<-data.frame( | ||
+ | socname=SCCS$socname,socID=SCCS$"sccs#", | ||
+ | valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), | ||
+ | dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, | ||
+ | cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, | ||
+ | fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, | ||
+ | pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, | ||
+ | bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, | ||
+ | foodtrade=SCCS$v819,foodscarc=SCCS$v1685, | ||
+ | ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, | ||
+ | CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, | ||
+ | AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, | ||
+ | exogamy=SCCS$v72,ncmallow=SCCS$v227, ### famsize=SCCS$v80, | ||
+ | settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, | ||
+ | moralgods=SCCS$v238,fempower=SCCS$v663, | ||
+ | sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), | ||
+ | war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, | ||
+ | money=SCCS$v155,wagelabor=SCCS$v1732, | ||
+ | migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, | ||
+ | nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, | ||
+ | nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, | ||
+ | segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED | ||
+ | |||
+ | #--look at first 6 rows of fx-- | ||
+ | head(fx) | ||
+ | |||
+ | #--check to see number of missing values-- | ||
+ | #--also check whether numeric-- | ||
+ | vvn<-names(fx) | ||
+ | pp<-NULL | ||
+ | for (i in 1:length(vvn)){ | ||
+ | nmiss<-length(which(is.na(fx[,vvn[i]]))) | ||
+ | numeric<-is.numeric(fx[,vvn[i]]) | ||
+ | pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) | ||
+ | } | ||
+ | row.names(pp)<-vvn | ||
+ | pp | ||
+ | |||
+ | #--identify variables with missing values-- | ||
+ | z<-which(pp[,1]>0) | ||
+ | zv1<-vvn[z] | ||
+ | zv1 | ||
+ | #--identify variables with non-missing values-- | ||
+ | z<-which(pp[,1]==0) | ||
+ | zv2<-vvn[z] | ||
+ | zv2 | ||
+ | |||
+ | #----------------------------- | ||
+ | #----Multiple imputation------ | ||
+ | #----------------------------- | ||
+ | |||
+ | #--number of imputed data sets to create-- | ||
+ | nimp<-10 | ||
+ | #--one at a time, loop through those variables with missing values-- | ||
+ | for (i in 1:length(zv1)){ | ||
+ | #--attach the imputand to the auxiliary data-- | ||
+ | zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) | ||
+ | #--in the following line, the imputation is done-- | ||
+ | aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") | ||
+ | #--during first iteration of the loop, create dataframe impdat-- | ||
+ | if (i==1){ | ||
+ | impdat<-data.frame(aqq[,c(".id",".imp")]) | ||
+ | } | ||
+ | #--the imputand is placed as a field in impdat and named-- | ||
+ | impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) | ||
+ | names(impdat)[NCOL(impdat)]<-zv1[i] | ||
+ | } | ||
+ | |||
+ | #--now the non-missing variables are attached to impdat-- | ||
+ | gg<-NULL | ||
+ | for (i in 1:nimp){ | ||
+ | gg<-rbind(gg,data.frame(fx[,zv2])) | ||
+ | } | ||
+ | impdat<-cbind(impdat,gg) | ||
+ | |||
+ | #--take a look at the top 6 and bottom 6 rows of impdat-- | ||
+ | head(impdat) | ||
+ | tail(impdat) | ||
+ | |||
+ | #--impdat is saved as an R-format data file-- | ||
+ | save(impdat,file="impdat.Rdata") | ||
+ | |||
+ | |||
+ | |||
+ | Program 2 | ||
+ | #MI--estimate model with network-lagged dependent variables, combine results | ||
+ | rm(list=ls(all=TRUE)) | ||
+ | #--Set path to your directory with data and program-- | ||
+ | setwd("C:/My Documents/MI") | ||
+ | options(echo=TRUE) | ||
+ | |||
+ | #--need these packages for estimation and diagnostics-- | ||
+ | library(foreign) | ||
+ | library(spdep) | ||
+ | library(car) | ||
+ | library(lmtest) | ||
+ | library(sandwich) | ||
+ | |||
+ | #----------------------------- | ||
+ | #--Read in data, rearrange---- | ||
+ | #----------------------------- | ||
+ | |||
+ | #--Read in original SCCS data--- | ||
+ | load("SCCS.Rdata",.GlobalEnv) | ||
+ | #--Read in two weight matrices-- | ||
+ | ll<-as.matrix(read.dta("langwm.dta")[,-1]) | ||
+ | dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) | ||
+ | #--Read in the imputed dataset--- | ||
+ | load("impdat.Rdata",.GlobalEnv) | ||
+ | |||
+ | #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT | ||
+ | #--create dep.varb. you wish to use from SCCS data-- | ||
+ | #--Here we sum variables measuring how much a society values children-- | ||
+ | #--can replace "sum" with "max" | ||
+ | ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) | ||
+ | depvar<-SCCS$v740###NEW | ||
+ | #--find obs. for which dep. varb. is non-missing-- | ||
+ | zdv<-which(!is.na(depvar)) | ||
+ | depvar<-depvar[zdv] | ||
+ | #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED | ||
+ | ###depvarname<-"childvar" | ||
+ | depvarname<-"fam_arr" | ||
+ | #--can add additional SCCS variable, but only if it has no missing values--- | ||
+ | #dateobs<-SCCS$v838 | ||
+ | #dateobs<-dateobs[zdv] | ||
+ | |||
+ | #--look at frequencies and quartiles for the dep. varb.-- | ||
+ | summary(depvar) | ||
+ | table(depvar) | ||
+ | |||
+ | #--modify weight matrices--- | ||
+ | #--set diagonal equal to zeros-- | ||
+ | diag(ll)<-0 | ||
+ | diag(dd)<-0 | ||
+ | #--use only obs. where dep. varb. non-missing-- | ||
+ | ll<-ll[zdv,zdv] | ||
+ | dd<-dd[zdv,zdv] | ||
+ | #--row standardize (rows sum to one) | ||
+ | ll<-ll/rowSums(ll) | ||
+ | dd<-dd/rowSums(dd) | ||
+ | #--make weight matrix object for later autocorrelation test-- | ||
+ | wmatll<-mat2listw(as.matrix(ll)) | ||
+ | wmatdd<-mat2listw(as.matrix(dd)) | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", | ||
+ | "fempower","sexratio","war","himilexp","wagelabor","settype", #1# "famsize", | ||
+ | "localjh","money","cultints","roots","cereals","gath","hunt","fish", | ||
+ | "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", | ||
+ | "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", | ||
+ | "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", | ||
+ | "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED | ||
+ | |||
+ | #----------------------------------------------------- | ||
+ | #---Estimate model on each imputed dataset------------ | ||
+ | #----------------------------------------------------- | ||
+ | |||
+ | #--number of imputed datasets-- | ||
+ | nimp<-10 | ||
+ | |||
+ | #--will append values to these empty objects-- | ||
+ | vif<-NULL | ||
+ | ss<-NULL | ||
+ | beta<-NULL | ||
+ | dng<-NULL | ||
+ | |||
+ | #--loop through the imputed datasets-- | ||
+ | for (i in 1:nimp){ | ||
+ | |||
+ | #--select the ith imputed dataset-- | ||
+ | m9<-impdat[which(impdat$.imp==i),] | ||
+ | #--retain only obs. for which dep. varb. is nonmissing-- | ||
+ | m9<-m9[zdv,] | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | #--create spatially lagged dep. varbs. in stage 1 OLS-- | ||
+ | y<-as.matrix(depvar) | ||
+ | xx<-as.matrix(m9[,indpv]) | ||
+ | #--for instruments we use the spatial lag of our indep. varbs.-- | ||
+ | #--First, the spatially lagged varb. for distance-- | ||
+ | xdy<-dd%*%xx | ||
+ | cyd<-dd%*%y | ||
+ | o<-lm(cyd~xdy) | ||
+ | #--the fitted value is our instrumental variable-- | ||
+ | fydd<-fitted(o) | ||
+ | #--keep R2 from this regression-- | ||
+ | dr2<-summary(o)$r.squared | ||
+ | #--Then, the spatially lagged varb. for language-- | ||
+ | xly<-ll%*%xx | ||
+ | cyl<-ll%*%y | ||
+ | o<-lm(cyl~xly) | ||
+ | #--the fitted value is our instrumental variable-- | ||
+ | fyll<-fitted(o) | ||
+ | #--keep R2 from this regression-- | ||
+ | lr2<-summary(o)$r.squared | ||
+ | m9<-cbind(m9,fydd,fyll) | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | #--Stage 2 OLS estimate of unrestricted model-- | ||
+ | xUR<-lm(depvar~fyll+fydd+dateobs+ | ||
+ | cultints+roots+cereals+gath+plow+ | ||
+ | hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ | ||
+ | ecorich+popdens+pathstress+exogamy+ncmallow+ ### famsize+ | ||
+ | settype+localjh+superjh+moralgods+fempower+femsubs+ | ||
+ | sexratio+war+himilexp+money+wagelabor+ | ||
+ | migr+brideprice+nuclearfam+pctFemPolyg+ | ||
+ | nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | #--Stage 2 OLS estimate of restricted model-- | ||
+ | ###xR<-lm(depvar ~ fyll + cultints + roots + fish + | ||
+ | ### exogamy + settype + femsubs, data = m9) | ||
+ | xR<-lm(depvar~fyll+dateobs+ #1#fydd | ||
+ | cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum | ||
+ | ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ | ||
+ | settype+localjh+superjh+moralgods+fempower+femsubs+ | ||
+ | |||
+ | migr+brideprice+nuclearfam+pctFemPolyg | ||
+ | +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy | ||
+ | , data = m9) ###ADDED | ||
+ | |||
+ | #--corrected sigma2 and R2 for 2SLS-- | ||
+ | qxx<-m9 | ||
+ | qxx[,"fydd"]<-cyd | ||
+ | qxx[,"fyll"]<-cyl | ||
+ | b<-coef(xR) | ||
+ | incpt<-matrix(1,NROW(qxx),1) | ||
+ | x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) | ||
+ | e<-y-x%*%as.matrix(b) | ||
+ | cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) | ||
+ | cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) | ||
+ | |||
+ | #--collect coefficients and their variances-- | ||
+ | ov<-summary(xR) | ||
+ | vif<-rbind(vif,vif(xR)) | ||
+ | ss<-rbind(ss,diag(ov$cov*cs2)) | ||
+ | #--collect robust coef. variances when there is heteroskedasticity-- | ||
+ | #eb<-e^2 | ||
+ | #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) | ||
+ | #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) | ||
+ | #ss<-rbind(ss,diag(hcm)) | ||
+ | beta<-rbind(beta,coef(xR)) | ||
+ | |||
+ | #MODIFY THESE STATEMENTS FOR A NEW PROJECT | ||
+ | #--collect some model diagnostics-- | ||
+ | dropt<-c("cereals","gath","plow","hunt","anim","dateobs", | ||
+ | "pigs","milk","bovines","foodscarc","ecorich","localjh", #1# "famsize", | ||
+ | "superjh","moralgods","fempower","sexratio","money", | ||
+ | "fydd","wagelabor","war","himilexp","tree","foodtrade") | ||
+ | |||
+ | |||
+ | #--Ramsey RESET test-- | ||
+ | p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) | ||
+ | #--Wald test (H0: dropped variables have coefficient equal zero)-- | ||
+ | o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] | ||
+ | p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue | ||
+ | #--Heteroskedasticity test (H0: homoskedastic residuals)-- | ||
+ | p3<-ncv.test(xR)$ChiSquare | ||
+ | #--Shapiro-Wilke normality test (H0: residuals normal) | ||
+ | p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) | ||
+ | #--LaGrange Multiplier test for spatial autocorrelation: language-- | ||
+ | o<-lm.LMtests(xR, wmatll, test=c("LMlag")) | ||
+ | p5<-as.numeric(o$LMlag$statistic) | ||
+ | #--LaGrange Multiplier test for spatial autocorrelation: distance-- | ||
+ | o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) | ||
+ | p6<-as.numeric(o$LMlag$statistic) | ||
+ | #--model R2-- | ||
+ | p7<-cr2 | ||
+ | dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) | ||
+ | |||
+ | } | ||
+ | |||
+ | #-------------------------------------------- | ||
+ | #--Rubin's formulas for combining estimates-- | ||
+ | #-------------------------------------------- | ||
+ | |||
+ | #--first find final regr. coefs. and p-values-- | ||
+ | mnb<-apply(beta,2,mean) | ||
+ | vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) | ||
+ | mnv<-apply(ss,2,mean) | ||
+ | vrT<-mnv+vrb*(1-nimp^(-1)) | ||
+ | fst<-mnb^2/vrT | ||
+ | r<-(1+nimp^(-1))*vrb/mnv | ||
+ | v<-(nimp-1)*(1+r^(-1))^2 | ||
+ | pval<-pf(fst,1,v,lower.tail=FALSE) | ||
+ | bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) | ||
+ | bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) | ||
+ | names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") | ||
+ | |||
+ | #--Then combine the diagnostics we collected-- | ||
+ | dng<-data.frame(dng) | ||
+ | names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", | ||
+ | "R2:final model","R2:IV(distance)","R2:IV(language)") | ||
+ | r2<-apply(dng[,7:9],2,mean) | ||
+ | adng<-dng[,1:6] | ||
+ | mdm<-apply(adng,2,mean) | ||
+ | vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) | ||
+ | aa<-4*mdm^2-2*vrd | ||
+ | aa[which(aa<0)]<-0 | ||
+ | rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) | ||
+ | vd<-(nimp-1)*(1+rd^(-1))^2 | ||
+ | Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) | ||
+ | #-All chi-sq we collected have df=1------- | ||
+ | pvald<-pf(Dm,1,vd,lower.tail=FALSE) | ||
+ | ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) | ||
+ | names(ccc)<-c("Fstat","df","pvalue") | ||
+ | |||
+ | bbb | ||
+ | r2 | ||
+ | ccc | ||
+ | |||
+ | #Corrected to publication version with depvarname | ||
+ | #--write results to csv file for perusal in spreadsheet-- | ||
+ | write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) | ||
+ | write.csv(bbb,file="OLSresults.csv",append=TRUE) | ||
+ | write.csv(r2,file="OLSresults.csv",append=TRUE) | ||
+ | write.csv(ccc,file="OLSresults.csv",append=TRUE) | ||
==Hopefully the final results!== | ==Hopefully the final results!== |
Revision as of 13:09, 5 November 2009
Abiha Bilgrami - RUN THE MAIN PROGRAM AND THEN RUN THE WORKSHOP PROGRAM. Copy this entire file to your EduMod-X page as instructed.
- Doug's sequential link: EduMod-27: (your dep var here) Imputation and Regression to next EduMod
- Partner 18: EduMod-18: (your dep var here) Imputation and Regression
- Partner 26: EduMod-26: (your dep var here) Imputation and Regression
- Partner 40: EduMod-40: (your dep var here) Imputation and Regression
- Template 11: EduMod-11: (your dep var here) Imputation and Regression
This isnt the way to do it --- see ==9 A| ...== below Doug 19:56, 28 October 2009 (PDT)
Doug 15:48, 4 November 2009 (PST) Abiha, now how have it, I ran your last program, added results: Print. then just Recopy the program, Remove from xR<- all but the items to keep (fydd, fyll and signif). Rerun the new copy of the program. Those could be your final results.
depvar<-apply(SCCS[,c ("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v740 #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(mararr)) mararr<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED depvarname<-"marrarr" #--can add additional SCCS variable, but only if it has no missing values--- dateobs<-SCCS$v838 dateobs<-dateobs[zdv]
Contents
- 1 Your workshop
- 2 Workshop program test of original edumod with marrarr depvar
- 3 Results
- 4 Programs: Copy and paste GOOD START PAGE
- 5 Links
- 6 Copy of Amanda's Step 1 depvar v667 "rape"
- 7 First Try=
- 8 A| (incorrect as to depvar) GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="marrarr"
- 9 A| GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="rape"
- 10 A| GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="mar_arr"
- 11 B| Good Results
- 12 A| after eliminating high VIFs and nonsignificant
- 13 Results
- 14 A|after removing non significant junk :)
- 15 Hopefully the final results!
Your workshop
Back to Imputing_data_for_Regression_Analysis#EduMod
Your main work will be done here after you get your first results with the (4 Programs: Copy and paste) program and paste them below. The demonstration program (Eff and Dow') shows only the final step not the intermediate ones. Here I post the last part of the program. Here you start again to re-estimate the model, this time substituting in xR= (final variables so that xR=xUR (all independent variables). That is done by copying the list of variables that you see after
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model--
(but dont copy the xUR<-lm itself) down to replace the list of xR<-lm variables (the function lm(depvar~list of indep vars) is what does the multiple linear regression, lm="linear model". You will see I have ###commented out the original xR<-lm. Note that for your project rather than this demo you will want also to change your depvar and add to the list of independent variables, working from the codebook, using the variables numbers, like v860, and giving them names in program 1. You probably dont have to change the excluded variables under
#--collect some model diagnostics--
(NOTE: all program statements in the wiki must have a space in the first column)
Now, in a real project, the results you will get by setting xR==xUR need to culled into two sets: those with p<.05 and those to exclude. The analysis of excluded variables when properly specified (the two sets should really be mutually exclusive) will tell you whether you have omitted anything that should be excluded. Read EFF and DOW carefully for this step.
IMPORTANT: put both the FULL SET of results for all your xUR variables, which you will obtain here, and the CULLED SET of results for your xR variables, into the RESULTS section of this page so you and I can check whether you have done the analysis correctly.
Workshop program test of original edumod with marrarr depvar
Program 1 and 2: Modified for all xR=xUR except dateobs - should take about 40 seconds #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("c:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-SCCS$v740 #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED depvarname<-"mar-arr" #--can add additional SCCS variable, but only if it has no missing values--- dateobs<-SCCS$v838 dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","famsize","settype", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg") #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg ,data=m9) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(depvar ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) ###THIS IS JUST PROGRAM 2 EXCEPT DOUG WHITE REDEFINED xR=same as xUR but deleted "dateobs," in line 2 xR<-lm(depvar~fyll+fydd+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg ,data=m9) #--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich", "popdens","pathstress","ncmallow","famsize","localjh", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
Results
From workshop
all xR=xUR except dateobs. Your next step if this were a new project would be to keep xR=only those with p<.05
bbb coef Fstat ddf pvalue VIF (Intercept) -3.778 0.048 3518.772 0.827 NA fyll 2.092 5.493 72408.940 0.019 4.311 fydd -0.701 1.525 27950.568 0.217 3.570 cultints 1.090 3.768 6088.808 0.052 5.222 roots -4.595 3.656 3015.253 0.056 5.091 cereals -1.476 0.386 1699.391 0.534 7.508 gath -0.492 0.925 3771.955 0.336 3.224 plow -2.235 1.249 5935.504 0.264 3.178 hunt -0.162 0.083 7873.798 0.773 5.378 fish 0.273 0.432 1534.074 0.511 3.200 anim -0.138 0.079 22553.987 0.779 5.968 pigs 0.560 0.092 3241.095 0.762 2.233 milk -1.520 0.681 5301.829 0.409 3.970 bovines 1.870 1.011 2777.386 0.315 4.355 tree -5.090 2.826 9236.350 0.093 3.306 foodtrade 0.091 3.101 6431.573 0.078 1.613 foodscarc -0.385 0.947 490.565 0.331 1.306 ecorich -0.198 0.178 1018.154 0.673 1.843 popdens -0.377 0.503 5074.414 0.478 3.888 pathstress -0.119 0.391 4055.907 0.532 2.841 exogamy -0.927 4.464 13709.866 0.035 1.478 ncmallow -0.137 0.387 925.286 0.534 1.682 famsize 0.297 0.335 3180.405 0.563 2.211 settype -0.532 2.115 3169.088 0.146 4.289 localjh -0.572 0.326 1678.280 0.568 1.889 superjh -0.080 0.019 1443.457 0.890 2.719 moralgods 0.162 0.079 274.976 0.779 2.265 fempower 0.302 1.043 219.597 0.308 1.380 femsubs 0.932 5.737 16349.251 0.017 1.880 sexratio -0.369 0.152 114.773 0.697 1.385 war -0.104 1.804 356.775 0.180 1.396 himilexp 1.149 1.009 712.762 0.315 1.621 money 0.369 0.700 6880.529 0.403 2.362 wagelabor -0.959 1.862 85.903 0.176 1.537 migr 0.352 0.092 248.280 0.762 1.567 brideprice -0.927 0.559 8133.950 0.455 2.032 nuclearfam -0.886 0.383 2955.185 0.536 2.374 pctFemPolyg 0.003 0.013 278.994 0.908 1.831 > r2 R2:final model R2:IV(distance) R2:IV(language) 0.2735421 0.9259708 0.9666807 > ccc Fstat df pvalue RESET 2.619 159.018 0.108 Wald on restrs. 0.286 200.401 0.594 NCV -0.015 190.566 1.000 SWnormal 0.172 1898.653 0.678 lagll 3.103 530583.685 0.078 lagdd 4.352 402962.502 0.037
Programs: Copy and paste GOOD START PAGE
These 2 programs took 48 seconds on my computer Doug 08:57, 1 October 2009 (PDT) Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI")) rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), marrtrans=(SCCS$v208+SCCS$v209+SCCS$v605),himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872 ) #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" #1# depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v740 #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(mararr)) mararr<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED depvarname<-"marrarr" #--can add additional SCCS variable, but only if it has no missing values--- dateobs<-SCCS$v838 dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(mararr) table(mararr) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("grlsage","edulevel","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","famsize","settype", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg") #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(marrarr) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(marrarr~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg ,data=m9) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- xR<-lm(marrarr ~ fyll + cultints + roots + fish + exogamy + settype + femsubs, data = m9) #--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich", "popdens","pathstress","ncmallow","famsize","localjh", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",marrarrname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
Links
- Evolution and Human Behaviour: Sexual selection under parental choice in Agropastoral societies Bourde and Green 1983
Citation: Sexual selection under parental choice: the role of parents in the evolution of human mating Evolution and Human Behavior, Volume 28, Issue 6, November 2007, Pages 403-409 Menelaos Apostolou
- School-Age Pregnancy and Parenthood
Chapter 14: The duration of maidenhood across cultures
- Cross-Cultural Patterning of Some Sexual Attitudes and Practices1
Gwen J. Broude http://ccr.sagepub.com/cgi/content/abstract/11/4/227
- Annual Review of Anthropology
Vol. 16: 143-160 (Volume publication date October 1987) (doi:10.1146/annurev.an.16.100187.001043) Cross-Cultural Surveys Today M L Burton, and D R White http://arjournals.annualreviews.org/doi/abs/10.1146/annurev.an.16.100187.001043
- Causes of Conjugal Dissolution: A Cross-cultural Study
Laura Betzig Current Anthropology, Vol. 30, No. 5 (Dec., 1989), pp. 654-676 Published by: The University of Chicago Press on behalf of Wenner-Gren Foundation for Anthropological Research
http://www.jstor.org/stable/2743579
- Factors of Sexual Freedom Among Foragers in Cross-Cultural Perspective
Andrey V. Korotayev http://ccr.sagepub.com/cgi/content/abstract/37/1/29
- Wife–Husband Intimacy and Female Status in Cross-Cultural Perspective
Victor C. de Munck http://ccr.sagepub.com/cgi/content/abstract/41/4/307
- Puberty Rites as Clues to the Nature of Human Adolescence
Glenn Weisfeld http://ccr.sagepub.com/cgi/content/abstract/31/1/27
- Assumptions on Sex and Society in the Biosocial Theory of Incest
Lewellyn Hendrix http://ccr.sagepub.com/cgi/content/abstract/33/2/193
- Romance, Parenthood, and Gender in a Modern African Society
Daniel Jordan Smith Ethnology, Vol. 40, No. 2 (Spring, 2001), pp. 129-151 Published by: University of Pittsburgh- Of the Commonwealth System of Higher Education
http://www.jstor.org/stable/3773927
Copy of Amanda's Step 1 depvar v667 "rape"
depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED depvarname<-"child value"
to
###depvar<-apply(SCCS[,c("v740")],1,sum) "marr-arr"<-SCCS$740 ###ADDED Doug 12:45, 15 October 2009 (PDT) #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"child value" TAKEN OUT depvarname<-"marr-arr"
First Try=
- This doesn't make sense, if I am looking at significance of depvars for marriage arrangements then why is it showing errors.
- Variables are
>Girl's age
>the level of education of the potential groom/bride
>marital residence
>agro pastoral society
A| (incorrect as to depvar) GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="marrarr"
Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI")) rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v667###NEW #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"childvar" depvarname<-"rape" #--can add additional SCCS variable, but only if it has no missing values--- #dateobs<-SCCS$v838 #dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(marrarr) table(marrarr) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","famsize","settype", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(marrarr) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(marrarr~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg+ nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(marrarr ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) xR<-lm(marrarr~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy , data = m9) ###ADDED
#--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich", "popdens","pathstress","ncmallow","famsize","localjh", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc
#Corrected to publication version with depvarname #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
B| RESULTS!!!
.id .imp valchild femsubs foodscarc ndrymonth exogamy ncmallow superjh 1 1 1 20 4 3 12 5 3 2 2 2 1 28 4 4 7 4 8 1 3 3 1 28 5 4 7 5 5 3 4 4 1 20 4 1 6 3 8 4 5 5 1 28 5 1 4 5 3 3 6 6 1 32 7 1 6 4 3 4 moralgods fempower sexratio marrtrans himilexp wagelabor migr nuclearfam 1 1 6 2 8 1 1 0 1 2 3 6 2 14 0 1 0 0 3 1 5 1 12 0 3 1 0 4 2 6 2 12 1 1 1 0 5 2 5 3 12 1 1 1 0 6 2 7 2 10 0 1 0 0 pctFemPolyg socname socID cultints roots cereals gath hunt fish anim 1 12 Nama Hottentot 1 1 0 0 1 3 1 5 2 19 Kung Bushmen 2 1 0 0 8 2 0 0 3 66 Thonga 3 3 0 1 0 1 1 3 4 54 Lozi 4 5 0 1 1 2 1 2 5 45 Mbundu 5 3 0 1 1 1 1 2 6 41 Suku 6 3 1 0 1 2 1 0 pigs milk plow bovines tree foodtrade ecorich popdens pathstress CVrain 1 0 1 0 1 0 5 2 1 8 5.4952936 2 0 0 0 0 0 0 2 1 10 1.2035850 3 0 1 0 1 0 5 5 5 11 0.3019896 4 0 1 0 1 0 5 5 3 16 0.3315805 5 0 0 0 1 0 0 5 3 15 0.1832097 6 0 0 0 0 0 5 5 3 18 0.0825547 rain temp AP1 AP2 famsize settype localjh money brideprice 1 1 7 12 3 2 1 1 1 0 2 1 7 20 4 4 1 2 1 0 3 1 2 16 4 5 7 1 3 1 4 1 2 21 6 2 3 1 1 0 5 1 2 19 4 2 7 2 4 1 6 1 2 18 2 4 7 2 4 1 > tail(impdat) .id .imp valchild femsubs foodscarc ndrymonth exogamy ncmallow superjh 1855 181 10 16 4 1 0 3 5 1 1856 182 10 10 2 4 2 3 2 1 1857 183 10 20 3 1 0 3 8 1 1858 184 10 30 3 4 10 4 4 1 1859 185 10 28 1 1 8 3 1 1 1860 186 10 36 6 1 0 5 8 1 moralgods fempower sexratio marrtrans himilexp wagelabor migr nuclearfam 1855 2 6 3 18 0 1 0 1 1856 2 7 2 18 0 1 0 0 1857 1 6 1 12 0 3 0 1 1858 3 6 2 12 1 2 1 0 1859 2 5 2 10 0 1 0 0 1860 4 7 2 14 0 1 1 1 pctFemPolyg socname socID cultints roots cereals gath hunt fish anim 1855 80 Cayua 181 3 1 0 2 2 1 0 1856 6 Lengua 182 2 1 0 2 5 2 0 1857 10 Abipon 183 1 0 0 2 6 1 1 1858 10 Mapuche 184 5 0 1 1 0 1 2 1859 2 Tehuelche 185 1 0 0 2 7 1 0 1860 10 Yahgan 186 1 0 0 1 2 7 0 pigs milk plow bovines tree foodtrade ecorich popdens pathstress CVrain 1855 0 0 0 0 0 0 4 1 12 0.1182235 1856 0 0 0 0 0 0 4 1 11 0.2388077 1857 0 0 0 0 0 0 4 1 12 0.1240110 1858 0 1 0 1 0 5 4 4 8 0.5305867 1859 0 0 0 0 0 5 2 1 7 3.7490359 1860 0 0 0 0 0 0 4 1 7 0.3865824 rain temp AP1 AP2 famsize settype localjh money brideprice 1855 1 3 20 4 4 7 1 1 0 1856 1 3 19 4 4 1 2 1 0 1857 1 3 17 2 2 1 1 1 1 1858 3 3 20 4 2 6 3 3 1 1859 2 7 10 0 4 1 2 1 0 1860 4 4 14 3 2 1 1 1 0 > > #--impdat is saved as an R-format data file-- > save(impdat,file="impdat.Rdata") > > > > Program 2 Error: unexpected numeric constant in " Program 2" > #MI--estimate model with network-lagged dependent variables, combine results > rm(list=ls(all=TRUE)) > #--Set path to your directory with data and program-- > setwd("C:/My Documents/MI") > options(echo=TRUE) > > #--need these packages for estimation and diagnostics-- > library(foreign) > library(spdep) > library(car) > library(lmtest) > library(sandwich) > > #----------------------------- > #--Read in data, rearrange---- > #----------------------------- > > #--Read in original SCCS data--- > load("SCCS.Rdata",.GlobalEnv) > #--Read in two weight matrices-- > ll<-as.matrix(read.dta("langwm.dta")[,-1]) > dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) > #--Read in the imputed dataset--- > load("impdat.Rdata",.GlobalEnv) > > #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT > #--create dep.varb. you wish to use from SCCS data--
A| GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="rape"
Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI") rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227, ### famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v667###NEW #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"childvar" depvarname<-"rape" #--can add additional SCCS variable, but only if it has no missing values--- #dateobs<-SCCS$v838 #dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","settype", #1# "famsize", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+ ### famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg+ nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(depvar ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) xR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy , data = m9) ###ADDED
#--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich","localjh", #1# "famsize", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc
#Corrected to publication version with depvarname #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
A| GOOD START HERE FOR OTHER PROJECTS Expanded xR to full xUR- ALL indvars w/ depvar="mar_arr"
Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI") rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227, ### famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v740###NEW #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"childvar" depvarname<-"fam_arr" #--can add additional SCCS variable, but only if it has no missing values--- #dateobs<-SCCS$v838 #dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","settype", #1# "famsize", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+ ### famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg+ nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(depvar ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) xR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ +ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy , data = m9) ###ADDED
#--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich","localjh", #1# "famsize", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc
#Corrected to publication version with depvarname #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
B| Good Results
740. Marriage Arrangements (Female)
35 . = Missing data 12 1 = Individual selects and/or courts partner autonomously: approval by parents or others unnecessary 40 2 = Individual selects and/or courts partner authonomously: parental, kin, and/or community approval necessary or highly desireable 4 3 = Individual suggests partner to parents or others; arrangements for courtship or marriage then proceed if choice is approved OR parents ask approval of individuals to initiate a match OR individual is approached by parent or others on behalf of suitor and can accept or reject the match 27 4 = Individual choice and arranged marriages are alternatives 35 5 = Parents choose partner: individual can object 33 6 = Parents choose partner: individual cannot easily object or rarely objects in fact > bbb coef Fstat ddf pvalue VIF (Intercept) 0.659 0.013 596.315 0.909 NA fyll 1.083 0.839 666.037 0.360 2.499 fydd -0.214 0.101 717.302 0.751 2.778 dateobs -0.001 1.092 1493.955 0.296 1.450 cultints 0.316 2.980 4523.850 0.084 5.682 <-- close roots -0.639 0.632 1689.992 0.427 5.895 cereals -1.567 3.925 8010.649 0.048 9.040 <-- significant gath 0.148 0.903 3537.236 0.342 3.146 plow 0.060 0.008 9095.234 0.928 3.439 hunt -0.116 0.421 15888.612 0.517 6.160 fish 0.036 0.069 2939.733 0.793 3.527 anim 0.139 0.775 20701.768 0.379 5.695 pigs -0.238 0.169 896.278 0.681 2.681 milk -0.975 2.182 426.180 0.140 4.511 bovines 0.850 1.677 1555.312 0.195 5.270 tree -0.365 0.139 2290.526 0.709 3.958 foodtrade 0.022 1.695 4058.163 0.193 1.765 foodscarc 0.002 0.000 85.497 0.986 1.424 ecorich -0.068 0.190 1257.310 0.663 2.076 popdens -0.263 2.519 3448.404 0.113 3.947 pathstress -0.032 0.291 1729.590 0.590 2.828 exogamy 0.221 2.624 1035.425 0.106 1.541 <--close ncmallow -0.016 0.051 836.707 0.821 1.711 settype 0.087 0.490 1081.303 0.484 4.814 localjh 0.404 1.514 928.159 0.219 2.054 superjh 0.069 0.113 1148.963 0.737 3.202 moralgods 0.156 0.819 536.541 0.366 2.211 fempower -0.136 2.063 411.225 0.152 1.526 femsubs -0.172 1.833 1813.909 0.176 1.889 sexratio -0.162 0.197 33.968 0.660 1.589 war 0.005 0.026 177.670 0.873 1.976 himilexp -0.178 0.244 1219.469 0.621 1.720 money 0.017 0.014 1437.071 0.906 2.585 wagelabor -0.027 0.009 29.054 0.925 1.791 migr 0.001 0.000 62.654 0.998 1.721 brideprice 0.380 0.655 733.055 0.419 2.720 nuclearfam 0.311 0.389 338.098 0.533 2.773 pctFemPolyg 0.019 6.354 199.049 0.012 2.124 <-- significant nonmatrel 0.097 0.153 100.504 0.697 1.644 lrgfam 0.024 0.155 2060.069 0.694 2.698 malesexag 0.001 0.000 53.640 0.992 1.628 segadlboys -0.096 0.525 427.013 0.469 1.606 agrlateboy 0.190 3.267 97.330 0.074 1.724 <-- close > r2 R2:final model R2:IV(distance) R2:IV(language) 0.3735736 0.9101640 0.9012827 > ccc Fstat df pvalue RESET 1.167 392.140 0.281 Wald on restrs. 2.098 317.314 0.148 NCV 0.060 840.607 0.806 SWnormal 2.014 1534.979 0.156 lagll 2.734 178750.663 0.098 lagdd 2.804 905118.375 0.094
A| after eliminating high VIFs and nonsignificant
DRW: Here's your error
+ #MODIFY THESE STATEMENTS FOR A NEW PROJECT + #--Stage 2 OLS estimate of restricted model-- + ###xR<-lm(depvar ~ fyll + cultints + roots + fish + + ### exogamy + settype + femsubs, data = m9) + xR<-lm(depvar~fyll+dateobs+ #1#fydd + cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ Error: unexpected symbol in: " cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum anim" Doug 10:27, 3 November 2009 (PST) You forgot the + after cereals NEXT ERROR: > #MODIFY THESE STATEMENTS FOR A NEW PROJECT > #--Stage 2 OLS estimate of restricted model-- > ###xR<-lm(depvar ~ fyll + cultints + roots + fish + > ### exogamy + settype + femsubs, data = m9) > xR<-lm(depvar~fyll+dateobs+ #1#fydd + cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ + +ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ + settype+localjh+superjh+moralgods+fempower+femsubs+ + sexratio+war+himilexp+money+wagelabor+ + migr+brideprice+nuclearfam+pctFemPolyg + +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy + , data = m9) ###ADDED Error in inherits(x, "data.frame") : object "m9" not found Error: (DRW: extra + before ecorich RUN AGAIN AND LEARN HOW TO FIND AND COPY, SCAN, the first error
Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI") rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227, ### famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v740###NEW #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"childvar" depvarname<-"fam_arr" #--can add additional SCCS variable, but only if it has no missing values--- #dateobs<-SCCS$v838 #dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","settype", #1# "famsize", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+ ### famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg+ nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(depvar ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) xR<-lm(depvar~fyll+dateobs+ #1#fydd cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy , data = m9) ###ADDED
#--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich","localjh", #1# "famsize", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc
#Corrected to publication version with depvarname #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)
Results
http://eclectic.ss.uci.edu/~drwhite/courses/SCCCodes.htm
740. . Marriage Arrangements (Female) 35 . = Missing data 12 1 = Individual selects and/or courts partner autonomously: approval by parents or others unnecessary 40 2 = Individual selects and/or courts partner authonomously: parental, kin, and/or community approval necessary or highly desireable 4 3 = Individual suggests partner to parents or others; arrangements for courtship or marriage then proceed if choice is approved OR parents ask approval of individuals to initiate a match OR individual is approached by parent or others on behalf of suitor and can accept or reject the match 27 4 = Individual choice and arranged marriages are alternatives 35 5 = Parents choose partner: individual can object 33 6 = Parents choose partner: individual cannot easily object or rarely objects in fact
> bbb coef Fstat ddf pvalue VIF (Intercept) -0.060 0.000 2692.881 0.990 NA fyll 1.078 1.190 7298.215 0.275 1.940 <-- signif (keep) dateobs -0.001 1.051 3134.859 0.305 1.414 <-- signif (keep) cultints 0.261 3.398 1763.344 0.065 3.407 <-- signif (keep) cereals -1.043 6.474 2480.224 0.011 2.446 <-- signif (keep) anim 0.115 0.863 452.855 0.353 3.209 pigs -0.217 0.165 976.853 0.685 2.366 milk -0.860 2.094 1644.421 0.148 4.030 bovines 0.822 2.042 6146.618 0.153 4.322 tree 0.323 0.270 5314.739 0.603 1.682 foodtrade 0.022 1.854 2342.212 0.173 1.709 foodscarc 0.009 0.005 205.104 0.942 1.329 ecorich -0.060 0.166 1542.483 0.684 1.919 popdens -0.281 3.158 2587.804 0.076 3.679 <-- signif (keep) pathstress -0.050 0.852 1988.531 0.356 2.394 exogamy 0.297 5.067 1105.644 0.025 1.489 ncmallow -0.014 0.043 1232.077 0.836 1.598 settype 0.074 0.542 981.914 0.462 3.210 localjh 0.484 2.305 667.928 0.129 1.972 superjh 0.053 0.072 2452.986 0.789 3.105 moralgods 0.123 0.538 241.338 0.464 2.012 fempower -0.151 2.718 198.127 0.101 1.404 femsubs -0.153 1.852 1625.433 0.174 1.517 sexratio -0.158 0.248 38.922 0.621 1.466 war 0.006 0.058 306.361 0.810 1.741 himilexp -0.167 0.247 2008.805 0.619 1.561 money 0.069 0.245 1693.899 0.621 2.440 wagelabor -0.070 0.105 68.445 0.747 1.517 migr -0.219 0.287 67.033 0.594 1.694 brideprice 0.206 0.242 1955.443 0.623 2.310 nuclearfam 0.394 0.736 899.097 0.391 2.580 pctFemPolyg 0.020 9.234 299.922 0.003 1.822 <-- signif (keep) nonmatrel -0.010 0.002 151.180 0.964 1.415 lrgfam 0.024 0.174 1608.112 0.677 2.455 malesexag 0.010 0.008 90.312 0.929 1.425 segadlboys -0.101 0.650 289.232 0.421 1.462 agrlateboy 0.189 4.355 493.123 0.037 1.605 > r2 R2:final model R2:IV(distance) R2:IV(language) 0.3588984 0.9127452 0.9033638 > ccc Fstat df pvalue RESET 2.564 385.657 0.110 Wald on restrs. 2.588 297.945 0.109 NCV 0.083 10087.842 0.773 SWnormal 2.986 227.374 0.085 lagll 2.454 4523601.494 0.117 lagdd 2.190 3523.439 0.139
A|after removing non significant junk :)
DRW: Here's your error
+ #MODIFY THESE STATEMENTS FOR A NEW PROJECT + #--Stage 2 OLS estimate of restricted model-- + ###xR<-lm(depvar ~ fyll + cultints + roots + fish + + ### exogamy + settype + femsubs, data = m9) + xR<-lm(depvar~fyll+dateobs+ #1#fydd + cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ Error: unexpected symbol in: " cultints+cereals #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum anim" Doug 10:27, 3 November 2009 (PST) You forgot the + after cereals NEXT ERROR: > #MODIFY THESE STATEMENTS FOR A NEW PROJECT > #--Stage 2 OLS estimate of restricted model-- > ###xR<-lm(depvar ~ fyll + cultints + roots + fish + > ### exogamy + settype + femsubs, data = m9) > xR<-lm(depvar~fyll+dateobs+ #1#fydd + cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum + anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ + +ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ + settype+localjh+superjh+moralgods+fempower+femsubs+ + sexratio+war+himilexp+money+wagelabor+ + migr+brideprice+nuclearfam+pctFemPolyg + +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy + , data = m9) ###ADDED Error in inherits(x, "data.frame") : object "m9" not found Error: (DRW: extra + before ecorich RUN AGAIN AND LEARN HOW TO FIND AND COPY, SCAN, the first error
Program 1 --> Program 2 #MI--make the imputed datasets #--change the following path to the directory with your data and program-- setwd("C:/My Documents/MI") rm(list=ls(all=TRUE)) options(echo=TRUE) #--you need the following two packages--you must install them first-- library(foreign) library(mice) library(tripak) library(zoo) library(sp) library(maptools) library(spam) #--To find the citation for a package, use this function:--- citation("mice") #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in auxiliary variables--- load("vaux.Rdata",.GlobalEnv) row.names(vaux)<-NULL #--Read in the SCCS dataset--- load("SCCS.Rdata",.GlobalEnv) #--look at first 6 rows of vaux-- head(vaux) #--look at field names of vaux-- names(vaux) #--check to see that rows are properly aligned in the two datasets-- #--sum should equal 186--- sum((SCCS$socname==vaux$socname)*1) #--remove the society name field-- vaux<-vaux[,-28] names(vaux) #--Two nominal variables: brg and rlg---- #--brg: consolidated Burton Regions----- #0 = (rest of world) circumpolar, South and Meso-America, west North America #1 = Subsaharan Africa #2 = Middle Old World #3 = Southeast Asia, Insular Pacific, Sahul #4 = Eastern Americas #--rlg: Religion--- #'0 (no world religion)' #'1 (Christianity)' #'2 (Islam)' #'3 (Hindu/Buddhist)' #--check to see number of missing values in vaux, #--whether variables are numeric, #--and number of discrete values for each variable--- vvn<-names(vaux) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(vaux[,vvn[i]]))) numeric<-is.numeric(vaux[,vvn[i]]) numDiscrVals<-length(table(vaux[,vvn[i]])) pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals)) } row.names(pp)<-vvn pp #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--extract variables to be used from SCCS, put in dataframe fx-- fx<-data.frame( socname=SCCS$socname,socID=SCCS$"sccs#", valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476), dateobs=SCCS$v838,cultints=SCCS$v232,roots=(SCCS$v233==5)*1, cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204, fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890, pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1, bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1, foodtrade=SCCS$v819,foodscarc=SCCS$v1685, ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260, CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855, AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196, exogamy=SCCS$v72,ncmallow=SCCS$v227, ### famsize=SCCS$v80, settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237, moralgods=SCCS$v238,fempower=SCCS$v663, sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115), war=SCCS$v1648,himilexp=(SCCS$v899==1)*1, money=SCCS$v155,wagelabor=SCCS$v1732, migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1, nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872, nonmatrel=SCCS$v52,lrgfam=SCCS$v68,malesexag=SCCS$v175, segadlboys=SCCS$v242,agrlateboy=SCCS$v300) ###ADDED #--look at first 6 rows of fx-- head(fx) #--check to see number of missing values-- #--also check whether numeric-- vvn<-names(fx) pp<-NULL for (i in 1:length(vvn)){ nmiss<-length(which(is.na(fx[,vvn[i]]))) numeric<-is.numeric(fx[,vvn[i]]) pp<-rbind(pp,cbind(nmiss,data.frame(numeric))) } row.names(pp)<-vvn pp #--identify variables with missing values-- z<-which(pp[,1]>0) zv1<-vvn[z] zv1 #--identify variables with non-missing values-- z<-which(pp[,1]==0) zv2<-vvn[z] zv2 #----------------------------- #----Multiple imputation------ #----------------------------- #--number of imputed data sets to create-- nimp<-10 #--one at a time, loop through those variables with missing values-- for (i in 1:length(zv1)){ #--attach the imputand to the auxiliary data-- zxx<-data.frame(cbind(vaux,fx[,zv1[i]])) #--in the following line, the imputation is done-- aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long") #--during first iteration of the loop, create dataframe impdat-- if (i==1){ impdat<-data.frame(aqq[,c(".id",".imp")]) } #--the imputand is placed as a field in impdat and named-- impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)])) names(impdat)[NCOL(impdat)]<-zv1[i] } #--now the non-missing variables are attached to impdat-- gg<-NULL for (i in 1:nimp){ gg<-rbind(gg,data.frame(fx[,zv2])) } impdat<-cbind(impdat,gg) #--take a look at the top 6 and bottom 6 rows of impdat-- head(impdat) tail(impdat) #--impdat is saved as an R-format data file-- save(impdat,file="impdat.Rdata") Program 2 #MI--estimate model with network-lagged dependent variables, combine results rm(list=ls(all=TRUE)) #--Set path to your directory with data and program-- setwd("C:/My Documents/MI") options(echo=TRUE) #--need these packages for estimation and diagnostics-- library(foreign) library(spdep) library(car) library(lmtest) library(sandwich) #----------------------------- #--Read in data, rearrange---- #----------------------------- #--Read in original SCCS data--- load("SCCS.Rdata",.GlobalEnv) #--Read in two weight matrices-- ll<-as.matrix(read.dta("langwm.dta")[,-1]) dd<-as.matrix(read.dta("dist25wm.dta")[,c(-1,-2,-189)]) #--Read in the imputed dataset--- load("impdat.Rdata",.GlobalEnv) #HERE YOU CHANGE HOW THE DEPENDENT VARIABLE IS COMPUTED FOR A NEW PROJECT #--create dep.varb. you wish to use from SCCS data-- #--Here we sum variables measuring how much a society values children-- #--can replace "sum" with "max" ###depvar<-apply(SCCS[,c("v473","v474","v475","v476")],1,sum) depvar<-SCCS$v740###NEW #--find obs. for which dep. varb. is non-missing-- zdv<-which(!is.na(depvar)) depvar<-depvar[zdv] #HERE GIVE THE "NAME" OF THE DEPENDENT VARIABLE THAT IS COMPUTED ###depvarname<-"childvar" depvarname<-"fam_arr" #--can add additional SCCS variable, but only if it has no missing values--- #dateobs<-SCCS$v838 #dateobs<-dateobs[zdv] #--look at frequencies and quartiles for the dep. varb.-- summary(depvar) table(depvar) #--modify weight matrices--- #--set diagonal equal to zeros-- diag(ll)<-0 diag(dd)<-0 #--use only obs. where dep. varb. non-missing-- ll<-ll[zdv,zdv] dd<-dd[zdv,zdv] #--row standardize (rows sum to one) ll<-ll/rowSums(ll) dd<-dd/rowSums(dd) #--make weight matrix object for later autocorrelation test-- wmatll<-mat2listw(as.matrix(ll)) wmatdd<-mat2listw(as.matrix(dd)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT indpv<-c("femsubs","foodscarc","exogamy","ncmallow","superjh","moralgods", "fempower","sexratio","war","himilexp","wagelabor","settype", #1# "famsize", "localjh","money","cultints","roots","cereals","gath","hunt","fish", "anim","pigs","milk","plow","bovines","tree","foodtrade","dateobs", "ndrymonth","ecorich","popdens","pathstress","CVrain","rain", "temp","AP1","AP2","migr","brideprice","nuclearfam","pctFemPolyg", "nonmatrel","lrgfam","malesexag","segadlboys","agrlateboy") ###ADDED #----------------------------------------------------- #---Estimate model on each imputed dataset------------ #----------------------------------------------------- #--number of imputed datasets-- nimp<-10 #--will append values to these empty objects-- vif<-NULL ss<-NULL beta<-NULL dng<-NULL #--loop through the imputed datasets-- for (i in 1:nimp){ #--select the ith imputed dataset-- m9<-impdat[which(impdat$.imp==i),] #--retain only obs. for which dep. varb. is nonmissing-- m9<-m9[zdv,] #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--create spatially lagged dep. varbs. in stage 1 OLS-- y<-as.matrix(depvar) xx<-as.matrix(m9[,indpv]) #--for instruments we use the spatial lag of our indep. varbs.-- #--First, the spatially lagged varb. for distance-- xdy<-dd%*%xx cyd<-dd%*%y o<-lm(cyd~xdy) #--the fitted value is our instrumental variable-- fydd<-fitted(o) #--keep R2 from this regression-- dr2<-summary(o)$r.squared #--Then, the spatially lagged varb. for language-- xly<-ll%*%xx cyl<-ll%*%y o<-lm(cyl~xly) #--the fitted value is our instrumental variable-- fyll<-fitted(o) #--keep R2 from this regression-- lr2<-summary(o)$r.squared m9<-cbind(m9,fydd,fyll) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of unrestricted model-- xUR<-lm(depvar~fyll+fydd+dateobs+ cultints+roots+cereals+gath+plow+ hunt+fish+anim+pigs+milk+bovines+tree+foodtrade+foodscarc+ ecorich+popdens+pathstress+exogamy+ncmallow+ ### famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ sexratio+war+himilexp+money+wagelabor+ migr+brideprice+nuclearfam+pctFemPolyg+ nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy,data=m9) ###ADDED
#MODIFY THESE STATEMENTS FOR A NEW PROJECT #--Stage 2 OLS estimate of restricted model-- ###xR<-lm(depvar ~ fyll + cultints + roots + fish + ### exogamy + settype + femsubs, data = m9) xR<-lm(depvar~fyll+dateobs+ #1#fydd cultints+cereals+ #2#roots #3#gath #4#hunt #4#plow #5# fish #5#anum ecorich+popdens+pathstress+exogamy+ncmallow+ ###famsize+ settype+localjh+superjh+moralgods+fempower+femsubs+ migr+brideprice+nuclearfam+pctFemPolyg +nonmatrel+lrgfam+malesexag+segadlboys+agrlateboy , data = m9) ###ADDED
#--corrected sigma2 and R2 for 2SLS-- qxx<-m9 qxx[,"fydd"]<-cyd qxx[,"fyll"]<-cyl b<-coef(xR) incpt<-matrix(1,NROW(qxx),1) x<-as.matrix(cbind(incpt,qxx[,names(b)[-1]])) e<-y-x%*%as.matrix(b) cs2<-as.numeric(t(e)%*%e/(NROW(x)-NCOL(x))) cr2<-as.numeric(1-t(e)%*%e/sum((y-mean(y))^2)) #--collect coefficients and their variances-- ov<-summary(xR) vif<-rbind(vif,vif(xR)) ss<-rbind(ss,diag(ov$cov*cs2)) #--collect robust coef. variances when there is heteroskedasticity-- #eb<-e^2 #x<-as.matrix(cbind(incpt,m9[,names(b)[-1]])) #hcm<-inv(t(x)%*%x)%*%t(x)%*%diag(eb[1:length(eb)])%*%x%*%inv(t(x)%*%x) #ss<-rbind(ss,diag(hcm)) beta<-rbind(beta,coef(xR)) #MODIFY THESE STATEMENTS FOR A NEW PROJECT #--collect some model diagnostics-- dropt<-c("cereals","gath","plow","hunt","anim","dateobs", "pigs","milk","bovines","foodscarc","ecorich","localjh", #1# "famsize", "superjh","moralgods","fempower","sexratio","money", "fydd","wagelabor","war","himilexp","tree","foodtrade") #--Ramsey RESET test-- p1<-qchisq(resettest(xR,type="fitted")$"p.value",1,lower.tail=FALSE) #--Wald test (H0: dropped variables have coefficient equal zero)-- o<-linear.hypothesis(xUR,dropt,test="Chisq")$"Pr(>Chisq)"[2] p2<-qchisq(o,1,lower.tail=FALSE) #find Chisq with 1 d.f. and same pvalue #--Heteroskedasticity test (H0: homoskedastic residuals)-- p3<-ncv.test(xR)$ChiSquare #--Shapiro-Wilke normality test (H0: residuals normal) p4<-qchisq(shapiro.test(e)$p.value,1,lower.tail=FALSE) #--LaGrange Multiplier test for spatial autocorrelation: language-- o<-lm.LMtests(xR, wmatll, test=c("LMlag")) p5<-as.numeric(o$LMlag$statistic) #--LaGrange Multiplier test for spatial autocorrelation: distance-- o<-lm.LMtests(xR, wmatdd, test=c("LMlag")) p6<-as.numeric(o$LMlag$statistic) #--model R2-- p7<-cr2 dng<-rbind(dng,cbind(p1,p2,p3,p4,p5,p6,p7,dr2,lr2)) } #-------------------------------------------- #--Rubin's formulas for combining estimates-- #-------------------------------------------- #--first find final regr. coefs. and p-values-- mnb<-apply(beta,2,mean) vrb<-colSums((beta-t(matrix(mnb,length(mnb),10)))^2)/(nimp-1) mnv<-apply(ss,2,mean) vrT<-mnv+vrb*(1-nimp^(-1)) fst<-mnb^2/vrT r<-(1+nimp^(-1))*vrb/mnv v<-(nimp-1)*(1+r^(-1))^2 pval<-pf(fst,1,v,lower.tail=FALSE) bbb<-data.frame(round(cbind(mnb,fst,v,pval),3)) bbb$VIF[2:NROW(bbb)]<-round(apply(vif,2,mean),3) names(bbb)<-c("coef","Fstat","ddf","pvalue","VIF") #--Then combine the diagnostics we collected-- dng<-data.frame(dng) names(dng)<-c("RESET","Wald on restrs.","NCV","SWnormal","lagll","lagdd", "R2:final model","R2:IV(distance)","R2:IV(language)") r2<-apply(dng[,7:9],2,mean) adng<-dng[,1:6] mdm<-apply(adng,2,mean) vrd<-colSums((adng-t(matrix(mdm,length(mdm),nimp)))^2)/(nimp-1) aa<-4*mdm^2-2*vrd aa[which(aa<0)]<-0 rd<-(1+nimp^(-1))*vrd/(2*mdm+aa^.5) vd<-(nimp-1)*(1+rd^(-1))^2 Dm<-(mdm-(nimp-1)/(nimp+1)*rd)/(1+rd) #-All chi-sq we collected have df=1------- pvald<-pf(Dm,1,vd,lower.tail=FALSE) ccc<-data.frame(round(cbind(Dm,vd,pvald),3)) names(ccc)<-c("Fstat","df","pvalue") bbb r2 ccc
#Corrected to publication version with depvarname #--write results to csv file for perusal in spreadsheet-- write.csv(paste("2SLS model for ",depvarname,sep=""),file="OLSresults.csv", append=FALSE) write.csv(bbb,file="OLSresults.csv",append=TRUE) write.csv(r2,file="OLSresults.csv",append=TRUE) write.csv(ccc,file="OLSresults.csv",append=TRUE)