EduMod 0: 1-Make the imputed datasets
From InterSciWiki
Contents |
1
EduMod-0: Sum(valchild) Imputation and Regression
- Imputing_the_data#Eff_and_Dow
- EduMod-1: Max(valchild) Imputation and Regression - a variant of the dependent variable
2
3
Program 1 (9-22-09) current Eff and Dow publication version
You must edit the new working directory and comment out any duplicates! Then copy the needed files into the new working directory #Program 2 Estimate model, combine results
##MI--make the imputed datasets
#--change the following path to the directory with your data and program--
###setwd("d:/projects/MI")
setwd("C:/My Documents/MI")
rm(list=ls(all=TRUE))
options(echo=TRUE)
#--you need the following two packages--you must install them first--
library(foreign)
library(mice)
#--To find the citation for a package, use this function:---
citation("mice")
#-----------------------------
#--Read in data, rearrange----
#-----------------------------
#--Read in auxilliary variables---
load("vaux.Rdata",.GlobalEnv)
row.names(vaux)<-NULL
#--Read in the SCCS dataset---
load("SCCS.Rdata",.GlobalEnv)
#--look at first 6 rows of vaux--
head(vaux)
#--look at field names of vaux--
names(vaux)
#--check to see that rows are properly aligned in the two datasets--
#--sum should equal 186---
sum((SCCS$socname==vaux$socname)*1)
#--remove the society name field--
vaux<-vaux[,-28]
names(vaux)
#--Two nominal variables: brg and rlg----
#--brg: consolidated Burton Regions-----
#0 = (rest of world) circumpolar, South and Meso-America, west North America
#1 = Subsaharan Africa
#2 = Middle Old World
#3 = Southeast Asia, Insular Pacific, Sahul
#4 = Eastern Americas
#--rlg: Religion---
#'0 (no world religion)'
#'1 (Christianity)'
#'2 (Islam)'
#'3 (Hindu/Buddhist)'
#--check to see number of missing values in vaux,
#--whether variables are numeric,
#--and number of discrete values for each variable---
vvn<-names(vaux)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(vaux[,vvn[i]])))
numeric<-is.numeric(vaux[,vvn[i]])
numDiscrVals<-length(table(vaux[,vvn[i]]))
pp<-rbind(pp,cbind(data.frame(numeric),nmiss,numDiscrVals))
}
row.names(pp)<-vvn
pp
#--extract variables to be used from SCCS, put in dataframe fx--
fx<-data.frame(
socname=SCCS$socname,socID=SCCS$"sccs#",
valchild=(SCCS$v473+SCCS$v474+SCCS$v475+SCCS$v476),
cultints=SCCS$v232,roots=(SCCS$v233==5)*1,
cereals=(SCCS$v233==6)*1,gath=SCCS$v203,hunt=SCCS$v204,
fish=SCCS$v205,anim=SCCS$v206,femsubs=SCCS$v890,
pigs=(SCCS$v244==2)*1,milk=(SCCS$v245>1)*1,plow=(SCCS$v243>1)*1,
bovines=(SCCS$v244==7)*1,tree=(SCCS$v233==4)*1,
foodtrade=SCCS$v819,foodscarc=SCCS$v1685,
ecorich=SCCS$v857,popdens=SCCS$v156,pathstress=SCCS$v1260,
CVrain=SCCS$v1914/SCCS$v1913,rain=SCCS$v854,temp=SCCS$v855,
AP1=SCCS$v921,AP2=SCCS$v928,ndrymonth=SCCS$v196,
exogamy=SCCS$v72,ncmallow=SCCS$v227,famsize=SCCS$v80,
settype=SCCS$v234,localjh=(SCCS$v236-1),superjh=SCCS$v237,
moralgods=SCCS$v238,fempower=SCCS$v663,
sexratio=1+(SCCS$v1689>85)+(SCCS$v1689>115),
war=SCCS$v1648,himilexp=(SCCS$v899==1)*1,
money=SCCS$v155,wagelabor=SCCS$v1732,
migr=(SCCS$v677==2)*1,brideprice=(SCCS$v208==1)*1,
nuclearfam=(SCCS$v210<=3)*1,pctFemPolyg=SCCS$v872
)
#--look at first 6 rows of fx--
head(fx)
#--check to see number of missing values--
#--also check whether numeric--
vvn<-names(fx)
pp<-NULL
for (i in 1:length(vvn)){
nmiss<-length(which(is.na(fx[,vvn[i]])))
numeric<-is.numeric(fx[,vvn[i]])
pp<-rbind(pp,cbind(nmiss,data.frame(numeric)))
}
row.names(pp)<-vvn
pp
#--identify variables with missing values--
z<-which(pp[,1]>0)
zv1<-vvn[z]
zv1
#--identify variables with non-missing values--
z<-which(pp[,1]==0)
zv2<-vvn[z]
zv2
#-----------------------------
#----Multiple imputation------
#-----------------------------
#--number of imputed data sets to create--
nimp<-10
#--one at a time, loop through those variables with missing values--
for (i in 1:length(zv1)){
#--attach the imputand to the auxiliary data--
zxx<-data.frame(cbind(vaux,fx[,zv1[i]]))
#--in the following line, the imputation is done--
aqq<-complete(mice(zxx,maxit=100,m=nimp),action="long")
#--during first iteration of the loop, create dataframe impdat--
if (i==1){
impdat<-data.frame(aqq[,c(".id",".imp")])
}
#--the imputand is placed as a field in impdat and named--
impdat<-cbind(impdat,data.frame(aqq[,NCOL(zxx)]))
names(impdat)[NCOL(impdat)]<-zv1[i]
}
#--now the non-missing variables are attached to impdat--
gg<-NULL
for (i in 1:nimp){
gg<-rbind(gg,data.frame(fx[,zv2]))
}
impdat<-cbind(impdat,gg)
#--take a look at the top 6 and bottom 6 rows of impdat--
head(impdat)
tail(impdat)
#--impdat is saved as an R-format data file--
save(impdat,file="impdat.Rdata")

