EAv17.AWC2

From InterSciWiki
Jump to: navigation, search

AWC

R GUI script (SCCS) to pull the AWC subset out of the EA (September version)

setwd("/Users/drwhite/Desktop/R")
library(mice)
library(foreign)
library(stringr)
library(AER)
library(spdep)
library(psych)
library(geosphere) 
library(relaimpo)
library(linprog)
library(dismo)
library(forward)
library(pastecs)
library(classInt)
library(maps)   ###not  library(maps) ) - corrected
library(plyr)
library(aod)
library(reshape)
 library(RColorBrewer)
 library(XML)
 library(tm)
 library(mlogit)
library(mapproj) #trying URL 'http://cran.rstudio.com/bin/macosx/leopard/contrib/2.15/mapproj_1.2-1.tgz'
#library(map) used by Eff
#load(url("http://dl.dropbox.com/u/9256203/DEf01b.Rdata"), .GlobalEnv)  # dont use b
#ls()  #-can see the objects contained in DEf01b.Rdata
# Anthon added a field called "awc" to EA and EAfact in the workspace DEf01c.
# To use the AWC subset of EA, load that workspace and keep only the rows
# whose awc value is not NA.
load(url("http://dl.dropbox.com/u/9256203/DEf01c.Rdata"), envir = .GlobalEnv)
setDS("EA")

# Dimensions of dx before and after dropping rows with a missing awc.
dim(dx)
dx <- dx[!is.na(dx$awc), ]
dim(dx)

# Same filter applied to dxf.
dim(dxf)
dxf <- dxf[!is.na(dxf$awc), ]
dim(dxf)
# The number of rows in dx should fall from 1265 to 558.

# Alternative workspace (not used here):
# load(url("http://dl.dropbox.com/u/9256203/DEf01.Rdata"), .GlobalEnv)
ls()  # list the objects now in the workspace (includes those from DEf01c.Rdata)
# setDS("EA") # Warning message: In sqrt(1 - a) : NaNs produced
# names(dx)
dx$code  # the EA codes, e.g., "Af32" "Af33" "Af34"

# This transformation works:
# keep untouched copies of wdd and wll, because later sections overwrite them.
wddbkup <- wdd
wllbkup <- wll
dx$code[1:10]  # = "Aa1" "Ab1" "Ab2" "Ac1" "Ac2" "Ad1" "Ad2" "Ae1" "Ae2" "Af1"

# Read the AWC table from the working directory and take its EA_AWC column.
setwd("/Users/drwhite/Desktop/R")
AWC561 <- read.csv("EA_AWC.csv", header = TRUE)
EAawc <- AWC561$EA_AWC
# set dx$eanumber=1 if EAawc exists
# The model follows, e.g., for patrilineality (largepat), using variables that are in EA.
# The idea is to bring more, but not all, of the EA (those societies in AWC) into EA,
# which will now have about 559 variables, many of them missing SCCS codes.
# dpV: v17 (EA); RiV: sdalt, bio.5, bio.9, v73, v6.d1, v66, v68, v34.
# NOW READ THE AWC CODES

# ---- Full-EA model: load data, impute, locate the AWC societies, run OLS ----
# Loads the DEf01 workspace, makes EA the active dataset, builds the dependent
# variable "largepat" from v17, runs multiple imputation, records which EA rows
# belong to the AWC list (ind), and fits the OLS model on the whole EA sample.
# Requires AWC561 (read from EA_AWC.csv earlier in this script).
load(url("http://dl.dropbox.com/u/9256203/DEf01.Rdata"), .GlobalEnv)
ls()  # can see the objects contained in DEf01.Rdata
setDS("EA")
dx$largepat <- dx$v17
addesc("largepat", "largepat")
#=== list and modify variables for use in model===
# --make new variables--
### dx$BridePrice<-dx
# --create new dummy variables
mkdummy("v6", 1) ## [1] "Dummy variable named v6.d1" Bridewealth
# Variables passed to the imputation step (v70 is extra; dpV name = largepat).
evm <- c(
  "bio.4", "largepat", "sdalt", "bio.5", "bio.9", "v73", "v36", "v38",
  "v37", "v6.d1", "v66", "v68", "v70", "v34", "v35"
)
smi <- doMI(evm, nimp = 2, maxit = 3)
AWC <- dx$code           # THE 1267 codes in EA : "Aa1"  "Aa2"  "Aa3"  "Aa4"
EAawc <- AWC561$EA_AWC   # THE SUBSET OF 561 codes in AWC : "Aa1"  "Aa2"  "Aa3"  "Aa5"  "Aa7"
## FIND the SUBSET EAawc (AWC561$EA_AWC) WITHIN AWC (dx$code)
# set dx$eanumber=1 if EAawc exists in EA dx$code  <--- need to pull out those
# societies in EA that belong to EAawc.
# ind holds the EA row numbers of the AWC societies, in EAawc order. It is built
# in one pass instead of growing a vector inside a loop; as.character() makes
# the comparison work whether read.csv() returned strings or a factor.
ind <- as.integer(unlist(lapply(
  as.character(EAawc),
  function(code) which(EA$code == code)
)))
# N=563 ind  <--- this shows that it works, pulling out the row numbers of those in AWC
# Show the first ten matches. (The original `ind[1-10]` is `ind[-9]`, i.e. it
# dropped the ninth element and printed everything else.)
head(ind, 10) # [1]    1  101  102  202  636  726    2  404  405  411 ...
# AT THIS POINT ONE COULD PULL OUT THE ENTIRE SUBSET OF N-561 AWC CASES WITHIN EA (EFF?)
# but for a given model, just take all the cases that are present in what's
# needed for the model, e.g. evm and other  TOLGA
# Anyway, TEST h<- for EAf01
# MADE CORRECT h for DEf01 -- THE WHOLE EA SAMPLE
dpV <- "largepat"
UiV <- c(
  "bio.4", "sdalt", "bio.5", "bio.9", "v73", "v36", "v38", "v37",
  "v6.d1", "v66", "v68", "v70", "v34"
)
RiV <- c("sdalt", "bio.5", "bio.9", "v73", "v6.d1", "v66", "v34")
h <- doOLS(
  smi,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = NULL,
  dw = TRUE, lw = TRUE, ew = FALSE, stepW = TRUE,
  relimp = TRUE, slmtests = FALSE
)


#---NOW WE NEED THE SAME MODEL FOR THE n=565 AWC sample, We'll add avoidance data and additional evm variables as needed for our larger study---#
#---IF POSSIBLE THIS SUBSET METHOD COULD BE APPLIED TO THE ENTIRE SAMPLE ALONG WITH THE SUBSETTED dW, lW, and eW plus a comparable compression
#---of the completely coded variables used for imputation. This n=565 'quality subset dataset' isn't going to change!  It exhausts the cases of the AWC.
#---The big advantages of making AWC-Atlas a 5th database is that it can be crossed with SCCS or LRB or WNAI to form a quality database that will include
#---G. P. Murdock's kin behavior data along with the SCCS variables OR one of the two samples (LRB or WNAI) that are rich in forager societies.     ---#
# ---- AWC subsample: subset weights, data and imputations, then rerun OLS ----
# Uses ind (EA row numbers of the AWC societies), smi (imputed data), dpV/UiV/RiV
# and the wdd/wll/wee objects that are already in the workspace.

# Logical mask over the EA rows: TRUE for societies that are also in AWC.
z <- rep(FALSE, length(EA$code))
z[ind] <- TRUE

# Subset the weight matrices and dx only while they are still full-size, so that
# re-running this section does not fail with "logical subscript too long".
if (nrow(dx) == length(z)) {
  wdd <- wdd[z, z, drop = FALSE]
  wll <- wll[z, z, drop = FALSE]
  wee <- wee[z, z, drop = FALSE]
  dx <- dx[z, ]
} else if (nrow(dx) != sum(z)) {
  stop(
    "dx has ", nrow(dx), " rows; expected ", length(z),
    " (full EA) or ", sum(z), " (AWC subset)"
  )
}

# Repeat the mask once per imputed copy in smi. The original `rep(z, 5)` did not
# match the nimp = 2 used for doMI() and produced
#   Error in `[.data.frame`(smi, z1, ) : (subscript) logical subscript too long
# NOTE(review): assumes doMI() returns the imputations row-stacked in blocks of
# length(z) rows each -- confirm.
stopifnot(nrow(smi) %% length(z) == 0)
z1 <- rep(z, nrow(smi) / length(z))
smi1 <- smi[z1, ]
# OK TO HERE
# q <- doLogit(smi1, depvar = dpV, indpv = UiV, rindpv = RiV, dw = TRUE, lw = TRUE, ew = FALSE, doboot = 1000, mean.data = TRUE,  getismat = FALSE, othexog = NULL)
# Error: could not find function "doLogit"

# oxog is not defined anywhere in this script. Use it if the loaded workspace
# supplies it; otherwise fall back to NULL, as in the full-sample run.
if (!exists("oxog")) {
  oxog <- NULL
}

# OR AND THIS SEEMS TO BE WRONG:  MATCH THE h<-- to the 01b
# The original passed `smi2`, which is never created; smi1 is the subsetted
# imputation set built above.
# NOTE(review): haustest names "v213.d3", which this script does not create --
# confirm it exists in dx before relying on this call.
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = NULL,
  dw = TRUE, lw = TRUE, ew = FALSE, stepW = TRUE, boxcox = FALSE,
  getismat = FALSE, relimp = TRUE, slmtests = FALSE,
  haustest = c("v213.d3"), mean.data = TRUE, doboot = 500
)
# OR
# The AWC results are fitted on smi1 so the data match the subsetted weight
# matrices (the original passed the full-sample smi).
# NOTE(review): confirm smi1 is what was intended for these two runs.
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = oxog,
  dw = TRUE, lw = TRUE, ew = FALSE, stepW = TRUE,
  relimp = TRUE, slmtests = FALSE
)
CSVwrite(h, "AWCv17olsresultsNo.eW", FALSE)
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = oxog,
  dw = TRUE, lw = TRUE, ew = TRUE, stepW = TRUE,
  relimp = TRUE, slmtests = FALSE
)
CSVwrite(h, "AWCv17olsresults.eW", FALSE)
#==October 5th==
# AWC
# Re-run of the AWC subsample model. Needs EA, EAawc, smi, dpV, UiV, RiV and the
# wdd/wll/wee objects in the workspace.

# EA row numbers of the AWC societies, in EAawc order. The original line here
# was a bare `c()` (a no-op), so the loop appended to whatever ind already held,
# or failed in a fresh session because ind did not exist.
ind <- as.integer(unlist(lapply(
  as.character(EAawc),
  function(code) which(EA$code == code)
)))  # N=563 ind
# [1]    1  101  102  202  636  726    2  404  405  411  103  104  203  204  302  303    4  728  731  737  105  748  752  106  206  304  6
# OK TO HERE

# Logical mask over the EA rows: TRUE for societies that are also in AWC.
z <- rep(FALSE, length(EA$code))
z[ind] <- TRUE

# Subset the weight matrices and dx only while they are still full-size; if an
# earlier section already reduced them to the AWC rows, leave them alone rather
# than failing with "logical subscript too long".
if (nrow(dx) == length(z)) {
  wdd <- wdd[z, z, drop = FALSE]
  wll <- wll[z, z, drop = FALSE]
  wee <- wee[z, z, drop = FALSE]
  dx <- dx[z, ]
} else if (nrow(dx) != sum(z)) {
  stop(
    "dx has ", nrow(dx), " rows; expected ", length(z),
    " (full EA) or ", sum(z), " (AWC subset)"
  )
}

# Repeat the mask once per imputed copy in smi. The original `rep(z, 5)` did not
# match the nimp = 2 used for doMI() and produced
#   Error in `[.data.frame`(smi, z1, ) : (subscript) logical subscript too long
# NOTE(review): assumes doMI() returns the imputations row-stacked in blocks of
# length(z) rows each -- confirm.
stopifnot(nrow(smi) %% length(z) == 0)
z1 <- rep(z, nrow(smi) / length(z))
smi1 <- smi[z1, ]
# q <- doLogit(smi1, depvar = dpV, indpv = UiV, rindpv = RiV, dw = TRUE, lw = TRUE, ew = FALSE, doboot = 1000, mean.data = TRUE,  getismat = FALSE, othexog = NULL)
# Error: could not find function "doLogit"

# oxog is not defined anywhere in this script. Use it if the loaded workspace
# supplies it; otherwise fall back to NULL, as in the full-sample run.
if (!exists("oxog")) {
  oxog <- NULL
}

# OR AND THIS SEEMS TO BE WRONG:  MATCH THE h<-- to the 01b
# The original passed `smi2`, which is never created; smi1 is the subsetted
# imputation set built above.
# NOTE(review): haustest names "v213.d3", which this script does not create --
# confirm it exists in dx before relying on this call.
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = NULL,
  dw = TRUE, lw = TRUE, ew = FALSE, stepW = TRUE, boxcox = FALSE,
  getismat = FALSE, relimp = TRUE, slmtests = FALSE,
  haustest = c("v213.d3"), mean.data = TRUE, doboot = 500
)
# OR
# The AWC results are fitted on smi1 so the data match the subsetted weight
# matrices (the original passed the full-sample smi).
# NOTE(review): confirm smi1 is what was intended for these two runs.
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = oxog,
  dw = TRUE, lw = TRUE, ew = FALSE, stepW = TRUE,
  relimp = TRUE, slmtests = FALSE
)
CSVwrite(h, "AWCv17olsresultsNo.eW", FALSE)
h <- doOLS(
  smi1,
  depvar = dpV, indpv = UiV, rindpv = RiV, othexog = oxog,
  dw = TRUE, lw = TRUE, ew = TRUE, stepW = TRUE,
  relimp = TRUE, slmtests = FALSE
)
CSVwrite(h, "AWCv17olsresults.eW", FALSE)