Dow Eff functions

DEf01f - .Dow-Eff Functions - DEf http://capone.mtsu.edu/eaeff --> http://capone.mtsu.edu/eaeff/Dow-Eff%20functions.html --> http://capone.mtsu.edu/eaeff/DEf01XC.html Using the Dow-Eff functions in R ---------- Using the Dow-Eff functions in R: working with the SCCS -------the functions will estimate OLS, logit, and multinomial logit models, using multiple imputation to handle the problem of missing data, and network lag terms to handle Galton’s Problem.

mkscale

```mkscale #--make a composite index from variables in compvarbs, from 1st principal component --# --remove redundant variables--
```
```function(compvarbs,udnavn=NULL,impdata,type="LP",add.descrip=NULL,set.direction=NULL,set.range=NULL){
#--make a composite index from variables in compvarbs, from 1st principal component--
# compvarbs=character string of variable names
# impdata=imputed dataset created using doMI
# udnavn=name of composite index created in impdata
# type= 1 of c("LP","pc1","mean")
if (is.null(udnavn)){udnavn<-paste(compvarbs,type,sep=".")}
print(udnavn)
compvarbs<-intersect(get(compvarbs),names(impdata))
#--remove redundant variables--
zxa<-compvarbs
gxg<-as.matrix(impdata[,zxa]);dim(gxg)
oyo<-qr(gxg);oyo\$rank
bgti<-zxa
zxa<-zxa[oyo\$pivot[seq(oyo\$rank)]]
if (length(setdiff(bgti,zxa))>0){
print(paste("To avoid singularity problems, variable ",paste(setdiff(bgti,zxa),collapse=",")," dropped",sep=""))
compvarbs<-zxa
}
# -------------------------

a<-rownames(suppressWarnings(alpha(impdata[,compvarbs])\$alpha.drop))
# following not necessary for princomp, but for others yes
# ng=vector of length compvarbs with values 1 or -1 (-1 inverts variable in compvarbs)
ng<-rep(1,length(compvarbs))
izi<-grep("-",a)
if (length(izi)>0){ng[izi]<-(-1)}
names(ng)<-gsub("-","",a)
s<-1
if (!is.null(set.direction)){s<-as.numeric(ng[set.direction])}
ng<-ng*s
ix<-impdata[,compvarbs]*matrix(ng[compvarbs],NROW(impdata),length(ng),byrow=TRUE)
ix<-apply(ix,2,scale)*1+10
if (min(ix)<0 & type=="LP"){
print("variable has negative values, not suitable for this method, try rescaling or set type='pc1'")
print(apply(ix,2,min))
stop("negative values")
}
v<-kln(data.frame(ix,.id=impdata\$.id,.imp=impdata\$.imp))
impdata[,udnavn]<-0

if (type=="pc1"){
wts<-pvex<-NULL
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
o<-princomp(v[z,compvarbs])
pvex<-rbind(pvex,(o\$sdev^2)/sum(o\$sdev^2))
impdata[z,udnavn]<-o\$scores[,1]*mscr
}
pvex<-apply(pvex,2,mean)
print("Pct Variance Explained by component")
print(pvex[which(pvex>.01)])
}

if (type=="mean"){
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
impdata[z,udnavn]<-apply(v[z,compvarbs],1,mean)
}
}

if (type=="LP"){
kyf<-kln(data.frame(.id=impdata[which(impdata\$.imp==1),".id"],ord=1:length(which(impdata\$.imp==1))))
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
Amat<-as.matrix(v[z,compvarbs])
row.names(Amat)<-v\$.id[z]
pnv<-row.names(Amat)
fc<-matrix(0,NCOL(Amat),NCOL(Amat))
diag(fc)<-1
rownames(fc)<-paste("ct",(1:NCOL(Amat)),sep="")
colnames(fc)<-colnames(Amat)
Amat<-rbind(fc,Amat)
sx<-NULL
iter<-0
o<-pnv
while (length(o)>1){
dea<-NULL
iter<-iter+1
bvec<-rep(1,NROW(Amat))
bvec[1:NROW(fc)]<-(1/10^6)
names(bvec)<-rownames(Amat)
f.dir<-rep(c(">=","<="),c(NROW(fc),length(o)))
for (i in 1:length(o)){
cvec<-(Amat[o[i],])
olp<-lp("max", cvec, Amat, f.dir, bvec)
dea<-rbind(dea,olp\$objval)
}
z<-which(round(dea[,1],7)==1)
sx<-rbind(sx,cbind(data.frame(o[z]),iter))
z<-which(round(dea[,1],7)!=1)
o<-o[z]
if (length(o)>=1){Amat<-Amat[c(rownames(fc),o),]}
}
sx<-kln(sx)
names(sx)[1]<-".id"

duf<-merge(kyf,sx,by=".id",all=TRUE)
z<-which(is.na(duf\$iter))
if (length(z)>0){duf\$iter[z]<-iter+1}
duf[,NCOL(duf)]<-(max(duf[,NCOL(duf)])+1)-duf[,NCOL(duf)]

z<-which(impdata\$.imp==imn)
k<-match(impdata\$.id[z],duf\$.id)
impdata[z,udnavn]<-duf\$iter[k]
}
}
# ------------------stats-----------------

dld<-NULL
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
if (!is.null(set.range)){  impdata[z,udnavn]<-resc(impdata[z,udnavn],set.range[1],set.range[2]) }
dld<-rbind(dld,kln(data.frame(varbs=compvarbs,cor=cor(impdata[z,compvarbs],impdata[z,udnavn]))))
}

dld<-kln(aggregate(dld[,c("cor")],list(dld\$varbs),mean))
k<-match(dld\$Group.1,key\$variable)
dld<-kln(data.frame(cor.w.scale=round(dld[,2],3),inv=ng[dld\$Group.1],varb=dld\$Group.1,
description=key[k,c("description")]))
rownames(dld)<-dld\$varb
if (type=="pc1"){
a<-t(apply(wts,2,range))
dld<-kln(data.frame(a,dld))
}
dld<-dld[order(dld\$cor.w.scale),]

stats1<-data.frame(suppressWarnings(alpha(ix)\$total[c(2)]))

levs<-quickdesc(dld\$varb)

impdata[,paste(udnavn,"Sq",sep="")]<-impdata[,udnavn]^2
h<-list(impdata[,c(udnavn,paste(udnavn,"Sq",sep=""))],stats1,dld,levs)
names(h)<-c("scales","stats","corrs","varb.desc")
cat(paste('c("',paste(dld\$varb,collapse='", "'),'")',sep=""), "\n")
return(h)
```

}

Objects

Objects contained in R workspace DEf1d.Rdata Manual_DEf.htm

```Datasets
EA --  Ethnographic Atlas dataset
EAkey --  Ethnographic Atlas metadata file
EAfact --  Ethnographic Atlas dataset with factor labels
EAcov --  Ethnographic Atlas variable covariates for imputation
LRB --  Binford forager dataset
LRBkey --  Binford forager metadata file
LRBfact --  Binford forager dataset with factor labels
LRBcov --  Binford forager variable covariates for imputation
SCCS --  Standard Cross-Cultural Sample dataset
SCCSkey --  Standard Cross-Cultural Sample metadata file
SCCSfact --  Standard Cross-Cultural Sample dataset with factor labels
SCCScov --  Standard Cross-Cultural Sample variable covariates for imputation
WNAI --  Western North American Indians dataset
WNAIkey --  Western North American Indians metadata file
WNAIfact --  Western North American Indians dataset with factor labels
WNAIcov --  Western North American Indians variable covariates for imputation
XC --  Merged 371 society dataset
XCkey - Merged 371 society metadata file
XCfact - Merged 371 society dataset with factor labels
XCcov - Merged 371 society variable covariates for imputation
llm - Matrix of linguistic proximities between all pairs of societies
Undocumented functions
chK - auxiliary function that finds some characteristics of variables in dataframe
chkpmc - auxiliary function that checks variables for high collinearity
gSimpStat - auxiliary function that obtains descriptive statistics for numeric variables in dataframe
kln - auxiliary function that converts all variables in a dataframe to either numeric or character
mmgg - auxiliary function that cleans up output from aggregate() function
quickdesc - auxiliary function that outputs summary of codebook description for variable
resc - auxiliary function that rescales a variable
rmcs - auxiliary function that removes characters common to a set of strings
rnkd - auxiliary function that assigns ranks to values (1=lowest)
showlevs - auxiliary function that describes largest and smallest values of a variable
spmang - auxiliary function that removes leading and trailing spaces from string
widen - auxiliary function that widens the range of a variable
Documented functions
setDS - sets up environment to work with one of the four datasets (EA, LRB, SCCS, WNAI)
mkdummy - makes dummy variable and creates entry for it in metadata
mknwlag - makes network lag variable
fv4scale - helper function to find variables for use in a scale
doMI - creates multiple imputed datasets
mkscale - makes a scale (composite index) from several similar variables
doOLS - estimates regression model using OLS with imputed datasets, including network lag term
doLogit - estimates regression model using logit with imputed datasets, including network lag term
doMNLogit - estimates model using multinomial logit with imputed datasets, including network lag term
CSVwrite - writes objects to csv format file
mkmappng - plots an ordinal variable on world map and writes a png format file
mkcatmappng - plots a categorical variable on world map and writes a png format file
plotSq - plots effects of all independent variables with squared terms and writes a png format file
MEplots - plots marginal effects of independent variables used in doMNLogit
```

Functions in addition to EA.. LRB.. SCCS.. WNAI.. XC.. and doLOGIT doOLS doMNLogit

There are listed after

```load(url("http://dl.dropbox.com/u/9256203/DEf01d.Rdata"),.GlobalEnv)
setDS("SCCS") #by
ls()
```

plotSq - plots effects of all independent variables with squared terms and writes a png format file

function(h,filetitle=NULL){

``` a<-h\$Rmodel
oo<-h\$data
tii<-gsub("Sq","",rownames(a)[grep("Sq",rownames(a))])
ltii<-length(tii)
if (ltii==0){stop("No squared terms in restricted model")}
if (!is.null(filetitle)) {png(file=paste(filetitle,".png",sep=""),width=8,height=5,units="in",pointsize=10,res=500)}
layout(matrix(1:ltii,1,ltii))
ii<-tii[1]
for (ii in tii){
bt<-t(a[grep(ii,rownames(a)),c("coef","bootpval")])
x<-oo[,ii];x<-x[order(x)]
ra<-table(x)
bt2<-bt["coef",paste(ii,"Sq",sep="")]
bt<-bt["coef",ii]
if (is.na(bt2)){bt2<-0}
if (is.na(bt)){bt<-0}
yf<-x*bt+(x^2)*bt2
ymf<-bt+2*(x)*bt2
#     plot(x,yf,type="b",pch=21,cex=.5,bg="black",col="black",ylim=widen(range(yf)),xlim=widen(range(x)),
#          main=ii,xlab=ii,ylab=paste("Effect of",ii,"on dependent variable.",sep=""))
#     rug(jitter(x),col="green",lwd=.6,side=3)
#     points(x,yf,cex=ra[rnkd(x)]^.5,pch=1,col="red")
#     abline(h=0,lty=2,lwd=.5,col="blue")
plot(x,ymf,type="b",pch=21,cex=.5,bg="black",col="black",ylim=widen(range(ymf)),xlim=widen(range(x)),
main=ii,xlab=ii,ylab=paste("Marg. Eff. of ",ii," on dep.varb.",sep=""))
rug(jitter(x),col="green",lwd=.6,side=3)
points(x,ymf,cex=ra[rnkd(x)]^.5,pch=1,col="red")
abline(h=0,lty=2,lwd=.5,col="blue")
}
layout(1)
if (!is.null(filetitle)) {dev.off()}
```

}

mmgg #--converts all variable rownames in a dataframe to either numeric or character

function(x){

``` x<-kln(x)
rownames(x)<-x[,1]
x<-x[,-1]
x
```

}

mkdummy #--make dummy variables and add a description

function(varb,val,showname=TRUE){

``` #--make dummy variables and add a description--
valz<-spmang(gsub("[^ 0-9a-zA-Z]"," ",val))
valz<-capwrd(valz)
valz<-spmang(gsub("[^0-9a-zA-Z]","",valz))
nvarb<-paste(varb,".d",valz,sep="")
dx[,nvarb]<<-(dx[,varb]==val)*1
a<-key[varb,]
a\$type<-"ordinal"
a\$class<-"numeric"
a\$usevb<-"data"
a\$NOTmissing<-a\$FNOTmissing<-sum(!is.na(dx[,varb]))
a\$nUniqVals<-a\$FnUniqVals<-2
a\$variable<-rownames(a)<-nvarb
jf<-paste(a\$description,"==",val)
if (!is.na(match(varb,names(dxf))) & class(dxf[,varb])=="factor"){
j<-kln(data.frame(table(dx[,varb],as.character(dxf[,varb]))))
j<-j[which(j\$Freq>0),]
jf<-paste(a\$description,"==",j\$Var2[which(j\$Var1==val)])
dxf[,nvarb]<<-factor(dx[,nvarb],levels=c(0,1),labels=c("otherwise",j\$Var2[which(j\$Var1==val)]))
}
a\$description<-jf
if (!is.na(key[nvarb,"variable"])){key[nvarb,]<<-a[,names(key)]}
if (is.na(key[nvarb,"variable"])){key<<-rbind(key,a)}
if (showname){print(paste("This dummy variable is named ",nvarb,sep=""))}
if (showname){print(paste("The variable description is: '",jf,"'",sep=""))}
```

}

function(nvbs,nvbsdes){

``` qii<-nvbs[which(!is.na(match(nvbs,names(dx))))]
if (length(qii)>0){dxf[,qii]<<-dx[,qii]}
# --add a description for newly created variables--
a<-key[1:length(nvbs),]
is.na(a)<-TRUE
a\$type<-"ordinal"
a\$class<-"numeric"
a\$usevb<-"data"
```

if (!is.na(match(nvbs,names(dx)))){

``` if (length(nvbs)>1){yz<-apply(!is.na(dx[,nvbs]),2,sum)}
if (length(nvbs)==1){yz<-sum(!is.na(dx[,nvbs]))}
a\$NOTmissing<-a\$FNOTmissing<-yz
a\$nUniqVals<-a\$FnUniqVals<-sapply(nvbs,function(x) length(table(dx[,x])))
```

}

``` a\$variable<-rownames(a)<-nvbs
a\$description<-nvbsdes
rownames(a)<-nvbs
k<-which(!is.na(match(nvbs,rownames(key))))
if (length(k)>0){
print(paste("WARNING: you have overwritten existing values of ",nvbs[k],sep=""))
key[nvbs[k],]<<-a[nvbs[k],]
a<-a[-k,]
}
k<-which(is.na(match(rownames(a),rownames(key))))
if (length(k)>0){key<<-rbind(key,a)}
```

}

capwrd

function(rr){

``` o<-gsub("^\\s+|\\s+\$", "",rr)
o<-gsub("(","-x-",o,fixed=TRUE)
o<-sapply(strsplit(o,"-"),
function(x) (paste(paste(toupper(substr(x,1,1)),(substr(x,2,nchar(x))),sep=""),collapse="-")))
o<-gsub("-X-","(",o,fixed=TRUE)
o<-sapply(strsplit(o," "),
function(x) (paste(paste(toupper(substr(x,1,1)),(substr(x,2,nchar(x))),sep=""),collapse=" ")))
return(o)
```

}

rmcs #--remove common substrings from set of strings

function(rg){

1. --remove common substrings from set of strings--
``` bg<-rg
a<-(sapply(rg,function(x) strsplit(x,"")))
z<-sapply(2:length(a),function(x) match(a1,ax))
z1<-a1
z1[which(rowSums(is.na(z))>0)]<-" "
z1<-unlist(strsplit(spmang(paste(z1,collapse=""))," "))
for (x in z1){bg<-gsub(paste(x,collapse=""),"",bg)}
return(bg)
```

}

rnkd # ranks with ties--no leaps in ranked values

function(a1){

1. ranks with ties--no leaps in ranked values
``` p<-unique(a1)
p<-p[order(p)]
r<-1:length(p)
k<-match(a1,p)
return(r[k])
```

}

spmang - auxiliary function that removes leading and trailing spaces from string

function(x){

``` x<-gsub("^\\s+|\\s+\$", "",x)
for (i in 1:6){x<-gsub("  "," ",x)}
return(x)
```

}

gSimpStat # --generate simple statistics

function(ww){

``` # --generate simple statistics--
z<-which(sapply(ww,function(x) class(x))!="character")
a<-round(data.frame(nobs=apply(!is.na(ww[,z]),2,sum,na.rm=TRUE),
mean=apply(ww[,z],2,mean,na.rm=TRUE),
min=apply(ww[,z],2,min,na.rm=TRUE),
max=apply(ww[,z],2,max,na.rm=TRUE),
sd=apply(ww[,z],2,sd,na.rm=TRUE)),3)
#   a<-data.frame(psych::describe(ww[,z]))
#   a<-round(a[,c(2:4,8:9)],3)
return(a)
```

}

MEplots

function(h,mod="R",varbs=NULL,filetitle=NULL,setylim=NULL,subgrps=NULL,dpires=500){

``` tt<-hpaste(mod,"marEff",sep="")
vxx<-varbs
if (is.null(varbs)){vxx<-setdiff(colnames(tt),c("ID","alt",subgrps))}
jr<-length(vxx)
a<-range(tt[,vxx])
ddx<-hpaste(mod,"meanME",sep="")"MEmean"
k<-match(tt\$alt,colnames(ddx))
if (!is.null(filetitle)) {png(file=paste(filetitle,".png",sep=""),width=8,height=5,units="in",pointsize=10,res=dpires)}
if (jr>10){layout(matrix(1:(3*ceil(jr/3)),3,ceil(jr/3)))}
if (jr<=10){layout(matrix(1:(2*ceil(jr/2)),2,ceil(jr/2)))}
iy<-vxx[1]
for (iy in vxx){
if (is.na(match(iy,setylim))){ay<-range(tt[,iy]);mtl<-NULL}
if (!is.na(match(iy,setylim))){ay<-range(tt[,setylim]);mtl<-": stndYaxis"}
if (is.null(subgrps)){
plot(factor(tt\$alt),tt[,iy],main=paste(iy,mtl,sep=""),lwd=.5,col=colors()[484],ylim=ay)
abline(h=0,col="red")
tt\$zi<-ddx[iy,k]
points(factor(tt\$alt),tt\$zi,col="blue",bg="blue",pch=18)
}
if (!is.null(subgrps)){
zg<-which(tt[,subgrps]==0)
plot(factor(tt\$alt[zg]),tt[zg,iy],main=paste(iy,mtl,sep=""),boxwex=.2,
cex.axis=.7,lwd=.5,ylim=ay,
at=(1:length(table(tt\$alt[zg])))-.22,col="yellow")
zg<-which(tt[,subgrps]==1)
plot(factor(tt\$alt[zg]),tt[zg,iy],boxwex=.2,cex=.7,
abline(h=0,col="blue")
}
}
layout(1)
if (!is.null(filetitle)) {dev.off()}
```

}

chK # some descriptors of the fields in a data.frame

function(hh){

``` # some descriptors of the fields in a data.frame
aa<-data.frame(NOTmissing=apply(!is.na(hh),2,sum),
class=sapply(hh,function(x) class(x)),
nUniqVals=sapply(hh,function(x) length(unique(x[which(!is.na(x))]))))
return(aa)
```

}

mknwlag #--make network lag variable

function(MIdata,wtMat,varb){

1. --make network lag variable--
``` nimp<-max(MIdata\$.imp)
newvarb<-paste("W",varb,sep="")
MIdata[,newvarb]<-0;is.na(MIdata[,newvarb])<-TRUE
for (iy in 1:nimp){
MIdata[which(MIdata\$.imp==iy),newvarb]<-wtMat%*%as.matrix(MIdata[which(MIdata\$.imp==iy),varb])
}
return(MIdata[,newvarb])
```

}

mkscale #--make a composite index from variables in compvarbs, from 1st principal component --# --remove redundant variables--

``` #--make a composite index from variables in compvarbs, from 1st principal component--
# compvarbs=character string of variable names
# impdata=imputed dataset created using doMI
# udnavn=name of composite index created in impdata
# type= 1 of c("LP","pc1","mean")
if (is.null(udnavn)){udnavn<-paste(compvarbs,type,sep=".")}
print(udnavn)
compvarbs<-intersect(get(compvarbs),names(impdata))
#--remove redundant variables--
zxa<-compvarbs
gxg<-as.matrix(impdata[,zxa]);dim(gxg)
oyo<-qr(gxg);oyo\$rank
bgti<-zxa
zxa<-zxa[oyo\$pivot[seq(oyo\$rank)]]
if (length(setdiff(bgti,zxa))>0){
print(paste("To avoid singularity problems, variable ",paste(setdiff(bgti,zxa),collapse=",")," dropped",sep=""))
compvarbs<-zxa
}
# -------------------------

a<-rownames(suppressWarnings(alpha(impdata[,compvarbs])\$alpha.drop))
# following not necessary for princomp, but for others yes
# ng=vector of length compvarbs with values 1 or -1 (-1 inverts variable in compvarbs)
ng<-rep(1,length(compvarbs))
izi<-grep("-",a)
if (length(izi)>0){ng[izi]<-(-1)}
names(ng)<-gsub("-","",a)
s<-1
if (!is.null(set.direction)){s<-as.numeric(ng[set.direction])}
ng<-ng*s
ix<-impdata[,compvarbs]*matrix(ng[compvarbs],NROW(impdata),length(ng),byrow=TRUE)
ix<-apply(ix,2,scale)*1+10
if (min(ix)<0 & type=="LP"){
print("variable has negative values, not suitable for this method, try rescaling or set type='pc1'")
print(apply(ix,2,min))
stop("negative values")
}
v<-kln(data.frame(ix,.id=impdata\$.id,.imp=impdata\$.imp))
impdata[,udnavn]<-0

if (type=="pc1"){
wts<-pvex<-NULL
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
o<-princomp(v[z,compvarbs])
pvex<-rbind(pvex,(o\$sdev^2)/sum(o\$sdev^2))
impdata[z,udnavn]<-o\$scores[,1]*mscr
}
pvex<-apply(pvex,2,mean)
print("Pct Variance Explained by component")
print(pvex[which(pvex>.01)])
}

if (type=="mean"){
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
impdata[z,udnavn]<-apply(v[z,compvarbs],1,mean)
}
}

if (type=="LP"){
kyf<-kln(data.frame(.id=impdata[which(impdata\$.imp==1),".id"],ord=1:length(which(impdata\$.imp==1))))
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
Amat<-as.matrix(v[z,compvarbs])
row.names(Amat)<-v\$.id[z]
pnv<-row.names(Amat)
fc<-matrix(0,NCOL(Amat),NCOL(Amat))
diag(fc)<-1
rownames(fc)<-paste("ct",(1:NCOL(Amat)),sep="")
colnames(fc)<-colnames(Amat)
Amat<-rbind(fc,Amat)
sx<-NULL
iter<-0
o<-pnv
while (length(o)>1){
dea<-NULL
iter<-iter+1
bvec<-rep(1,NROW(Amat))
bvec[1:NROW(fc)]<-(1/10^6)
names(bvec)<-rownames(Amat)
f.dir<-rep(c(">=","<="),c(NROW(fc),length(o)))
for (i in 1:length(o)){
cvec<-(Amat[o[i],])
olp<-lp("max", cvec, Amat, f.dir, bvec)
dea<-rbind(dea,olp\$objval)
}
z<-which(round(dea[,1],7)==1)
sx<-rbind(sx,cbind(data.frame(o[z]),iter))
z<-which(round(dea[,1],7)!=1)
o<-o[z]
if (length(o)>=1){Amat<-Amat[c(rownames(fc),o),]}
}
sx<-kln(sx)
names(sx)[1]<-".id"

duf<-merge(kyf,sx,by=".id",all=TRUE)
z<-which(is.na(duf\$iter))
if (length(z)>0){duf\$iter[z]<-iter+1}
duf[,NCOL(duf)]<-(max(duf[,NCOL(duf)])+1)-duf[,NCOL(duf)]

z<-which(impdata\$.imp==imn)
k<-match(impdata\$.id[z],duf\$.id)
impdata[z,udnavn]<-duf\$iter[k]
}
}
# ------------------stats-----------------

dld<-NULL
for (imn in 1:max(v\$.imp)){
z<-which(v\$.imp==imn)
if (!is.null(set.range)){  impdata[z,udnavn]<-resc(impdata[z,udnavn],set.range[1],set.range[2]) }
dld<-rbind(dld,kln(data.frame(varbs=compvarbs,cor=cor(impdata[z,compvarbs],impdata[z,udnavn]))))
}

dld<-kln(aggregate(dld[,c("cor")],list(dld\$varbs),mean))
k<-match(dld\$Group.1,key\$variable)
dld<-kln(data.frame(cor.w.scale=round(dld[,2],3),inv=ng[dld\$Group.1],varb=dld\$Group.1,
description=key[k,c("description")]))
rownames(dld)<-dld\$varb
if (type=="pc1"){
a<-t(apply(wts,2,range))
dld<-kln(data.frame(a,dld))
}
dld<-dld[order(dld\$cor.w.scale),]

stats1<-data.frame(suppressWarnings(alpha(ix)\$total[c(2)]))

levs<-quickdesc(dld\$varb)

impdata[,paste(udnavn,"Sq",sep="")]<-impdata[,udnavn]^2
h<-list(impdata[,c(udnavn,paste(udnavn,"Sq",sep=""))],stats1,dld,levs)
names(h)<-c("scales","stats","corrs","varb.desc")
cat(paste('c("',paste(dld\$varb,collapse='", "'),'")',sep=""), "\n")
return(h)
```

}

resc # rescale series

function(vb,bg=1,en=8){

``` # rescale series
b<-vb-min(vb,na.rm=TRUE)
b<-bg+b/max(b,na.rm=TRUE)*(en-bg)
return(b)
```

}

fv4scale

function(lookword,dropword=NULL,keepword=NULL,coreword=NULL,

```                  nmin=NULL,minalpha=.7,chklevels=FALSE,verbose=TRUE,
doscale=TRUE,setseed=NULL){
if (is.null(nmin)){nmin<-round(NROW(dx)/4)}
if (!is.null(setseed) & length(match(setseed,names(dx)))==0){
setseed<-NULL
}
lookword<-tolower(lookword)
if (chklevels){
zaa<-names(dxf)[which(sapply(dxf,class)=="factor")];length(zaa)
zpp<-grep(".d",zaa,fixed=TRUE)
if (length(zpp)>0){zaa<-zaa[-zpp]}
if (length(zaa)>0){
for (ih in zaa){
zi<-unique(unlist(sapply(lookword,function(x) grep(x,tolower(levels(dxf[,ih]))))))
if (length(zi)>0){
if (verbose) {print(ih)}
g<-plyr::count(data.frame(fac=dxf[,ih],num=dx[,ih]),c("fac","num"))
for (ib in zi){
if (g\$freq[ib]>=10){
mkdummy(ih,g\$num[ib],showname=FALSE)
}
}
}
}
}
zll<-rownames(key)[unique(unlist(sapply(lookword,function(x) grep(x,tolower(key\$description)))))]
if (!is.null(keepword)){
keepword<-tolower(keepword)
zkk<-rownames(key)[unique(unlist(sapply(keepword,function(x) grep(x,tolower(key\$description)))))]
zll<-intersect(zll,zkk)
}
if (!is.null(dropword)){
dropword<-tolower(dropword)
zdd<-rownames(key)[unique(unlist(sapply(dropword,function(x) grep(x,tolower(key\$description)))))]
zll<-setdiff(zll,zdd)
}
zaa<-rownames(key)[which(key\$usevb=="data" & key\$NOTmissing>nmin & key\$type=="categorical")]
zaa<-intersect(zaa,zll)
zpp<-grep(".d",zaa,fixed=TRUE)
if (length(zpp)>0){zaa<-zaa[-zpp]}
if (length(zaa)>0){
phh<-kln(dxf[,zaa])
nhh<-NULL
for (ih in zaa){
prr<-plyr::count(data.frame(fac=phh[,ih],num=dx[,ih]),c("fac","num"))
prr<-prr[which(!is.na(prr\$num) & !is.na(prr\$fac) & (prr\$freq>=10)),]
if (NROW(prr)>0){
for (ib in NROW(prr)){
mkdummy(ih,prr\$num[ib],showname=FALSE)
}
}
}
}
}
zi<-unique(unlist(sapply(lookword,function(x) grep(x,tolower(key\$description)))));zi
zi<-zi[which(key\$usevb[zi]=="data" & key\$NOTmissing[zi]>nmin & key\$type[zi]=="ordinal")];zi
zi<-zi[which(!is.na(match(rownames(key)[zi],names(dx))))]
if (!is.null(dropword)){
dropword<-tolower(dropword)
dxi<-unique(unlist(sapply(dropword,function(x) grep(x,tolower(key\$description)))));dxi
zi<-setdiff(zi,dxi)
}
if (!is.null(keepword)){
keepword<-tolower(keepword)
dxi<-unique(unlist(sapply(keepword,function(x) grep(x,tolower(key\$description)))));dxi
zi<-intersect(zi,dxi)
}
vxx<-rownames(key)[zi]
```

if (!is.null(setseed)){

``` vxx<-unique(c(vxx,setseed))
o<-abs(cor(dx[,setseed],dx[,vxx],use="pair"))
o<-o[1,which(!is.na(o))]
vxx<-names(o)[which(o>=.3)]
```

}

``` if (verbose & !doscale){print(quickdesc(vxx))}

if (doscale){
a<-apply(!is.na(dx[,vxx]),2,sum)
a<-a[order(-a)]
vxx<-names(a)
a<-round(cor(dx[,vxx],use="pair"),3)
diag(a)<-0
a[which(is.na(a))]<-1
z<-which(a==1,arr.ind=TRUE)
z<-z[which(z[,"col"]<z[,"row"]),"row"]
if (length(z)>0){vxx<-vxx[-z]}
vxx

if (!is.null(coreword)){
coreword<-tolower(coreword)
zi<-unique(unlist(sapply(coreword,function(x) grep(x,tolower(key\$description)))));zi
zi<-zi[which(key\$usevb[zi]=="data" & key\$NOTmissing[zi]>nmin & key\$type[zi]=="ordinal")];zi
zi<-zi[which(!is.na(match(rownames(key)[zi],names(dx))))]
cxx<-rownames(key)[zi];cxx
cxx<-intersect(cxx,vxx);cxx
ob<-which(rowSums(is.na(dx[,cxx]))==0);length(ob)
kxx<-vxx[which(apply(is.na(dx[ob,vxx]),2,sum)<length(ob)/2)];kxx
ti<-apply(abs(cor(dx[,cxx],dx[,kxx],use="pair")),2,mean)
ti<-ti[order(-ti)]
txx<-names(ti);txx
cxx<-cxx[order(match(cxx,txx))];cxx
oxx<-setdiff(txx,cxx)[1:length(cxx)];oxx
vxx<-union(cxx,oxx[which(!is.na(oxx))]);vxx
}
zig<-which(rowSums(is.na(dx[,vxx]))==0);length(zig)
if (length(zig)<2){print("Too many missing values to calculate alpha")}
if (length(zig)>1){
o<-chK(dx[zig,vxx])
vxx<-rownames(o)[which(o\$nUniqVals>1 & o\$class=="numeric")]
if  (verbose) {print(key[vxx,c("NOTmissing","nUniqVals","description")])}
puta<-suppressWarnings(alpha(dx[,vxx])\$total[2])
while (puta<minalpha){
o<-suppressWarnings(alpha(dx[,vxx])\$alpha.drop)
z<-which(o\$std.alpha==max(o\$std.alpha))
vxx<-vxx[-z]
if (length(vxx)==0){stop("all variables eliminated, try lower minalpha")}
if (sum(apply(is.na(dx[,vxx]),1,sum)>0)==NROW(dx)){stop("too many missing values, try higher nmin")}
puta<-suppressWarnings(alpha(dx[,vxx])\$total["std.alpha"])
}
if (verbose)  {print(puta)}
qwx<-suppressWarnings(alpha(dx[,vxx])\$alpha.drop)
if (verbose)  {print(qwx)}
```
1. if (verbose) {print(key[vxx,c("NOTmissing","nUniqVals","description")])}
``` phh<-kln(dxf[,vxx])
for (ii in vxx){
i2<-rownames(qwx)[match(ii,gsub("-","",rownames(qwx),fixed=TRUE))]
owwr<-"[--AS IS--]"
if (nchar(gsub("-","",i2,fixed=TRUE))<nchar(i2)){owwr<-"[!INVERTED!]"}
if (verbose)  {print(paste("============",i2,owwr,key[ii,"description"],"=============="))}
prr<-plyr::count(data.frame(fac=phh[,ii],num=dx[,ii]),c("fac","num"))
prr<-prr[which(!is.na(prr\$num) & !is.na(prr\$fac) & !is.na(prr\$freq)),]
prr<-prr[order(prr\$num),]
prr<-kln(prr[c(1,NROW(prr)),])
if (is.numeric(prr\$fac)){
pjj<-paste("range: ",paste(prr\$fac,collapse=" to "),sep="")
}
if (is.character(prr\$fac)){
pjj<-paste(paste("[",prr\$num,"]: ",substr(prr\$fac,1,75),sep=""),collapse=" |+| ")
}
if (verbose)  {print(pjj)}
}
}
}
```
``` if (verbose)  {print("--variable names below---")}
cat(paste('c("',paste(vxx,collapse='", "'),'")',sep=""), "\n")
return(vxx)
```

}

widen #widen or narrow the range between two values

function(var,rat=1.1){

``` #widen or narrow the range between two values
a<-diff(range(var))
minv<-min(var)-((rat-1)/2)*a
maxv<-max(var)+((rat-1)/2)*a
return(c(minv,maxv))
```

}

kln # clean up dataframe, convert factors to character, etc.

function(bgg){

``` # clean up dataframe, convert factors to character, etc.
if (is.data.frame(bgg)){
if (ncol(bgg)>1){
z<-which(sapply(bgg,function(x) class(x))=="factor")
if (length(z)>0){bgg[,z]<-sapply(bgg[,z],function(x) as.character(x))}
z<-which(sapply(bgg,function(x) class(x))=="character")
if (length(z)>0){
bgg[,z]<-sapply(bgg[,z],function(x) iconv(x,to="ASCII//TRANSLIT"))
bgg[,z]<-sapply(bgg[,z],function(x) gsub("^\\s+|\\s+\$", "", x))
}
suppressWarnings(z<-sapply(bgg,function(x) length(which(is.na(as.numeric(gsub("[ ,]","",x)))))==length(which(is.na(x)))))
suppressWarnings(bgg[,z]<-sapply(bgg[,z], function(x) as.numeric(gsub("[ ,]","",x))))
}
}
return(bgg)
```

}

mkmappng

```mkmappng
```

function(usedata,varb,filetitle=NULL,show="ydata",numnb.lg=3,numnb.lm=20,numch=0,

```                  pvlm=.05,dfbeta.show=FALSE,zoom=FALSE,landcolor="black",rimcol=NULL,
map.width=8,map.height=5,map.units="in",map.pointsize=10,map.res=500){
if (length(which(!is.na(match(c("long","lati"),names(usedata)))))!=2){
stop("input data needs to have two variables, one named 'long', the other 'lati'")
}
if (is.null(filetitle)){filetitle<-varb}
zdv<-which(!is.na(usedata[,varb]))
xy<-SpatialPoints(usedata[zdv,c("long","lati")])
lgt<-as.matrix(localG(usedata[zdv,varb],nb2listw(include.self(knn2nb(knearneigh(xy,k=numnb.lg,longlat=TRUE,RANN=FALSE))),style="B")))
lmt<-as.matrix(localmoran(usedata[zdv,varb],nb2listw(knn2nb(knearneigh(xy,k=numnb.lm,longlat=TRUE,RANN=FALSE)),style="B")))
ydata<-usedata[zdv,varb]
dfby<-ydata
is.na(dfby)<-TRUE
if (table(gsub("dfb.","",names(usedata),fixed=TRUE))[varb]==2){
dfby=usedata[zdv,paste("dfb.",varb,sep="")]
is.na(dfby[dfby==0])<-TRUE
}

splgt<-SpatialPointsDataFrame(coordinates(xy),
kln(data.frame(lgt=lgt,lmtz=lmt[,"Z.Ii"],
lmtp=(lmt[,"Pr(z > 0)"]<pvlm)*1,
ydata=ydata,dfbeta=dfby)))

png(file=paste(filetitle,"_",show,".png",sep=""),width=map.width,height=map.height,
units=map.units,pointsize=map.pointsize,res=map.res)
par(mar=c(0, 0, 0, 0) + 0.1)
if (zoom==FALSE){suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,ylim=c(-60,79),
xlim=c(-180,180),fill=TRUE,col=landcolor,lty=0,resolution=0))}
if (zoom==TRUE){suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,
ylim=widen(range(usedata[zdv,"lati"])),
xlim=widen(range(usedata[zdv,"long"])),
fill=TRUE,col=landcolor,lty=0,resolution=0))}
splgt<-splgt[order(-splgt@data[,show]),]
p1<-coordinates(splgt)
kk<-mapproject(p1[,"long"],p1[,"lati"],orientation=c(90, 155,0), projection="mercator")
kk<-SpatialPoints(as.matrix(cbind(kk\$x,kk\$y)))
blr<-resc(splgt@data[,show],.35,2.5)
brks<-quantile(blr,seq(0,1,by=.2),na.rm=TRUE);brks
farv<-colorRampPalette(c("yellow","brown"))(length(brks)-1);farv
nuk<-findInterval(blr, brks, all.inside=TRUE)
#   tff<-rep(2,length(nuk))
#   tff[which(!is.na(splgt@data\$dfbeta))]<-tff[which(!is.na(splgt@data\$dfbeta))]+sign(splgt@data\$dfbeta)[which(!is.na(splgt@data\$dfbeta))]
if (is.null(rimcol)){rimcol<-farv[nuk]}
if (dfbeta.show){
dfbx<-abs(splgt@data[,"dfbeta"]*10)
dfbsign<-(splgt@data[,"dfbeta"]>0)+1
#triangle points down shows societies whose inclusion signif. lowers coefficient
#triangle points up shows societies whose inclusion signif increases coefficient
}
z<-which(splgt@data\$lmtp==1 & splgt@data\$lmtz>0)
if (numch>0){
numch<-min(round(length(z)/3),numch)
ig<-as.matrix(dist(coordinates(kk[z,])))
v<-cutree(hclust(as.dist(ig)),k=numch);table(v)
for (ii in unique(v)){
z1<-z[which(v==ii)]
if (length(z1)>2){
g4<-dismo::convHull(kk[z1,])
}
}
}
par(mar=c(5, 4, 4, 2) + 0.1)
dev.off()
```

}

mkcatmappng

```mkcatmapping
```

function(usedata,varb,filetitle=NULL,zoom=FALSE,landcolor=colors()[387],

```                     map.width=8,map.height=5,map.units="in",map.pointsize=10,map.res=500){
if (length(which(!is.na(match(c("long","lati"),names(usedata)))))!=2){
stop("input data needs to have two variables, one named 'long', the other 'lati'")
}
if (is.null(filetitle)){filetitle<-varb}
usedata<-usedata[which(!is.na(usedata[,varb])),]
xy<-SpatialPoints(usedata[,c("long","lati")])
splgt<-SpatialPointsDataFrame(coordinates(xy),kln(data.frame(usedata)))
farv<-c(brewer.pal(12,"Set3"),brewer.pal(12,"Paired"),brewer.pal(8,"Accent"))
blr<-as.numeric(factor(splgt@data[,varb]))
gii<-plyr::count(data.frame(num=blr,fac=splgt@data[,varb]),c("num","fac"))
png(file=paste(filetitle,".png",sep=""),width=map.width,height=map.height,
units=map.units,pointsize=map.pointsize,res=map.res)
par(mar=c(0, 0, 0, 0) + 0.1)
if (zoom==FALSE){
suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,ylim=c(-60,79),
xlim=c(-180,180),fill=TRUE,col=landcolor,lty=0,resolution=0))
p1<-coordinates(splgt)
kk<-mapproject(p1[,"long"],p1[,"lati"],orientation=c(90, 155,0), projection="mercator")
kk<-SpatialPoints(as.matrix(cbind(kk\$x,kk\$y)))
legend("topright",legend=gii\$fac,pch=gii\$num,col=farv[gii\$num],cex=.45)
}
if (zoom==TRUE){
map("world",ylim=widen(range(usedata[,"lati"]),1.3),
xlim=widen(range(usedata[,"long"]),1.3),fill=TRUE,col=landcolor,lty=0,resolution=0) #13,387
legend("topright",legend=gii\$fac,pch=gii\$num,col=farv[gii\$num],cex=.45)
}
par(mar=c(5, 4, 4, 2) + 0.1)
dev.off()
```

}

CSVwrite #--write object to csv file--

function(object,filestem,appnd2=FALSE){

``` #--write object to csv file--
if (class(object)=="list"){
pff<-length(object)
suppressWarnings(write.table(paste(names(object)[1],"\n"),file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names =FALSE,
row.names = FALSE,append=appnd2))
suppressWarnings(write.table(object1,file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
row.names = FALSE,append=TRUE))
if (pff>1){
sapply(c(2:pff),function(x) {
for (qz in 1:2){suppressWarnings(write.table(" ",file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
row.names = FALSE,append=TRUE))}
suppressWarnings(write.table(paste(names(object)[x],"\n"),file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
row.names = FALSE,append=TRUE))
suppressWarnings(write.table(objectx,file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names = NA,
row.names = TRUE,append=TRUE))})
}
}
if (class(object)=="table" | class(object)=="matrix" |
class(object)=="numeric" | class(object)=="integer" |
class(object)=="data.frame"  | class(object)=="character"){
suppressWarnings(write.table(object,file=paste(filestem,".csv",sep=""),
quote=TRUE,sep=",",dec=".",qmethod="double",col.names = NA,
row.names = TRUE,append=appnd2))
}
```

} ==