Dow Eff functions

From InterSciWiki
Jump to: navigation, search

DEf01f - .Dow-Eff Functions - DEf http://capone.mtsu.edu/eaeff --> http://capone.mtsu.edu/eaeff/Dow-Eff%20functions.html --> http://capone.mtsu.edu/eaeff/DEf01XC.html Using the Dow-Eff functions in R ---------- Using the Dow-Eff functions in R: working with the SCCS -------the functions will estimate OLS, logit, and multinomial logit models, using multiple imputation to handle the problem of missing data, and network lag terms to handle Galton’s Problem.

mkscale

mkscale #--make a composite index from variables in compvarbs, from 1st principal component --# --remove redundant variables--
function(compvarbs,udnavn=NULL,impdata,type="LP",add.descrip=NULL,set.direction=NULL,set.range=NULL){
#--make a composite index from variables in compvarbs, from 1st principal component--
# compvarbs=character string of variable names
# impdata=imputed dataset created using doMI
# udnavn=name of composite index created in impdata
# type= 1 of c("LP","pc1","mean")
if (is.null(udnavn)){udnavn<-paste(compvarbs,type,sep=".")}
print(udnavn)
compvarbs<-intersect(get(compvarbs),names(impdata))
#--remove redundant variables--  
zxa<-compvarbs
gxg<-as.matrix(impdata[,zxa]);dim(gxg)
oyo<-qr(gxg);oyo$rank
bgti<-zxa
zxa<-zxa[oyo$pivot[seq(oyo$rank)]]
if (length(setdiff(bgti,zxa))>0){
  print(paste("To avoid singularity problems, variable ",paste(setdiff(bgti,zxa),collapse=",")," dropped",sep=""))
  compvarbs<-zxa
}
# -------------------------

a<-rownames(suppressWarnings(alpha(impdata[,compvarbs])$alpha.drop))
# following not necessary for princomp, but for others yes
# ng=vector of length compvarbs with values 1 or -1 (-1 inverts variable in compvarbs)
ng<-rep(1,length(compvarbs))
izi<-grep("-",a)
if (length(izi)>0){ng[izi]<-(-1)}
names(ng)<-gsub("-","",a)
s<-1
if (!is.null(set.direction)){s<-as.numeric(ng[set.direction])}
ng<-ng*s
ix<-impdata[,compvarbs]*matrix(ng[compvarbs],NROW(impdata),length(ng),byrow=TRUE)
ix<-apply(ix,2,scale)*1+10
if (min(ix)<0 & type=="LP"){
  print("variable has negative values, not suitable for this method, try rescaling or set type='pc1'")
  print(apply(ix,2,min))
  stop("negative values")
}
v<-kln(data.frame(ix,.id=impdata$.id,.imp=impdata$.imp))
impdata[,udnavn]<-0

if (type=="pc1"){
  wts<-pvex<-NULL
  for (imn in 1:max(v$.imp)){
    z<-which(v$.imp==imn)
    o<-princomp(v[z,compvarbs])
    pvex<-rbind(pvex,(o$sdev^2)/sum(o$sdev^2))
    mscr<-sign(o$loadings[1,1])
    impdata[z,udnavn]<-o$scores[,1]*mscr
    wts<-rbind(wts,t(o$loadings[,1])*mscr)
  }
  pvex<-apply(pvex,2,mean)
  print("Pct Variance Explained by component")
  print(pvex[which(pvex>.01)])
}

if (type=="mean"){
  for (imn in 1:max(v$.imp)){
    z<-which(v$.imp==imn)
    impdata[z,udnavn]<-apply(v[z,compvarbs],1,mean)
  }
}

if (type=="LP"){
  kyf<-kln(data.frame(.id=impdata[which(impdata$.imp==1),".id"],ord=1:length(which(impdata$.imp==1))))
  for (imn in 1:max(v$.imp)){
    z<-which(v$.imp==imn)
    Amat<-as.matrix(v[z,compvarbs])
    row.names(Amat)<-v$.id[z]
    pnv<-row.names(Amat)
    fc<-matrix(0,NCOL(Amat),NCOL(Amat))
    diag(fc)<-1
    rownames(fc)<-paste("ct",(1:NCOL(Amat)),sep="")
    colnames(fc)<-colnames(Amat)
    Amat<-rbind(fc,Amat)
    sx<-NULL
    iter<-0
    o<-pnv
    while (length(o)>1){
      dea<-NULL
      iter<-iter+1
      bvec<-rep(1,NROW(Amat))
      bvec[1:NROW(fc)]<-(1/10^6)
      names(bvec)<-rownames(Amat)
      f.dir<-rep(c(">=","<="),c(NROW(fc),length(o)))
      for (i in 1:length(o)){
        cvec<-(Amat[o[i],])
        olp<-lp("max", cvec, Amat, f.dir, bvec)
        dea<-rbind(dea,olp$objval)
      }
      z<-which(round(dea[,1],7)==1)
      sx<-rbind(sx,cbind(data.frame(o[z]),iter))
      z<-which(round(dea[,1],7)!=1)
      o<-o[z]
      if (length(o)>=1){Amat<-Amat[c(rownames(fc),o),]}
    }
    sx<-kln(sx)
    names(sx)[1]<-".id"
    
    duf<-merge(kyf,sx,by=".id",all=TRUE)
    z<-which(is.na(duf$iter))
    if (length(z)>0){duf$iter[z]<-iter+1}
    duf[,NCOL(duf)]<-(max(duf[,NCOL(duf)])+1)-duf[,NCOL(duf)]
    head(duf)
    
    z<-which(impdata$.imp==imn)
    k<-match(impdata$.id[z],duf$.id)
    impdata[z,udnavn]<-duf$iter[k]
  }
} 
# ------------------stats-----------------

dld<-NULL
for (imn in 1:max(v$.imp)){
  z<-which(v$.imp==imn)
  if (!is.null(set.range)){  impdata[z,udnavn]<-resc(impdata[z,udnavn],set.range[1],set.range[2]) }
  dld<-rbind(dld,kln(data.frame(varbs=compvarbs,cor=cor(impdata[z,compvarbs],impdata[z,udnavn]))))
}

dld<-kln(aggregate(dld[,c("cor")],list(dld$varbs),mean))
k<-match(dld$Group.1,key$variable)
dld<-kln(data.frame(cor.w.scale=round(dld[,2],3),inv=ng[dld$Group.1],varb=dld$Group.1,
                    description=key[k,c("description")]))
rownames(dld)<-dld$varb
if (type=="pc1"){
  a<-t(apply(wts,2,range))
  colnames(a)<-c("min.load","max.load")
  dld<-kln(data.frame(a,dld))
}
dld<-dld[order(dld$cor.w.scale),]

stats1<-data.frame(suppressWarnings(alpha(ix)$total[c(2)]))

levs<-quickdesc(dld$varb)    

impdata[,paste(udnavn,"Sq",sep="")]<-impdata[,udnavn]^2
if (is.null(add.descrip)){add.descrip<-udnavn}
addesc(udnavn,add.descrip)
addesc(paste(udnavn,"Sq",sep=""),paste(add.descrip," Squared",sep=""))
h<-list(impdata[,c(udnavn,paste(udnavn,"Sq",sep=""))],stats1,dld,levs)
names(h)<-c("scales","stats","corrs","varb.desc")
cat(paste('c("',paste(dld$varb,collapse='", "'),'")',sep=""), "\n")
return(h)

}

Objects

Objects contained in R workspace DEf1d.Rdata Manual_DEf.htm

Datasets
EA --  Ethnographic Atlas dataset
EAkey --  Ethnographic Atlas metadata file
EAfact --  Ethnographic Atlas dataset with factor labels
EAcov --  Ethnographic Atlas variable covariates for imputation
LRB --  Binford forager dataset
LRBkey --  Binford forager metadata file
LRBfact --  Binford forager dataset with factor labels
LRBcov --  Binford forager variable covariates for imputation
SCCS --  Standard Cross-Cultural Sample dataset
SCCSkey --  Standard Cross-Cultural Sample metadata file
SCCSfact --  Standard Cross-Cultural Sample dataset with factor labels
SCCScov --  Standard Cross-Cultural Sample variable covariates for imputation
WNAI --  Western North American Indians dataset
WNAIkey --  Western North American Indians metadata file
WNAIfact --  Western North American Indians dataset with factor labels
WNAIcov --  Western North American Indians variable covariates for imputation
XC --  Merged 371 society dataset
XCkey - Merged 371 society metadata file
XCfact - Merged 371 society dataset with factor labels
XCcov - Merged 371 society variable covariates for imputation
llm - Matrix of linguistic proximities between all pairs of societies
Undocumented functions
chK - auxiliary function that finds some characteristics of variables in dataframe
chkpmc - auxiliary function that checks variables for high collinearity
gSimpStat - auxiliary function that obtains descriptive statistics for numeric variables in dataframe
kln - auxiliary function that converts all variables in a dataframe to either numeric or character
mmgg - auxiliary function that cleans up output from aggregate() function
quickdesc - auxiliary function that outputs summary of codebook description for variable
resc - auxiliary function that rescales a variable
rmcs - auxiliary function that removes characters common to a set of strings
rnkd - auxiliary function that assigns ranks to values (1=lowest)
showlevs - auxiliary function that describes largest and smallest values of a variable
spmang - auxiliary function that removes leading and trailing spaces from string
widen - auxiliary function that widens the range of a variable
Documented functions
setDS - sets up environment to work with one of the four datasets (EA, LRB, SCCS, WNAI)
mkdummy - makes dummy variable and creates entry for it in metadata
mknwlag - makes network lag variable
addesc - adds or changes description of variable in metadata
fv4scale - helper function to find variables for use in a scale
doMI - creates multiple imputed datasets
mkscale - makes a scale (composite index) from several similar variables
doOLS - estimates regression model using OLS with imputed datasets, including network lag term
doLogit - estimates regression model using logit with imputed datasets, including network lag term
doMNLogit - estimates model using multinomial logit with imputed datasets, including network lag term
CSVwrite - writes objects to csv format file
mkmappng - plots an ordinal variable on world map and writes a png format file
mkcatmappng - plots a categorical variable on world map and writes a png format file
plotSq - plots effects of all independent variables with squared terms and writes a png format file
MEplots - plots marginal effects of independent variables used in doMNLogit

Functions in addition to EA.. LRB.. SCCS.. WNAI.. XC.. and doLOGIT doOLS doMNLogit

There are listed after

load(url("http://dl.dropbox.com/u/9256203/DEf01d.Rdata"),.GlobalEnv)
setDS("SCCS") #by
ls()

plotSq - plots effects of all independent variables with squared terms and writes a png format file

function(h,filetitle=NULL){

 a<-h$Rmodel
 oo<-h$data
 tii<-gsub("Sq","",rownames(a)[grep("Sq",rownames(a))])
 ltii<-length(tii)
 if (ltii==0){stop("No squared terms in restricted model")}
 if (!is.null(filetitle)) {png(file=paste(filetitle,".png",sep=""),width=8,height=5,units="in",pointsize=10,res=500)}
 layout(matrix(1:ltii,1,ltii))
 ii<-tii[1]
 for (ii in tii){
   bt<-t(a[grep(ii,rownames(a)),c("coef","bootpval")])
   x<-oo[,ii];x<-x[order(x)]
   ra<-table(x)
   bt2<-bt["coef",paste(ii,"Sq",sep="")]
   bt<-bt["coef",ii]
   if (is.na(bt2)){bt2<-0}
   if (is.na(bt)){bt<-0}
   yf<-x*bt+(x^2)*bt2
   ymf<-bt+2*(x)*bt2
#     plot(x,yf,type="b",pch=21,cex=.5,bg="black",col="black",ylim=widen(range(yf)),xlim=widen(range(x)),
#          main=ii,xlab=ii,ylab=paste("Effect of",ii,"on dependent variable.",sep=""))
#     rug(jitter(x),col="green",lwd=.6,side=3)
#     points(x,yf,cex=ra[rnkd(x)]^.5,pch=1,col="red")
#     abline(h=0,lty=2,lwd=.5,col="blue")
   plot(x,ymf,type="b",pch=21,cex=.5,bg="black",col="black",ylim=widen(range(ymf)),xlim=widen(range(x)),
   main=ii,xlab=ii,ylab=paste("Marg. Eff. of ",ii," on dep.varb.",sep=""))
   rug(jitter(x),col="green",lwd=.6,side=3)
   points(x,ymf,cex=ra[rnkd(x)]^.5,pch=1,col="red")
   abline(h=0,lty=2,lwd=.5,col="blue")
 }
 layout(1)
 if (!is.null(filetitle)) {dev.off()}

}

mmgg #--converts all variable rownames in a dataframe to either numeric or character

function(x){

 x<-kln(x)
 rownames(x)<-x[,1]
 x<-x[,-1]
 x

}

mkdummy #--make dummy variables and add a description

function(varb,val,showname=TRUE){

 #--make dummy variables and add a description--
 valz<-spmang(gsub("[^ 0-9a-zA-Z]"," ",val))
 valz<-capwrd(valz)
 valz<-spmang(gsub("[^0-9a-zA-Z]","",valz))
 nvarb<-paste(varb,".d",valz,sep="")
 dx[,nvarb]<<-(dx[,varb]==val)*1
 a<-key[varb,]
 a$type<-"ordinal"
 a$class<-"numeric"
 a$usevb<-"data"
 a$NOTmissing<-a$FNOTmissing<-sum(!is.na(dx[,varb]))
 a$nUniqVals<-a$FnUniqVals<-2
 a$variable<-rownames(a)<-nvarb
 jf<-paste(a$description,"==",val)
 if (!is.na(match(varb,names(dxf))) & class(dxf[,varb])=="factor"){
   j<-kln(data.frame(table(dx[,varb],as.character(dxf[,varb]))))
   j<-j[which(j$Freq>0),]
   jf<-paste(a$description,"==",j$Var2[which(j$Var1==val)])
   dxf[,nvarb]<<-factor(dx[,nvarb],levels=c(0,1),labels=c("otherwise",j$Var2[which(j$Var1==val)]))
 }
 a$description<-jf
 if (!is.na(key[nvarb,"variable"])){key[nvarb,]<<-a[,names(key)]}
 if (is.na(key[nvarb,"variable"])){key<<-rbind(key,a)}
 if (showname){print(paste("This dummy variable is named ",nvarb,sep=""))}
 if (showname){print(paste("The variable description is: '",jf,"'",sep=""))}

}

addesc #--add descriptive label to created variable

function(nvbs,nvbsdes){

 qii<-nvbs[which(!is.na(match(nvbs,names(dx))))]
 if (length(qii)>0){dxf[,qii]<<-dx[,qii]}
 # --add a description for newly created variables--
 a<-key[1:length(nvbs),]
 is.na(a)<-TRUE
 a$type<-"ordinal"
 a$class<-"numeric"
 a$usevb<-"data"

if (!is.na(match(nvbs,names(dx)))){

 if (length(nvbs)>1){yz<-apply(!is.na(dx[,nvbs]),2,sum)}
 if (length(nvbs)==1){yz<-sum(!is.na(dx[,nvbs]))}
 a$NOTmissing<-a$FNOTmissing<-yz
 a$nUniqVals<-a$FnUniqVals<-sapply(nvbs,function(x) length(table(dx[,x])))

}

 a$variable<-rownames(a)<-nvbs
 a$description<-nvbsdes
 rownames(a)<-nvbs
 k<-which(!is.na(match(nvbs,rownames(key))))
 if (length(k)>0){
   print(paste("WARNING: you have overwritten existing values of ",nvbs[k],sep=""))
   key[nvbs[k],]<<-a[nvbs[k],]
   a<-a[-k,]
 }
 k<-which(is.na(match(rownames(a),rownames(key))))
 if (length(k)>0){key<<-rbind(key,a)}

}

capwrd

function(rr){

 o<-gsub("^\\s+|\\s+$", "",rr)
 o<-gsub("(","-x-",o,fixed=TRUE)
 o<-sapply(strsplit(o,"-"),
           function(x) (paste(paste(toupper(substr(x,1,1)),(substr(x,2,nchar(x))),sep=""),collapse="-")))
 o<-gsub("-X-","(",o,fixed=TRUE)
 o<-sapply(strsplit(o," "),
           function(x) (paste(paste(toupper(substr(x,1,1)),(substr(x,2,nchar(x))),sep=""),collapse=" ")))
 return(o)

}

rmcs #--remove common substrings from set of strings

function(rg){

  1. --remove common substrings from set of strings--
 bg<-rg
 a<-(sapply(rg,function(x) strsplit(x,"")))
 z<-sapply(2:length(a),function(x) match(a1,ax))
 z1<-a1
 z1[which(rowSums(is.na(z))>0)]<-" "
 z1<-unlist(strsplit(spmang(paste(z1,collapse=""))," "))
 for (x in z1){bg<-gsub(paste(x,collapse=""),"",bg)}
 return(bg)

}

rnkd # ranks with ties--no leaps in ranked values

function(a1){

  1. ranks with ties--no leaps in ranked values
 p<-unique(a1)
 p<-p[order(p)]
 r<-1:length(p)
 k<-match(a1,p)
 return(r[k])

}

spmang - auxiliary function that removes leading and trailing spaces from string

function(x){

 x<-gsub("^\\s+|\\s+$", "",x)
 for (i in 1:6){x<-gsub("  "," ",x)}
 return(x)

}

gSimpStat # --generate simple statistics

function(ww){

 # --generate simple statistics--
 z<-which(sapply(ww,function(x) class(x))!="character")
 a<-round(data.frame(nobs=apply(!is.na(ww[,z]),2,sum,na.rm=TRUE),
                     mean=apply(ww[,z],2,mean,na.rm=TRUE),
                     min=apply(ww[,z],2,min,na.rm=TRUE),
                     max=apply(ww[,z],2,max,na.rm=TRUE),
                     sd=apply(ww[,z],2,sd,na.rm=TRUE)),3)
 #   a<-data.frame(psych::describe(ww[,z]))  
 #   a<-round(a[,c(2:4,8:9)],3)
 return(a)

}

MEplots

function(h,mod="R",varbs=NULL,filetitle=NULL,setylim=NULL,subgrps=NULL,dpires=500){

 tt<-hpaste(mod,"marEff",sep="")
 vxx<-varbs
 if (is.null(varbs)){vxx<-setdiff(colnames(tt),c("ID","alt",subgrps))}
 jr<-length(vxx)
 a<-range(tt[,vxx])
 ddx<-hpaste(mod,"meanME",sep="")"MEmean"
 k<-match(tt$alt,colnames(ddx))
 if (!is.null(filetitle)) {png(file=paste(filetitle,".png",sep=""),width=8,height=5,units="in",pointsize=10,res=dpires)}
 if (jr>10){layout(matrix(1:(3*ceil(jr/3)),3,ceil(jr/3)))}
 if (jr<=10){layout(matrix(1:(2*ceil(jr/2)),2,ceil(jr/2)))}
 iy<-vxx[1]
 for (iy in vxx){
   if (is.na(match(iy,setylim))){ay<-range(tt[,iy]);mtl<-NULL}
   if (!is.na(match(iy,setylim))){ay<-range(tt[,setylim]);mtl<-": stndYaxis"}
   if (is.null(subgrps)){
     plot(factor(tt$alt),tt[,iy],main=paste(iy,mtl,sep=""),lwd=.5,col=colors()[484],ylim=ay)
     abline(h=0,col="red")
     tt$zi<-ddx[iy,k]
     points(factor(tt$alt),tt$zi,col="blue",bg="blue",pch=18)
   }  
   if (!is.null(subgrps)){
     zg<-which(tt[,subgrps]==0)
     plot(factor(tt$alt[zg]),tt[zg,iy],main=paste(iy,mtl,sep=""),boxwex=.2,
          cex.axis=.7,lwd=.5,ylim=ay,
          at=(1:length(table(tt$alt[zg])))-.22,col="yellow")
     zg<-which(tt[,subgrps]==1)
     plot(factor(tt$alt[zg]),tt[zg,iy],boxwex=.2,cex=.7,
          at=(1:length(table(tt$alt[zg])))+0,col="red",add=TRUE,col.axis=NA)
     abline(h=0,col="blue")
   }  
 }
 layout(1)
 if (!is.null(filetitle)) {dev.off()}

}

chK # some descriptors of the fields in a data.frame

function(hh){

 # some descriptors of the fields in a data.frame
 aa<-data.frame(NOTmissing=apply(!is.na(hh),2,sum),
                class=sapply(hh,function(x) class(x)),
                nUniqVals=sapply(hh,function(x) length(unique(x[which(!is.na(x))]))))
 return(aa)

}

mknwlag #--make network lag variable

function(MIdata,wtMat,varb){

  1. --make network lag variable--
 nimp<-max(MIdata$.imp)
 newvarb<-paste("W",varb,sep="")
 MIdata[,newvarb]<-0;is.na(MIdata[,newvarb])<-TRUE
 for (iy in 1:nimp){
   MIdata[which(MIdata$.imp==iy),newvarb]<-wtMat%*%as.matrix(MIdata[which(MIdata$.imp==iy),varb])
 }
 return(MIdata[,newvarb])

}

mkscale #--make a composite index from variables in compvarbs, from 1st principal component --# --remove redundant variables--

function(compvarbs,udnavn=NULL,impdata,type="LP",add.descrip=NULL,set.direction=NULL,set.range=NULL){

 #--make a composite index from variables in compvarbs, from 1st principal component--
 # compvarbs=character string of variable names
 # impdata=imputed dataset created using doMI
 # udnavn=name of composite index created in impdata
 # type= 1 of c("LP","pc1","mean")
 if (is.null(udnavn)){udnavn<-paste(compvarbs,type,sep=".")}
 print(udnavn)
 compvarbs<-intersect(get(compvarbs),names(impdata))
 #--remove redundant variables--  
 zxa<-compvarbs
 gxg<-as.matrix(impdata[,zxa]);dim(gxg)
 oyo<-qr(gxg);oyo$rank
 bgti<-zxa
 zxa<-zxa[oyo$pivot[seq(oyo$rank)]]
 if (length(setdiff(bgti,zxa))>0){
   print(paste("To avoid singularity problems, variable ",paste(setdiff(bgti,zxa),collapse=",")," dropped",sep=""))
   compvarbs<-zxa
 }
 # -------------------------
 
 a<-rownames(suppressWarnings(alpha(impdata[,compvarbs])$alpha.drop))
 # following not necessary for princomp, but for others yes
 # ng=vector of length compvarbs with values 1 or -1 (-1 inverts variable in compvarbs)
 ng<-rep(1,length(compvarbs))
 izi<-grep("-",a)
 if (length(izi)>0){ng[izi]<-(-1)}
 names(ng)<-gsub("-","",a)
 s<-1
 if (!is.null(set.direction)){s<-as.numeric(ng[set.direction])}
 ng<-ng*s
 ix<-impdata[,compvarbs]*matrix(ng[compvarbs],NROW(impdata),length(ng),byrow=TRUE)
 ix<-apply(ix,2,scale)*1+10
 if (min(ix)<0 & type=="LP"){
   print("variable has negative values, not suitable for this method, try rescaling or set type='pc1'")
   print(apply(ix,2,min))
   stop("negative values")
 }
 v<-kln(data.frame(ix,.id=impdata$.id,.imp=impdata$.imp))
 impdata[,udnavn]<-0
 
 if (type=="pc1"){
   wts<-pvex<-NULL
   for (imn in 1:max(v$.imp)){
     z<-which(v$.imp==imn)
     o<-princomp(v[z,compvarbs])
     pvex<-rbind(pvex,(o$sdev^2)/sum(o$sdev^2))
     mscr<-sign(o$loadings[1,1])
     impdata[z,udnavn]<-o$scores[,1]*mscr
     wts<-rbind(wts,t(o$loadings[,1])*mscr)
   }
   pvex<-apply(pvex,2,mean)
   print("Pct Variance Explained by component")
   print(pvex[which(pvex>.01)])
 }
 
 if (type=="mean"){
   for (imn in 1:max(v$.imp)){
     z<-which(v$.imp==imn)
     impdata[z,udnavn]<-apply(v[z,compvarbs],1,mean)
   }
 }
 
 if (type=="LP"){
   kyf<-kln(data.frame(.id=impdata[which(impdata$.imp==1),".id"],ord=1:length(which(impdata$.imp==1))))
   for (imn in 1:max(v$.imp)){
     z<-which(v$.imp==imn)
     Amat<-as.matrix(v[z,compvarbs])
     row.names(Amat)<-v$.id[z]
     pnv<-row.names(Amat)
     fc<-matrix(0,NCOL(Amat),NCOL(Amat))
     diag(fc)<-1
     rownames(fc)<-paste("ct",(1:NCOL(Amat)),sep="")
     colnames(fc)<-colnames(Amat)
     Amat<-rbind(fc,Amat)
     sx<-NULL
     iter<-0
     o<-pnv
     while (length(o)>1){
       dea<-NULL
       iter<-iter+1
       bvec<-rep(1,NROW(Amat))
       bvec[1:NROW(fc)]<-(1/10^6)
       names(bvec)<-rownames(Amat)
       f.dir<-rep(c(">=","<="),c(NROW(fc),length(o)))
       for (i in 1:length(o)){
         cvec<-(Amat[o[i],])
         olp<-lp("max", cvec, Amat, f.dir, bvec)
         dea<-rbind(dea,olp$objval)
       }
       z<-which(round(dea[,1],7)==1)
       sx<-rbind(sx,cbind(data.frame(o[z]),iter))
       z<-which(round(dea[,1],7)!=1)
       o<-o[z]
       if (length(o)>=1){Amat<-Amat[c(rownames(fc),o),]}
     }
     sx<-kln(sx)
     names(sx)[1]<-".id"
     
     duf<-merge(kyf,sx,by=".id",all=TRUE)
     z<-which(is.na(duf$iter))
     if (length(z)>0){duf$iter[z]<-iter+1}
     duf[,NCOL(duf)]<-(max(duf[,NCOL(duf)])+1)-duf[,NCOL(duf)]
     head(duf)
     
     z<-which(impdata$.imp==imn)
     k<-match(impdata$.id[z],duf$.id)
     impdata[z,udnavn]<-duf$iter[k]
   }
 } 
 # ------------------stats-----------------
 
 dld<-NULL
 for (imn in 1:max(v$.imp)){
   z<-which(v$.imp==imn)
   if (!is.null(set.range)){  impdata[z,udnavn]<-resc(impdata[z,udnavn],set.range[1],set.range[2]) }
   dld<-rbind(dld,kln(data.frame(varbs=compvarbs,cor=cor(impdata[z,compvarbs],impdata[z,udnavn]))))
 }
 
 dld<-kln(aggregate(dld[,c("cor")],list(dld$varbs),mean))
 k<-match(dld$Group.1,key$variable)
 dld<-kln(data.frame(cor.w.scale=round(dld[,2],3),inv=ng[dld$Group.1],varb=dld$Group.1,
                     description=key[k,c("description")]))
 rownames(dld)<-dld$varb
 if (type=="pc1"){
   a<-t(apply(wts,2,range))
   colnames(a)<-c("min.load","max.load")
   dld<-kln(data.frame(a,dld))
 }
 dld<-dld[order(dld$cor.w.scale),]
 
 stats1<-data.frame(suppressWarnings(alpha(ix)$total[c(2)]))
 
 levs<-quickdesc(dld$varb)    
 
 impdata[,paste(udnavn,"Sq",sep="")]<-impdata[,udnavn]^2
 if (is.null(add.descrip)){add.descrip<-udnavn}
 addesc(udnavn,add.descrip)
 addesc(paste(udnavn,"Sq",sep=""),paste(add.descrip," Squared",sep=""))
 h<-list(impdata[,c(udnavn,paste(udnavn,"Sq",sep=""))],stats1,dld,levs)
 names(h)<-c("scales","stats","corrs","varb.desc")
 cat(paste('c("',paste(dld$varb,collapse='", "'),'")',sep=""), "\n")
 return(h)

}

resc # rescale series

function(vb,bg=1,en=8){

 # rescale series
 b<-vb-min(vb,na.rm=TRUE)
 b<-bg+b/max(b,na.rm=TRUE)*(en-bg)
 return(b)

}

fv4scale

function(lookword,dropword=NULL,keepword=NULL,coreword=NULL,

                  nmin=NULL,minalpha=.7,chklevels=FALSE,verbose=TRUE,
                  doscale=TRUE,setseed=NULL){
 if (is.null(nmin)){nmin<-round(NROW(dx)/4)}
 if (!is.null(setseed) & length(match(setseed,names(dx)))==0){
   print(paste("setseed string ",setseed," not found in data.frame dx. Set to NULL"))
   setseed<-NULL
 }
 lookword<-tolower(lookword)
 if (chklevels){
   zaa<-names(dxf)[which(sapply(dxf,class)=="factor")];length(zaa)
   zpp<-grep(".d",zaa,fixed=TRUE)
   if (length(zpp)>0){zaa<-zaa[-zpp]}
   if (length(zaa)>0){
   for (ih in zaa){
     zi<-unique(unlist(sapply(lookword,function(x) grep(x,tolower(levels(dxf[,ih]))))))
     if (length(zi)>0){
       if (verbose) {print(ih)}
       g<-plyr::count(data.frame(fac=dxf[,ih],num=dx[,ih]),c("fac","num"))
       for (ib in zi){
         if (g$freq[ib]>=10){
           mkdummy(ih,g$num[ib],showname=FALSE)
         }
       }
     }
   }
   }
   zll<-rownames(key)[unique(unlist(sapply(lookword,function(x) grep(x,tolower(key$description)))))]
   if (!is.null(keepword)){
     keepword<-tolower(keepword)
     zkk<-rownames(key)[unique(unlist(sapply(keepword,function(x) grep(x,tolower(key$description)))))]
     zll<-intersect(zll,zkk)
   }
   if (!is.null(dropword)){
     dropword<-tolower(dropword)
     zdd<-rownames(key)[unique(unlist(sapply(dropword,function(x) grep(x,tolower(key$description)))))]
     zll<-setdiff(zll,zdd)
   }
   zaa<-rownames(key)[which(key$usevb=="data" & key$NOTmissing>nmin & key$type=="categorical")]
   zaa<-intersect(zaa,zll)
   zpp<-grep(".d",zaa,fixed=TRUE)
   if (length(zpp)>0){zaa<-zaa[-zpp]}
   if (length(zaa)>0){
     phh<-kln(dxf[,zaa])
     nhh<-NULL
       for (ih in zaa){
       prr<-plyr::count(data.frame(fac=phh[,ih],num=dx[,ih]),c("fac","num"))
       prr<-prr[which(!is.na(prr$num) & !is.na(prr$fac) & (prr$freq>=10)),]
       if (NROW(prr)>0){
         for (ib in NROW(prr)){
           mkdummy(ih,prr$num[ib],showname=FALSE)
       }
     }
   }
   }
 }
 zi<-unique(unlist(sapply(lookword,function(x) grep(x,tolower(key$description)))));zi
 zi<-zi[which(key$usevb[zi]=="data" & key$NOTmissing[zi]>nmin & key$type[zi]=="ordinal")];zi
 zi<-zi[which(!is.na(match(rownames(key)[zi],names(dx))))]
 if (!is.null(dropword)){
   dropword<-tolower(dropword)
   dxi<-unique(unlist(sapply(dropword,function(x) grep(x,tolower(key$description)))));dxi
   zi<-setdiff(zi,dxi)
 }
 if (!is.null(keepword)){
   keepword<-tolower(keepword)
   dxi<-unique(unlist(sapply(keepword,function(x) grep(x,tolower(key$description)))));dxi
   zi<-intersect(zi,dxi)
 }
 vxx<-rownames(key)[zi]

if (!is.null(setseed)){

 vxx<-unique(c(vxx,setseed))
 o<-abs(cor(dx[,setseed],dx[,vxx],use="pair"))
 o<-o[1,which(!is.na(o))]
 vxx<-names(o)[which(o>=.3)]

}

 if (verbose & !doscale){print(quickdesc(vxx))}
 
 if (doscale){
   a<-apply(!is.na(dx[,vxx]),2,sum)
   a<-a[order(-a)]
   vxx<-names(a)
   a<-round(cor(dx[,vxx],use="pair"),3)
   diag(a)<-0
   a[which(is.na(a))]<-1
   z<-which(a==1,arr.ind=TRUE)
   z<-z[which(z[,"col"]<z[,"row"]),"row"]
   if (length(z)>0){vxx<-vxx[-z]}
   vxx
   
   if (!is.null(coreword)){
   coreword<-tolower(coreword)
   zi<-unique(unlist(sapply(coreword,function(x) grep(x,tolower(key$description)))));zi
   zi<-zi[which(key$usevb[zi]=="data" & key$NOTmissing[zi]>nmin & key$type[zi]=="ordinal")];zi
   zi<-zi[which(!is.na(match(rownames(key)[zi],names(dx))))]
   cxx<-rownames(key)[zi];cxx
   cxx<-intersect(cxx,vxx);cxx
   ob<-which(rowSums(is.na(dx[,cxx]))==0);length(ob)
   kxx<-vxx[which(apply(is.na(dx[ob,vxx]),2,sum)<length(ob)/2)];kxx
   ti<-apply(abs(cor(dx[,cxx],dx[,kxx],use="pair")),2,mean)
   ti<-ti[order(-ti)]
   txx<-names(ti);txx
   cxx<-cxx[order(match(cxx,txx))];cxx
   oxx<-setdiff(txx,cxx)[1:length(cxx)];oxx
   vxx<-union(cxx,oxx[which(!is.na(oxx))]);vxx
 }
 zig<-which(rowSums(is.na(dx[,vxx]))==0);length(zig)
 if (length(zig)<2){print("Too many missing values to calculate alpha")}
 if (length(zig)>1){
 o<-chK(dx[zig,vxx])
 vxx<-rownames(o)[which(o$nUniqVals>1 & o$class=="numeric")]
 if  (verbose) {print(key[vxx,c("NOTmissing","nUniqVals","description")])}
 puta<-suppressWarnings(alpha(dx[,vxx])$total[2])
 while (puta<minalpha){
   o<-suppressWarnings(alpha(dx[,vxx])$alpha.drop)
   z<-which(o$std.alpha==max(o$std.alpha))
   vxx<-vxx[-z]
   if (length(vxx)==0){stop("all variables eliminated, try lower minalpha")}
   if (sum(apply(is.na(dx[,vxx]),1,sum)>0)==NROW(dx)){stop("too many missing values, try higher nmin")}
   puta<-suppressWarnings(alpha(dx[,vxx])$total["std.alpha"])
 }
 if (verbose)  {print(puta)}
 qwx<-suppressWarnings(alpha(dx[,vxx])$alpha.drop)
 if (verbose)  {print(qwx)}
  1. if (verbose) {print(key[vxx,c("NOTmissing","nUniqVals","description")])}
 phh<-kln(dxf[,vxx])
 for (ii in vxx){
   i2<-rownames(qwx)[match(ii,gsub("-","",rownames(qwx),fixed=TRUE))]
   owwr<-"[--AS IS--]"
   if (nchar(gsub("-","",i2,fixed=TRUE))<nchar(i2)){owwr<-"[!INVERTED!]"}
   if (verbose)  {print(paste("============",i2,owwr,key[ii,"description"],"=============="))}
   prr<-plyr::count(data.frame(fac=phh[,ii],num=dx[,ii]),c("fac","num"))
   prr<-prr[which(!is.na(prr$num) & !is.na(prr$fac) & !is.na(prr$freq)),]
   prr<-prr[order(prr$num),]
   prr<-kln(prr[c(1,NROW(prr)),])
   if (is.numeric(prr$fac)){
     pjj<-paste("range: ",paste(prr$fac,collapse=" to "),sep="")
   }
   if (is.character(prr$fac)){
     pjj<-paste(paste("[",prr$num,"]: ",substr(prr$fac,1,75),sep=""),collapse=" |+| ")
   }
   if (verbose)  {print(pjj)}
 }
 }
 }
 if (verbose)  {print("--variable names below---")}
 cat(paste('c("',paste(vxx,collapse='", "'),'")',sep=""), "\n")
 return(vxx)

}

widen #widen or narrow the range between two values

function(var,rat=1.1){

 #widen or narrow the range between two values
 a<-diff(range(var))
 minv<-min(var)-((rat-1)/2)*a
 maxv<-max(var)+((rat-1)/2)*a
 return(c(minv,maxv))

}

kln # clean up dataframe, convert factors to character, etc.

function(bgg){

 # clean up dataframe, convert factors to character, etc.
 if (is.data.frame(bgg)){
   if (ncol(bgg)>1){
 z<-which(sapply(bgg,function(x) class(x))=="factor")
 if (length(z)>0){bgg[,z]<-sapply(bgg[,z],function(x) as.character(x))}
 z<-which(sapply(bgg,function(x) class(x))=="character")
 if (length(z)>0){
 bgg[,z]<-sapply(bgg[,z],function(x) iconv(x,to="ASCII//TRANSLIT"))
 bgg[,z]<-sapply(bgg[,z],function(x) gsub("^\\s+|\\s+$", "", x))
 }
 suppressWarnings(z<-sapply(bgg,function(x) length(which(is.na(as.numeric(gsub("[ ,]","",x)))))==length(which(is.na(x)))))
 suppressWarnings(bgg[,z]<-sapply(bgg[,z], function(x) as.numeric(gsub("[ ,]","",x))))
 }
 }
 return(bgg)

}

mkmappng

mkmappng

function(usedata,varb,filetitle=NULL,show="ydata",numnb.lg=3,numnb.lm=20,numch=0,

                  pvlm=.05,dfbeta.show=FALSE,zoom=FALSE,landcolor="black",rimcol=NULL,
                  map.width=8,map.height=5,map.units="in",map.pointsize=10,map.res=500){
 if (length(which(!is.na(match(c("long","lati"),names(usedata)))))!=2){
   stop("input data needs to have two variables, one named 'long', the other 'lati'")
 }
 if (is.null(filetitle)){filetitle<-varb}
 zdv<-which(!is.na(usedata[,varb]))
 xy<-SpatialPoints(usedata[zdv,c("long","lati")])
 lgt<-as.matrix(localG(usedata[zdv,varb],nb2listw(include.self(knn2nb(knearneigh(xy,k=numnb.lg,longlat=TRUE,RANN=FALSE))),style="B")))  
 lmt<-as.matrix(localmoran(usedata[zdv,varb],nb2listw(knn2nb(knearneigh(xy,k=numnb.lm,longlat=TRUE,RANN=FALSE)),style="B")))
 ydata<-usedata[zdv,varb]
 dfby<-ydata
 is.na(dfby)<-TRUE
 if (table(gsub("dfb.","",names(usedata),fixed=TRUE))[varb]==2){
   dfby=usedata[zdv,paste("dfb.",varb,sep="")]
   is.na(dfby[dfby==0])<-TRUE
 }
 
 splgt<-SpatialPointsDataFrame(coordinates(xy),
                               kln(data.frame(lgt=lgt,lmtz=lmt[,"Z.Ii"],
                                              lmtp=(lmt[,"Pr(z > 0)"]<pvlm)*1,
                                              ydata=ydata,dfbeta=dfby)))
 
 png(file=paste(filetitle,"_",show,".png",sep=""),width=map.width,height=map.height,
     units=map.units,pointsize=map.pointsize,res=map.res)
 par(mar=c(0, 0, 0, 0) + 0.1)
 if (zoom==FALSE){suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,ylim=c(-60,79),
                                       xlim=c(-180,180),fill=TRUE,col=landcolor,lty=0,resolution=0))}
 if (zoom==TRUE){suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,
                                      ylim=widen(range(usedata[zdv,"lati"])),
                                      xlim=widen(range(usedata[zdv,"long"])),
                                      fill=TRUE,col=landcolor,lty=0,resolution=0))}
 splgt<-splgt[order(-splgt@data[,show]),]  
 p1<-coordinates(splgt)
 kk<-mapproject(p1[,"long"],p1[,"lati"],orientation=c(90, 155,0), projection="mercator")
 kk<-SpatialPoints(as.matrix(cbind(kk$x,kk$y)))
 blr<-resc(splgt@data[,show],.35,2.5)
 brks<-quantile(blr,seq(0,1,by=.2),na.rm=TRUE);brks
 farv<-colorRampPalette(c("yellow","brown"))(length(brks)-1);farv
 nuk<-findInterval(blr, brks, all.inside=TRUE)
 #   tff<-rep(2,length(nuk))
 #   tff[which(!is.na(splgt@data$dfbeta))]<-tff[which(!is.na(splgt@data$dfbeta))]+sign(splgt@data$dfbeta)[which(!is.na(splgt@data$dfbeta))]
 if (is.null(rimcol)){rimcol<-farv[nuk]}
 plot(kk,pch=21,col=rimcol,cex=blr,bg=farv[nuk],add=TRUE,lwd=.5)
 if (dfbeta.show){
   dfbx<-abs(splgt@data[,"dfbeta"]*10)
   dfbsign<-(splgt@data[,"dfbeta"]>0)+1
   #triangle points down shows societies whose inclusion signif. lowers coefficient
   #triangle points up shows societies whose inclusion signif increases coefficient
   plot(kk,pch=c(6,2)[dfbsign],col=c("green","green")[dfbsign],cex=2,add=TRUE)
 }
 z<-which(splgt@data$lmtp==1 & splgt@data$lmtz>0)
 if (numch>0){
   numch<-min(round(length(z)/3),numch)
   ig<-as.matrix(dist(coordinates(kk[z,])))
   v<-cutree(hclust(as.dist(ig)),k=numch);table(v)
   for (ii in unique(v)){
     z1<-z[which(v==ii)]
     if (length(z1)>2){
       g4<-dismo::convHull(kk[z1,])
       plot(g4@polygons,add=TRUE,border="cyan")
     }
   }
 }
 par(mar=c(5, 4, 4, 2) + 0.1)
 dev.off()

}

mkcatmappng

mkcatmapping

function(usedata,varb,filetitle=NULL,zoom=FALSE,landcolor=colors()[387],

                     map.width=8,map.height=5,map.units="in",map.pointsize=10,map.res=500){
 if (length(which(!is.na(match(c("long","lati"),names(usedata)))))!=2){
   stop("input data needs to have two variables, one named 'long', the other 'lati'")
 }
 if (is.null(filetitle)){filetitle<-varb}
 usedata<-usedata[which(!is.na(usedata[,varb])),]
 xy<-SpatialPoints(usedata[,c("long","lati")])
 splgt<-SpatialPointsDataFrame(coordinates(xy),kln(data.frame(usedata)))
 farv<-c(brewer.pal(12,"Set3"),brewer.pal(12,"Paired"),brewer.pal(8,"Accent"))
 blr<-as.numeric(factor(splgt@data[,varb]))
 gii<-plyr::count(data.frame(num=blr,fac=splgt@data[,varb]),c("num","fac"))
 png(file=paste(filetitle,".png",sep=""),width=map.width,height=map.height,
     units=map.units,pointsize=map.pointsize,res=map.res)
 par(mar=c(0, 0, 0, 0) + 0.1)
 if (zoom==FALSE){
   suppressWarnings(map("world",orientation=c(90, 155,0), projection="mercator", wrap=TRUE,ylim=c(-60,79),
                        xlim=c(-180,180),fill=TRUE,col=landcolor,lty=0,resolution=0))
   p1<-coordinates(splgt)
   kk<-mapproject(p1[,"long"],p1[,"lati"],orientation=c(90, 155,0), projection="mercator")
   kk<-SpatialPoints(as.matrix(cbind(kk$x,kk$y)))
   plot(kk,pch=blr,col=farv[blr],cex=.7,add=TRUE)
   legend("topright",legend=gii$fac,pch=gii$num,col=farv[gii$num],cex=.45)
 }
 if (zoom==TRUE){
   map("world",ylim=widen(range(usedata[,"lati"]),1.3),
       xlim=widen(range(usedata[,"long"]),1.3),fill=TRUE,col=landcolor,lty=0,resolution=0) #13,387
   plot(xy,pch=blr,col=farv[blr],cex=.7,add=TRUE)
   legend("topright",legend=gii$fac,pch=gii$num,col=farv[gii$num],cex=.45)
 }
 par(mar=c(5, 4, 4, 2) + 0.1)
 dev.off()

}

CSVwrite #--write object to csv file--

function(object,filestem,appnd2=FALSE){

 #--write object to csv file--
 if (class(object)=="list"){
   pff<-length(object)
   suppressWarnings(write.table(paste(names(object)[1],"\n"),file=paste(filestem,".csv",sep=""),
                                quote=TRUE,sep=",",dec=".",qmethod="double",col.names =FALSE,
                                row.names = FALSE,append=appnd2))
   suppressWarnings(write.table(object1,file=paste(filestem,".csv",sep=""),
                                quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
                                row.names = FALSE,append=TRUE))
   if (pff>1){
     sapply(c(2:pff),function(x) {
       for (qz in 1:2){suppressWarnings(write.table(" ",file=paste(filestem,".csv",sep=""),
                                    quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
                                    row.names = FALSE,append=TRUE))}
       suppressWarnings(write.table(paste(names(object)[x],"\n"),file=paste(filestem,".csv",sep=""),
                                    quote=TRUE,sep=",",dec=".",qmethod="double",col.names = FALSE,
                                    row.names = FALSE,append=TRUE))
       suppressWarnings(write.table(objectx,file=paste(filestem,".csv",sep=""),
                                    quote=TRUE,sep=",",dec=".",qmethod="double",col.names = NA,
                                    row.names = TRUE,append=TRUE))})
   }
 }
 if (class(object)=="table" | class(object)=="matrix" | 
       class(object)=="numeric" | class(object)=="integer" | 
       class(object)=="data.frame"  | class(object)=="character"){
   suppressWarnings(write.table(object,file=paste(filestem,".csv",sep=""),
                                quote=TRUE,sep=",",dec=".",qmethod="double",col.names = NA,
                                row.names = TRUE,append=appnd2))
 }

} ==