How to normalize and integer-scale a variable in a data frame

From InterSciWiki
Jump to: navigation, search

How to normalize and integer-scale data in a data frame

Scale the values of a variable to start at the integer 1, and let the user choose a number K of successive integer values, such that:

1. The values are normalized with the function scale() and converted to integers; for v1650: v1650.n12 <- round(3+scale(dx$v1650),1)*10, which gives, e.g.,

19 21 22 24 25 27 28 30 31 33 34 36 37 39 40 42 43 (each with a different frequency)

# Re-base the integer scale: the smallest observed value of v1650.n12
# (ignoring NAs) is mapped to 1, then the frequencies are tabulated.
S <- min(v1650.n12, na.rm = TRUE)
v1650.n00 <- 1 + (v1650.n12 - S)
table(v1650.n00)
2. The original values may already be all positive. If they are not (that is, S is 0 or negative), redefine:
# Build v1650.n01, the strictly positive version of the variable, on BOTH
# branches so that it always exists afterwards.
#   S < 1 : the unshifted values contained zero or negative entries, so
#           re-base v1650.n00 so that its minimum (ignoring NAs) is 1.
#           The shift subtracts the minimum and adds 1; since v1650.n00 was
#           already re-based to start at 1, this normally changes nothing.
#   S >= 1: the values were already positive; reuse v1650.n00 unchanged.
if (S < 1) {
  S <- min(v1650.n00, na.rm = TRUE)
  v1650.n01 <- v1650.n00 - S + 1
} else {
  v1650.n01 <- v1650.n00
}
table(v1650.n01)
 1  3  4  6  7  9 10 12 13 15 16 18 19 21 22 24 25 
52  2  8  1 10  3  7  2  6  3  3  3  5  1  7  4 37  #approximating a normal distribution with no negative or zero values

01 03 04 06 07 09 10 12 13 15 16 18 19 21 22 24 25 each with a different frequency and

3. These options extend the conversion of ordinal variables to dichotomies ("Dummy .d variables") so that the user can name either "Dummy" or "Normalized .n variables" (these being mutually exclusive). For example, bio.6.n12 rounds bio.6 to a variable that is normalized but has 12 easy-to-map intervals, with a lowest value of 1 and a highest value of 12 or greater (with some empty intervals for normalization). Changes like these are at the level of modeling single dependent-variable outcomes with missing data and controls for autocorrelation. #ECSS 5 key questions 2014-15
4. If the user chooses K=9, these kinds of variables will create suitable color maps with mkmappng and mkcatmappng. The color maps should be named according to the variable name, either in the codebook or as defined in CoSSci, e.g., for v1650, similarly mk1650 or mkcat1650.
5. We might consider a plot(popdenmean,6+scale(dx$v1650)) option as a graphic in the maps window.

Libraries

Script

library(bnlearn)         #[1] "Not found in data: inhreal, inhmove, marrgood. Have been dropped"
library(AER)
library(aod)
library(classInt)
library(dismo)
library(foreign)
library(forward)
library(geosphere)
library(Hmisc)
library(linprog)
library(maps)
library(mapproj) 
library(mice)
library(mlogit)
library(mclogit)
library(pastecs)
library(plyr)
library(psych)
library(RColorBrewer)
library(relaimpo)
library(reshape)
library(stringr)
library(spdep)
#library(tm) #no package
library(XML)

# The Dow-Eff functions, as well as the five ethnological datasets, are
# contained in an R workspace, located in the cloud.
# Earlier download location (disabled):
#load(url("http://dl.dropbox.com/u/9256203/DEf01d.Rdata"), .GlobalEnv)
#getwd() [1] "/Users/drwhite/Documents"
# Load workspace version DEf01f directly from its URL.
load(url("http://capone.mtsu.edu/eaeff/downloads/DEf01f.Rdata"))
# Alternative (disabled): load a local copy, DEf01e, from the working directory.
#setwd("/Users/drwhite/Documents")
#load("DEf01e.Rdata", .GlobalEnv)
ls()  # list the objects now available, including those loaded from DEf01f.Rdata

##  [1] "addesc"      "capwrd"      "chK"         "CSVwrite"    "doLogit"     "doMI"        "doMNLogit"   "doOLS"       "EA"          "EAcov"      
## [11] "EAfact"      "EAkey"       "fv4scale"    "gSimpStat"   "kln"         "llm"         "LRB"         "LRBcov"      "LRBfact"     "LRBkey"     
## [21] "MEplots"     "mkcatmappng" "mkdummy"     "mkmappng"    "mknwlag"     "mkscale"     "mkSq"        "mmgg"        "plotSq"      "quickdesc"  
## [31] "resc"        "rmcs"        "rnkd"        "SCCS"        "SCCScov"     "SCCSfact"    "SCCSkey"     "setDS"       "showlevs"    "spmang"     
## [41] "widen"       "WNAI"        "WNAIcov"     "WNAIfact"    "WNAIkey"     "XC"          "XCcov"       "XCfact"      "XCkey"

# The setDS( xx ) command sets one of the five ethnological datasets as the source for the subsequent analysis. The five valid options for xx are: "XC", "LRB", "EA", "SCCS", and "WNAI".
# NOTE(review): setDS() creates objects used by the rest of the script (the code below reads `dx`, presumably one of them) - confirm the full list.
setDS("SCCS")

Scale

?scale  # open the help page for scale()
#A <- matrix(c(2,3,-2,1,2,2),3,2)

# Standardized (z-score) versions of three variables, rounded to one decimal.
# NOTE(review): the names suggest population measures - confirm in the codebook.
popscale153 <- round(scale(dx$v153, center = TRUE, scale = TRUE), 1)
popscale156 <- round(scale(dx$v156, center = TRUE, scale = TRUE), 1)
popscale1130 <- round(scale(dx$v1130, center = TRUE, scale = TRUE), 1)

# One column of UNROUNDED z-scores per variable. The row count is taken from
# the data instead of being hard-coded, so the matrix stays aligned with dx
# whichever dataset setDS() selected.
popscale <- matrix(
  c(scale(dx$v153), scale(dx$v156), scale(dx$v1130)),
  nrow = nrow(dx), ncol = 3
)

# Composite score: row mean of the z-scores, stretched by 1.5, rounded to one
# decimal and shifted by 6. A row with any NA yields NA.
popdenmean <- 6 + round(1.5 * rowMeans(popscale), 1)
##                                          popmean
##                                           -2 -1  0  1  2
##  Absent                                   17 21 13  9 
##  Present, inactive, unconcerned           11  9 12 11  8
##  Present, active, unconcerned with humans  4  4  7  4  4
##  Present, active, supportive of morality   2  8  8 16  9
# Scatterplot of the composite popdenmean against the z-scored v1650.
plot(popdenmean, scale(dx$v1650))

# AgriPot 1 & 2: z-scores shifted by 6, one column per variable. The row count
# comes from dx rather than a hard-coded 186.
AgpotScale <- 6 + matrix(
  c(scale(dx$v921), scale(dx$v928)),
  nrow = nrow(dx), ncol = 2
)
# NOTE(review): AgpotScale already carries the +6 shift, so a second +6 is
# added here (unlike popdenmean, which is shifted once) - confirm intended.
Agpotmean <- 6 + round(rowMeans(AgpotScale), 1)

# CmtySize & Superjh: built the same way as the AgriPot pair.
SizeJHscale <- 6 + matrix(
  c(scale(dx$v63), scale(dx$v237)),
  nrow = nrow(dx), ncol = 2
)
# NOTE(review): the same double +6 shift applies here - confirm intended.
SizeJHmean <- 6 + round(rowMeans(SizeJHscale), 1)