# 07-grmsd.r
# Install yaImpute only when it is not already available, so that re-running
# the script does not download and reinstall the package every time.
if (!requireNamespace("yaImpute", quietly = TRUE)) {
  install.packages("yaImpute")
}
library(yaImpute)
# NOTE(review): magrittr and dplyr do not appear to be used by the rest of
# this script; they are kept because other code may rely on them -- confirm.
library(magrittr)
library(dplyr)
# Load the iris data and fix the RNG seed so the sampled rows are reproducible.
data(iris)
set.seed(12345)

# Form some test data: 50 randomly drawn row names serve as the reference
# observations.
refs <- sample(rownames(iris), size = 50)

# x: the two sepal measurements for every row of iris.
x <- iris[, c("Sepal.Length", "Sepal.Width")]
tail(x)
dim(x)

# y: the two petal measurements, for the sampled reference rows only.
y <- iris[refs, c("Petal.Length", "Petal.Width")]
tail(y)
dim(y)
# Build two yai objects on the same data: one with yai()'s default method and
# one with method = "mahalanobis".
msn <- yai(x = x, y = y)
mal <- yai(x = x, y = y, method = "mahalanobis")

# Linear-model counterpart: regress both columns of y on every column of x,
# using only the reference rows of x.
lmFit <- lm(as.matrix(y) ~ ., data = x[refs, ])

# Compute the average distances between observed and imputed (predicted)
# values for the three fits.
grmsd(msn, mal, lmFit)

# Repeat for the two yai objects, supplying the four numeric iris columns
# (all observations) as ancillaryData. The Species factor is already excluded
# by the 1:4 column subset, so it is never passed to grmsd().
grmsd(msn, mal, ancillaryData = iris[, 1:4])
# Here is an example using lm, and another using column means as predictions.

# Build the observed/predicted layout: the ".o" columns carry a copy of the
# observed values, the unsuffixed columns will carry the predictions.
impMean <- y
colnames(impMean) <- paste0(colnames(impMean), ".o")
impMean <- cbind(impMean, y)
tail(impMean)
dim(impMean)

# Set the predictions to the means of the variables.
impMean[, "Petal.Length"] <- mean(impMean[, "Petal.Length"])
impMean[, "Petal.Width"] <- mean(impMean[, "Petal.Width"])

# Reuse the model already stored in lmFit instead of refitting the identical
# formula on the identical data. It is still passed as lmFit = so the
# argument carries the same name as before.
grmsd(msn, mal, lmFit = lmFit, impMean)
# Cross-check against rmsd(): for a single variable the values match.
msnimp <- na.omit(impute(msn))
grmsd(msnimp[, c("Petal.Length", "Petal.Length.o")])
rmsd(msnimp[, c("Petal.Length", "Petal.Length.o")], scale = TRUE)

# These are multivariate cases and they do not match, because the covariance
# of the two variables is > 0.
grmsd(msnimp)
colSums(rmsd(msnimp, scale = TRUE)) / 2
# Ask grmsd() for the vectors (rtnVectors = TRUE), draw a boxplot of them, and
# print the values that boxplot() reports as outliers.
stats <- boxplot(
  grmsd(msn, mal, ancillaryData = iris[, -5], rtnVectors = TRUE),
  ylab = "Mahalanobis distance"
)
stats$out