English: Estimates of five independent real-valued quantities, obtained either from the observed means (black circles) or from linear regression predictions (red diamonds). Illustrates Stein's paradox interpreted as a simple regression problem.
Based on "A Galtonian Perspective on Shrinkage Estimators" (Stigler, 1990). Generated in R with the following code:
library(ggplot2)
# Simulation: in each of n runs, draw p "real" values, observe a noisy sample
# mean for each of them, fit a linear regression across the p pairs, and record
# how far the raw means and the regression predictions are from the real values.
p <- 5 # number of variables estimated in each run
n <- 100 # number of simulation runs

# Working table for a single run; it is overwritten on every run, so after the
# loop it holds the values of the last run only.
fakedata <- setNames(
  data.frame(matrix(nrow = p, ncol = 3)),
  c("real", "observed", "predicted")
)

# One row per run: error of the observed means and of the regression predictions.
RMSE <- setNames(
  data.frame(matrix(nrow = n, ncol = 2)),
  c("RMSE of mean", "RMSE of regression")
)

for (j in seq_len(n)) {
  for (i in seq_len(p)) {
    # Real value, drawn uniformly from [1, 10].
    fakedata[i, 1] <- runif(1, 1, 10)
    # Observed value: mean of 100 normal draws centred on the real value, with
    # a standard deviation drawn uniformly from [1, 20].
    fakedata[i, 2] <- mean(rnorm(100, fakedata[i, 1], runif(1, 1, 20)))
  }

  # NOTE(review): this regresses the observations on the real values. Stigler
  # (1990) appears to frame shrinkage as the regression of the real values on
  # the observations (i.e. real ~ observed) -- confirm the intended direction.
  fit <- lm(observed ~ real, fakedata)
  fakedata$predicted <- predict(fit)

  # Root-mean-square error: the square root is required, otherwise the stored
  # and reported figures are mean squared errors mislabelled as RMSE.
  RMSE[j, 1] <- sqrt(mean((fakedata$real - fakedata$observed)^2))
  RMSE[j, 2] <- sqrt(mean((fakedata$real - fakedata$predicted)^2))
}

# Average RMSE of each estimator over all runs (auto-printed at top level).
sprintf(
  "RMSE of means = %.2f, RMSE of regressions = %.2f",
  mean(RMSE$`RMSE of mean`), mean(RMSE$`RMSE of regression`)
)
# Scatter plot of the contents of `fakedata`: real values on the x axis, with
# the observed means drawn as black circles and the regression predictions as
# red diamonds. Both axis titles are suppressed.
ggplot(data = fakedata, mapping = aes(x = real, y = observed)) +
  # Observed means (filled circle, shape 21).
  geom_point(size = 3, shape = 21, fill = "black") +
  # Regression predictions (filled diamond, shape 23), overriding only y.
  geom_point(mapping = aes(y = predicted), size = 3, shape = 23, fill = "red") +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )