Customized Scatterplot Ideas

Panel function for visualizing univariate statistics

# Diagonal panel for pairs(): summarise the distribution of one variable.
#
# On the current panel this draws
#   * a kernel density curve, rescaled so that its peak sits at height 0.5
#     on a vertical axis remapped to [0, 3];
#   * a red point at the mean and an orange point at the median, both placed
#     on the density curve;
#   * a horizontal boxplot of the data.
# The density curve is solid when the Shapiro-Wilk W statistic exceeds 0.91
# and dotted otherwise (including when the test cannot be computed).
#
# Args:
#   x:   numeric vector; non-finite values are dropped before plotting.
#   ...: accepted so that pairs() can forward extra arguments; not used.
#
# Returns:
#   The value of the final boxplot() call (pairs() ignores it).
panel.dist_summary <- function(x, ...)
{
  # Remap the vertical axis to [0, 3] and restore the caller's coordinates on
  # exit. The restore must use the named form: par() given a bare numeric
  # vector only warns and leaves 'usr' unchanged.
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(usr[1:2], 0, 3))

  # density(), mean() and median() cannot cope with missing values
  x <- x[is.finite(x)]

  # Is this a normally distributed dataset? The test rejects the null
  # hypothesis if W is too small. shapiro.test() stops for fewer than 3 or
  # more than 5000 values, or for constant data; treat those cases as
  # "not shown to be normal" instead of aborting the whole pairs() plot.
  s.W <- tryCatch(
    shapiro.test(x)$statistic,
    error = function(e) NA_real_
  )

  # the colour is the same either way; only the line type encodes the result
  density.col <- "gray"
  density.lty <- if (!is.na(s.W) && s.W > 0.91) 1 else 3

  # compute and plot the density, rescaled to a maximum height of 0.5
  d <- density(x)
  dy <- d$y / max(d$y) * 0.5
  lines(d$x, dy, col = density.col, lty = density.lty)

  # height of the rescaled density curve exactly at the mean and the median,
  # interpolated on the density grid so that the markers lie on the curve
  centers <- c(mean(x), median(x))
  y_centers <- approx(d$x, dy, xout = centers)$y

  # add points on the density plot for the mean and median
  points(centers, y_centers, col = c("red", "orange"), pch = 16)

  # add a boxplot
  boxplot(x, horizontal = TRUE, boxwex = 0.3, add = TRUE)
}

Panel function for printing joint correlation statistic

# Upper/lower panel for pairs(): print the absolute correlation of x and y.
#
# The value is written in gray at the centre of the panel, with the text
# size scaled by the magnitude of the correlation so that stronger
# relationships stand out.
#
# Args:
#   x, y:       numeric vectors of equal length.
#   digits:     number of significant digits to print.
#   prefix:     string pasted in front of the printed value.
#   cex.cor:    base text magnification; when missing it is chosen so the
#               label spans about 80% of the panel width.
#   cor.method: correlation method passed to cor() ("pearson", "kendall"
#               or "spearman").
#   ...:        accepted so that pairs() can forward extra arguments
#               (e.g. pch, col); not used.
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, cor.method="pearson", ...)
{
  # Switch to unit coordinates for text placement and restore the caller's
  # coordinates on exit. The restore must use the named form: par() given a
  # bare numeric vector only warns and leaves 'usr' unchanged.
  usr <- par("usr")
  on.exit(par(usr = usr), add = TRUE)
  par(usr = c(0, 1, 0, 1))

  # use the complete pairs only, so a missing value does not turn r into NA
  r <- abs(cor(x, y, method = cor.method, use = "pairwise.complete.obs"))

  # formatting r together with a long decimal fixes the number of decimals
  # shown, so every panel prints with the same precision
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)

  # default magnification: make the label about 80% of the panel width
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r, col = "gray")

  # might be interesting to use ks.test
  # http://www.physics.csbsju.edu/stats/KS-test.html
}

Example usage with built-in datasets

# enforce square plotting area
par(pty = "s")

# Lower panel shared by all examples: scatterplot with a solid gray lowess
# smoother. Extra arguments (e.g. pch, col) are passed on to panel.smooth().
panel.smooth_gray <- function(...) {
  panel.smooth(..., col.smooth = gray(0.5), lty = 1)
}

pairs(USJudgeRatings[1:5],
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

pairs(iris[1:4],
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

# use spearman correlation calculation instead of default pearson:
pairs(iris[1:4],
      upper.panel = function(...) panel.cor(..., cor.method = "spearman"),
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

# color iris species: note location of 'col=' argument
# (it goes to the lower panel function, not to pairs() itself)
species_col <- c("red4", "green3", "blue4")[unclass(iris$Species)]
pairs(iris[1:4],
      upper.panel = panel.cor,
      lower.panel = function(...) {
        panel.smooth_gray(..., pch = 16, col = species_col)
      },
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

pairs(trees,
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

pairs(swiss,
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

# using formula notation:
pairs(~ Fertility + Education + Catholic, data = swiss,
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

pairs(longley,
      upper.panel = panel.cor,
      lower.panel = panel.smooth_gray,
      diag.panel = panel.dist_summary,
      cex.labels = 2, font.labels = 2)

Attachments:

panel-functions.R.txt
pairs-soil_data.pdf
pairs-iris_data.pdf