NavigationUser login |
Computing Statistics from Poorly Formatted Data (plyr and reshape packages for R)Submitted by dylan on Thu, 2009-07-09 15:26.
2.521 2.312 2.720 2.254 * 2.922 * * 2.291 2.038 * * 1.151
1.675 1.646 1.860 2.517 * 1.986 * * 3.279 3.420 * * 3.059 1.734 1.305 1.774 2.366 * 2.909 * * 2.863 2.958 * * 2.973 1.637 1.632 2.040 1.807 * 1.889 * * 2.081 2.267 * * 2.655 1.967 8.307 8.331 8.698 * 8.236 * * 7.990 8.255 * * 8.041 1.670 1.744 1.982 2.029 * 2.159 * * 3.330 2.945 * * 3.301 1.668 1.816 1.832 2.100 * 2.289 * * 2.745 2.703 * * 3.216 2.304 2.413 2.749 2.827 * 2.978 * * 3.011 3.244 * * 4.494 1.505 2.827 3.375 1.923 * 4.250 * * 1.542 3.094 * * 1.480
site intercept slope Rsq
1 1 2.8123 -0.0894 0.5115 2 2 1.5229 0.1512 0.7682 3 3 1.5499 0.1351 0.7445 4 4 1.5581 0.0738 0.8453 5 5 6.1738 0.2174 0.1727 6 6 1.4787 0.1527 0.9026 7 7 1.5340 0.1270 0.9871 8 8 2.1403 0.1437 0.8224 9 9 2.7546 -0.0425 0.0306 # these may need to be installed with install.packages() library(plyr) library(reshape) # this one comes with the base install of R library(lattice) # read in the data as pasted from original format d1 <- read.table('d1.txt', na.strings='*') d2 <- read.table('d2.txt') # transpose and convert to long format d1.long <- melt(t(d1)) d2.long <- melt(t(d2)) # give resonable names names(d1.long) <- c('obs','site','value') names(d2.long) <- c('obs','site','value') # add time variable # 1:n obs * m sites d1.long$time <- rep(1:13, 9) d2.long$time <- rep(1:8, 7) xyplot(value ~ time | factor(site), type=c('p','r'), data=d1.long, as.table=TRUE) xyplot(value ~ time | factor(site), type=c('p','r'), data=d2.long, as.table=TRUE) fit.summary <- function(i) { # fit linear model to this set of the data l <- lm(value ~ time, data=i) # extract model terms l.coef <- coef(l) # extract R-squared l.rsq <- summary(l)$r.squared # combine model details into a single vector l.details <- c(l.coef, l.rsq) # rename elements of the vector names(l.details) <- c('intercept', 'slope', 'Rsq') # return rounded values to the calling function return(round(l.details, 4)) } # compute lm details by site ddply(d1.long, .(site), fit.summary) ddply(d2.long, .(site), fit.summary)
Reply |