# Working with Spatial Data

A collection of notes, examples, references, and thoughts on working with spatial data.

# Converting Alpha-Shapes into SP Objects

Just read about a new R package called alphahull (paper) that sounds like it might be a good candidate for addressing this request regarding concave hulls. Below are some notes on computing alpha-shapes and alpha-hulls from spatial data and converting the results returned by ashape() and ahull() into SP-class objects. Note that the functions are attached at the bottom of the page. Be sure to read the license for the alphahull package if you plan to use it in your work.

Alpha-Shape Example

## not widely tested!

# need these
library(sp)
library(spgrass6)
library(alphahull)

# helper functions ashape_to_SPLDF() / ahull_to_SPLDF() (attached at the bottom of the page)
source('alpha-functions.R')

# read point vector in from GRASS
# NOTE(review): the actual read step is missing -- presumably x <- readVECT6('...'); confirm

# extract coordinates
x.coords <- coordinates(x)

# alpha-shape: 100 meter threshold
x.as <- ashape(x.coords[,1], x.coords[,2], alpha=100)

# alpha-hull: 30 meter threshold
x.ah <- ahull(x.coords[,1], x.coords[,2], alpha=30)

# quick visual check of both results
plot(x.as, cex=0.5, pch=4, xlab='Easting (m)', ylab='Northing (m)', main=expression(paste('100m ', alpha, '-Shape')), asp=1)

plot(x.ah, cex=0.5, pch=4, xlab='Easting (m)', ylab='Northing (m)', main=expression(paste('30m ', alpha, '-Hull')), asp=1)

## convert into SP objects

# alpha-shape
x.as.spldf <- ashape_to_SPLDF(x.as, proj4string=x@proj4string)

# alpha-hull
x.ah.spldf <- ahull_to_SPLDF(x.ah, proj4string=x@proj4string)

# check: OK
pdf(file='ashape_ahull_demo.pdf', width=6, height=6)
par(mar=c(1,1,1,1))
plot(x.as.spldf)
lines(x.ah.spldf, col='red')
points(x, cex=0.5, pch=4, col='blue')
legend('bottomright', legend=c(expression(paste('100m ', alpha, '-Shape')), expression(paste('30m ', alpha, '-Hull')), 'Observation'), lty=c(1,1,NA), pch=c(NA,NA,4), col=c('black', 'red', 'blue'), bty='n')
dev.off()

# save back to GRASS: OK
writeVECT6(x.as.spldf, 'rtk_ashape')

# save back to GRASS: OK
writeVECT6(x.ah.spldf, 'rtk_ahull')

# Customizing Maps in R: spplot() and latticeExtra functions

I recently noticed the new latticeExtra page on R-forge, which contains many very interesting demos of new lattice-related functionality. There are strong opinions about the "best" graphics system in R (base graphics, grid graphics, lattice, ggplot, etc.)-- I tend to use base graphics for simple figures and lattice for depicting multivariate or structured data. The sp package defines classes for storing spatial data in R, and contains several useful plotting methods such as the lattice-based spplot(). This function, and back-end helper functions, provide a generalized framework for plotting many kinds of spatial data. However, sometimes with great abstraction comes great ambiguity-- many of the arguments that would otherwise allow fine tuning of the figure are buried in documentation for lattice functions. Examples are more fun than links to documentation, so I put together a couple of them below. They describe several strategies for placing and adjusting map legends-- either automatically, or manually added with the update() function. The last example demonstrates an approach for over-plotting 2 rasters. All of the examples are based on the meuse data set, from the gstat package.

Extended spplot() examples

Examples

# setup environment
library(gstat)
library(latticeExtra)
library(grid)
library(RColorBrewer)   # provides brewer.pal(), used below

data(meuse.grid)
data(meuse)
data(meuse.alt)

# promote plain data.frames to SpatialPointsDataFrame objects
coordinates(meuse.grid) <- ~ x + y
coordinates(meuse) <- ~ x + y
coordinates(meuse.alt) <- ~ x + y

# convert to SpatialPixelsDataFrame
gridded(meuse.grid) <- TRUE

# convert 'soil' to factor and re-label
meuse.grid$soil <- factor(meuse.grid$soil, labels=c('A','B','C'))
meuse$soil <- factor(meuse$soil, levels=c('1','2','3'), labels=c('A','B','C'))

#
# example 1
#

# setup color scheme
cols <- brewer.pal(n=3, 'Set1')
p.pch <- c(2,3,4)

# generate list of trellis settings
tps <- list(regions=list(col=cols), superpose.polygon=list(col=cols), superpose.symbol=list(col='black', pch=p.pch))

# init list of overlays
spl <- list('sp.points', meuse, cex=0.75, pch=p.pch[meuse$soil], col='black')

# setup trellis options
trellis.par.set(tps)

# initial plot, missing key
p1 <- spplot(meuse.grid, 'soil', sp.layout=spl, colorkey=FALSE, col.regions=cols, cuts=length(cols)-1)

# add a key at the top + space for key
p1 <- update(p1, key=simpleKey(levels(meuse.grid$soil), points=FALSE, lines=FALSE, rect=TRUE, regions=TRUE, columns=3, title='Class', cex=0.75))

# add a key on the right + space for key
# (this second update() replaces the key defined above)
p1 <- update(p1, key=simpleKey(levels(meuse$soil), points=TRUE, columns=1, title='Class', cex=0.75, space='right'))

p1

#
# example 2
#

# generate list of trellis settings: ramp for the continuous 'dist' variable
tps <- list(regions=custom.theme()$regions, superpose.symbol=list(col='black', pch=p.pch), fontsize=list(text=16))

trellis.par.set(tps)
p2 <- spplot(meuse.grid, 'dist', sp.layout=spl, colorkey=list(space='bottom', title='Distance'), scales=list(draw=TRUE, cex=0.5))

# point-symbol key on the right, keyed by soil class
p2 <- update(p2, key=simpleKey(levels(meuse$soil), points=TRUE, columns=1, space='right'))

p2

#
# example 3
# more colorkey tweaking and...
# overlay 2 grids with layer()
#

# panel helper: draw a gridded sp object as colored cells with grid.rect()
# (used with latticeExtra::layer() below to overlay a second raster)
#
# obj   : a gridded sp object, or the name of one as a character string
# col   : a single fill color, or one color per grid cell
# alpha : fill transparency
sp.grid <- function (obj, col = 1, alpha = 1, ...)
{
  # allow the object to be passed by name
  if (is.character(obj))
    obj = get(obj)
  xy = coordinates(obj)
  # 'col' must be a single color or one per cell; the original silently
  # ignored a mismatch, so only warn here to preserve behavior
  if (length(col) != 1 && length(col) != nrow(xy)) {
    warning("length of 'col' should be 1 or the number of grid cells")
  }
  gt = as(getGridTopology(obj), "data.frame")
  # draw each cell as a rectangle in native (map) units, no cell borders
  grid.rect(x = xy[, 1], y = xy[, 2], width = gt$cellsize[1],
    height = gt$cellsize[2], default.units = "native", gp = gpar(fill = col, col = NA, alpha = alpha))
}

trellis.par.set(regions=custom.theme()$regions, superpose.polygon=list(col='black', alpha=0.25))

# first grid covers entire extent
p3 <- spplot(meuse.grid, 'dist', colorkey=list(space='bottom', width=1, height=0.5, tick.number=3))

# overlay partially transparent, kind of a hack...
p3 <- p3 + layer(sp.grid(meuse.grid[meuse.grid$soil == 'A', ], col='black', alpha=0.25))

# describe the shaded overlay with a simple rectangle key
p3 <- update(p3, key=simpleKey('Shaded Region', points=FALSE, lines=FALSE, rect=TRUE, columns=1, space='top'))

p3

#
# example 4: merge all three together
#

# order matters: panels are drawn left-to-right as p3, p2, p1
p4 <- c(p3,p2,p1, layout=c(3,1))
p4 <- update(p4, key=simpleKey(levels(meuse$soil), points=TRUE, columns=1, space='right'))

p4

# save to file: note that we have to reset the 'regions' colors
png(file='spplot_examples.png', width=700, height=350)
trellis.par.set(regions=custom.theme()$regions)
print(p4)
dev.off()

# Generation of Sample Site Locations [sp package for R]

Premise
Setting up sampling designs is a non-trivial aspect to any field experiment that includes a spatial component. The sp package for R provides a simple framework for generating point sampling schemes based on region-defining features (lines or polygons) and a sampling type (regular spacing, non-aligned, random, random-stratified, hexagonal grid, etc.). The rgdal package provides a set of functions for importing/exporting common vector data formats. This example demonstrates simple import/export, iterating over sp objects, and reconstituting new objects from lists of objects. A more complex sampling scheme is demonstrated here.

1. Setup the environment, load some sample polygons, and tryout the spsample() function.
library(sp)
library(rgdal)

# note the funky syntax
# note that this should have a REAL projection defined
# an incorrect definition may result in an error from readOGR
# NOTE(review): the readOGR() call itself is not shown -- presumably x <- readOGR(...); confirm

# spsample will not sample each polygon, rather it works on the union of polys
# try it:
plot(x) ; points(spsample(x, n=100, type='random'), col='red', pch=3, cex=0.5)
2. Sampling with spsample example 1

3. Iterate through all polygons in our original dataset, generating approximately 100 sample points within each polygon. Note that we use sapply() to iterate through the list of polygons, and do.call('rbind', ...) to 'stack' the list elements back into a single SpatialPoints object.
# hexagonal grid from lower-left corner
# sampling each Polygons slot separately yields one SpatialPoints object per polygon
s <- sapply(slot(x, 'polygons'), function(i) spsample(i, n=100, type='hexagonal', offset=c(0,0)))

# we now have a list of SpatialPoints objects, one entry per polygon of original data
plot(x) ; points(s[[4]], col='red', pch=3, cex=.5)

# stack into a single SpatialPoints object
s.merged <- do.call('rbind', s)
4. Sampling with spsample example 2

5. Now that the sample points for each polygon have been merged into a single SpatialPoints object, we need to attach a dataframe with the ID associating each sample point with its parent polygon. Attaching this data will "promote" the SpatialPoints object to a SpatialPointsDataFrame object.
# add an id, based on source polygon:
#
# extract the original IDs
ids <- sapply(slot(x, 'polygons'), function(i) slot(i, 'ID'))

# determine the number of ACTUAL sample points generated for each poly
# (spsample() returns only approximately n points, so counts vary by polygon)
npts <- sapply(s, function(i) nrow(i@coords))

# generate a reconstituted vector of point IDs, one per sample point
pt_id <- rep(ids, npts)

# promote to SpatialPointsDataFrame by attaching the parent-polygon id
s.final <- SpatialPointsDataFrame(s.merged, data=data.frame(poly_id=pt_id))

# check: color points by parent polygon
plot(x) ; points(s.final, col=s.final$poly_id, pch=3, cex=0.5)
6. Sampling with spsample example 3

7. Copy over the spatial reference system data from the polygons object, and save sample points to a new shapefile. Note that you can only write to a shapefile if the object in question is a SpatialPointsDataFrame object.
# copy source data spatial reference system to new object
s.final@proj4string <- x@proj4string

# write out to new file
# (dsn is the output directory for the ESRI Shapefile driver)
writeOGR(s.final, dsn='polys/', layer='rnd_pts', driver='ESRI Shapefile')

# Ordinary Kriging Example: GRASS-R Bindings

Update: 2012-02-13
Many of the examples used in this demonstration are now somewhat dated, probably inefficient, and in need of revision. I'll spend some time on an updated version for the GRASS wiki soon.

Overview:
A simple example of how to use GRASS+R to perform interpolation with ordinary kriging, using data from the spearfish sample dataset. This example makes use of the gstat library for R.

• Issaks, E.H. & Srivastava, R.M. An Introduction to Applied Geostatistics Oxford University Press, 1989
• GSTAT Manual
• GSTAT Examples

Elevation Data and Sample Points: 300 randomly placed points where elevation data was sampled.

Data Prep:
As a contrived example, we will generate 300 random points within the current region, and sample an elevation raster at each of these points.

# set region:
g.region rast=elevation.dem

# extract some random points from an elevation dataset
v.random out=rs n=300

# create attribute table:
# (required before v.what.rast can store values -- this command was
#  missing from the original notes; confirm column type)
v.db.addtable map=rs columns="elev double precision"

# extract raster data at points
v.what.rast vect=rs rast=elevation.dem column=elev

# simple display:
d.rast elevation.dem
d.vect rs size=4

# start R
R

Remember that you will need to install these R packages onto your computer.

library(gstat)
library(spgrass6)

## read in vector dataset from above ('rs' was created in the GRASS steps)
G <- gmeta6()
x.has.na <- readVECT6('rs')

# remove records with NA
# logical subsetting is safer than -which(...): the latter drops ALL rows
# when there are no NA values
x <- x.has.na[!is.na(x.has.na@data$elev), ]

## create a grid which will define the interpolation
## note that it is based on the current GRASS region settings
grd <- gmeta2grd()

## make a new grid of (1)s
## be sure to use original data's proj data...
## doesn't work with the stuff stored in G...
new_data <- SpatialGridDataFrame(grd, data=data.frame(k=rep(1,G$cols*G$rows)), proj4string=CRS(x@proj4string@projargs))

## optionally we can use another raster of 1's as our interpolation mask
## need to manually set the coordinate system information:
## this new object could then be used in the 'newdata' argument to predict(), i.e.
## x.pred_OK <- predict(g, id='elev', newdata=mask)

Variogram Modeling:
A very simple example, using default parameters for a non-directional variogram is presented below. Modeling the variogram for an actual spatial problem requires knowledge of both your dataset (distribution, collection methods, etc.), the natural processes involved (stationarity vs. anisotropy ?), and a bit about the assumptions of geostatistics.

## init our gstat object, with the model formula
## note that coordinates are auto-identified from the GRASS object
g <- gstat(id="elev", formula=elev ~ 1, data=x)

## view a variogram with specified parameters
## (width = lag bin size in map units, cutoff = max separation distance considered)
plot(variogram(g, width=250, cutoff=10000, map=FALSE))

## optionally make a variogram map, and plot semi-variance for 10-point pairs or greater:
plot(variogram(g, width=250, cutoff=10000, map=TRUE), threshold=10)

## fit a linear variogram model- as this looks appropriate
## ... using default parameters
v.fit <- fit.variogram(variogram(g) ,vgm(model="Lin") )
plot(variogram(g, width=250, cutoff=10000, map=FALSE), model=v.fit)

## update gstat object with fitted variogram model
g <- gstat(g, id="elev", model=v.fit )

Variogram and Fit Model: A Linear variogram model was fit to the elevation data.

Interpolation by Ordinary Kriging:
The prediction is done for every instance of a '1' in the object passed to the newdata= argument.

## interpolate with ord. kriging
## prediction occurs at each cell of new_data (the grid of 1's built above)
x.pred_OK <- predict(g, id='elev', newdata=new_data)

Send Results Back to GRASS:

## write raster back to GRASS: interpolation and kriging variance:
## (remove an existing map first if needed)
## system('g.remove rast=elev.ok')
writeRAST6(x.pred_OK, 'elev.ok', zcol='elev.pred')
writeRAST6(x.pred_OK, 'elev.ok_var', zcol='elev.var')

## quit:
q()

Viewing Results in GRASS:

# reset colors to match original data:
r.colors map=elev.ok rast=elevation.dem

# give the kriging variance a grey color map
r.colors map=elev.ok_var color=rules <<EOF
0% white
100% black
EOF

#
# display the kriged interpolation, with intensity | saturation based on variance
# (d.his composites hue / intensity / saturation layers into one display)
d.his s=elev.ok_var h=elev.ok
# optional method:
# d.his i=elev.ok_var h=elev.ok
d.vect rs size=4

Interpolated Elevation Data via Ordinary Kriging: Hue is interpolated elevation value, saturation is based on the kriging variance.

Simple Comparison with RST:
RST (regularized splines with tension) and OK (ordinary kriging) are two common interpolation methods. Computing the RMSE (root-mean-square-error) between the interpolated raster and the original raster provides a simple quantitative measure of how well the interpolation performed, at least in terms of the mean magnitude of error. A spatial description of interpolation error can be generated by subtracting the new raster from the original. Note that the steps involve cell-wise computation of the square-error (SE), region-wise computation of the mean-square-error (MSE); the square root of MSE gives the root-mean-square-error or RMSE.

#
# compare with RST - tension of 60, same points
#
v.surf.rst in=rs elev=elev.rst zcol=elev tension=60
r.colors map=elev.rst rast=elevation.dem

# compute SE between kriged map and original
# (cell-wise squared error; the mean reported by r.univar is then the MSE)
r.mapcalc "d = (elev.ok - elevation.dem)^2 "
r.colors map=d color=rainbow
d.rast d
d.vect rs size=4

# compute SE between RST map and original
r.mapcalc "d2 = (elev.rst - elevation.dem)^2"
r.colors map=d2 color=rainbow
d.rast d2
d.vect rs size=4

#
# compare results:
#

# compute RMSE for OK [sqrt(MSE)]
r.univar d

# compute RMSE for RST [sqrt(MSE)]
r.univar d2
# see table below:
# see table below:

Root-mean-square-error Comparison:
Looks like the RMSE values are pretty close...

| Method | OK        | RST       |
|--------|-----------|-----------|
| RMSE   | 61 meters | 64 meters |

# Ordinary Kriging Example: R via text file

Overview:
A simple example of how to use R to perform interpolation with ordinary kriging, using data from a text file. This example makes use of the gstat library for R. Additional examples of how to use the following gstat functions are included:

• variogram maps
• directional variogram plots
• plotting the interpolated surface directly from R

Note that this example is not meant to be an authoritative guide on variogram selection, or proper modeling of anisotropy-- just an example. The Kansas Geological Survey has an interesting set of reports that illustrate selection of a directional variogram in the presence of a strong, regional trend.

Original elevation data and sample points: 300 randomly placed points where elevation data was sampled.

Data Prep:
Export the coordinates and elevation values from the previous example. See attached file elev.txt.

# add two new columns to the random point vector from the previous example
v.db.addcol map=rs columns="x double, y double"
v.to.db option=coor column=x,y map=rs
# export to text file
# NOTE(review): output name is elev.csv but the text above refers to elev.txt -- confirm
db.select rs fs="," > elev.csv

Start R:
Load in the text file, and coerce to format that gstat can use.

library(gstat)

## read the exported text file (created with db.select above)
## this read step was missing from the original notes -- confirm filename
d <- read.csv('elev.csv')

## gstat does not like missing data, subset original data:
e <- na.omit(d)

## convert simple data frame into a spatial data frame object:
coordinates(e) <- ~ x+y

## test result with simple bubble plot:
bubble(e, zcol='elev', fill=FALSE, do.sqrt=FALSE, maxsize=2)

## create a grid onto which we will interpolate:
## first get the range in data
x.range <- as.integer(range(e@coords[,1]))
y.range <- as.integer(range(e@coords[,2]))

## now expand to a grid with 500 meter spacing:
grd <- expand.grid(x=seq(from=x.range[1], to=x.range[2], by=500), y=seq(from=y.range[1], to=y.range[2], by=500) )

## convert to SpatialPixel class
coordinates(grd) <- ~ x+y
gridded(grd) <- TRUE

## test it out:
plot(grd, cex=0.5)
points(e, pch=1, col='red', cex=0.7)
title("Interpolation Grid and Sample Points")

Interpolation Grid

Create GSTAT Objects:
Make some diagnostic plots, model variogram, check for anisotropy, etc.

## make gstat object:
g <- gstat(id="elev", formula=elev ~ 1, data=e)

## the original data had a large north-south trend, check with a variogram map
plot(variogram(g, map=TRUE, cutoff=4000, width=200), threshold=10)

## another approach:
# create directional variograms at 0, 45, 90, 135 degrees from north (y-axis)
v <- variogram(g, alpha=c(0,45,90,135))

## 0 and 45 deg. look good. lets fit a linear variogram model:
## an un-bounded variogram suggests additional source of anisotropy... oh well.
## anis=c(0, 0.5): major axis at 0 deg., minor/major range ratio of 0.5
v.fit <- fit.variogram(v, model=vgm(model='Lin' , anis=c(0, 0.5)))

## plot results:
plot(v, model=v.fit, as.table=TRUE)

## update the gstat object:
g <- gstat(g, id="elev", model=v.fit )

Variogram Map

Directional Variogram Plots

Perform OK and View Results:
Examples using standard and lattice graphics.

## perform ordinary kriging prediction:
p <- predict(g, model=v.fit, newdata=grd)

## visualize it:

## base graphics
par(mar=c(2,2,2,2))
image(p, col=terrain.colors(20))
points(e, pch=4, cex=0.5)
title('OK Prediction')

## lattice graphics: thanks to R. Bivand's advice on this
##
## alternatively plot quantiles with
## ... col.regions=terrain.colors(6), cuts=quantile(p$elev.pred) ...
##
pts <- list("sp.points", e, pch = 4, col = "black", cex=0.5)
spplot(p, zcol="elev.pred", col.regions=terrain.colors(20), cuts=19, sp.layout=list(pts), contour=TRUE, labels=FALSE, pretty=TRUE, col='brown', main='OK Prediction')

## plot the kriging variance as well
spplot(p, zcol='elev.var', col.regions=heat.colors(100), cuts=99, main='OK Variance',sp.layout=list(pts) )

## quit and convert saved EPS files to PNG:
## for i in *.eps ; do convert $i `basename $i .eps`.png ; done

OK Prediction: created with the spplot() function

OK Variance

# Point-process modelling with the sp and spatstat packages

Some simple examples of importing spatial data from text files, converting between R datatypes, creation of a point process model and evaluating the model. Input data sources are: soil pit locations with mollic and argillic diagnostic horizons (mollic-pedons.txt and argillic-pedons.txt), and a simplified outline of Pinnacles National Monument (pinn.txt). The outline polygon is used to define a window in which all operations are conducted.

The 'sp' package for R contains the function spsample(), which can be used to create a sampling plan for a given region of interest: i.e. the creation of n points within that region based on several algorithms. This example illustrates the creation of 50 sampling points within Pinnacles, according to the following criteria: regular (points are located on a regular grid), nonaligned (points are located on a non-aligned grid-like pattern), random (points are located at random), stratified (collectively exhaustive, see details here).

The 'spatstat' package for R contains several functions for creating point-process models: models describing the distribution of point events: i.e. the distribution of tree species within a forest. If covariate data is present (i.e. gridded precipitation, soil moisture, aspect, etc.) these covariates can be incorporated into the point-process model. Without covariate data, the model is based on a spatial distribution estimator function. Note that the development of such models is complicated by factors such as edge-effects, degree of stochasticity, spatial connectivity, and stationarity. These are rather contrived examples, so please remember to read up on any functions you plan to use for your own research. An excellent tutorial on Analyzing spatial point patterns in R was recently published.

Spatstat Quick Reference

Four sampling designs

Spatial density of each pedon type

Spatial density of the four sampling designs

Example point-process model of mollic soils

Diagnostics of a simple point-process model

Note: This code should be updated to use the slot() function instead of the '@' syntax for accessing slots!

library(sp)
library(spatstat)

# read in pinnacles boundary polygon: x,y coordinates
# use the GRASS vector, as it should be topologically correct
# v.out.ascii format=standard in=pinn_bnd > pinn.txt ... edit out extra crud

# read in mollic and argillic pedons
# see ogrinfo hack
# NOTE(review): the reads producing 'pinn', 'm', and 'a' are not shown -- confirm

# add a flag for the type of pedon
m$type <- factor('M')
a$type <- factor('A')

# combine into a single dataframe
pedons <- rbind.data.frame(a,m)

Using the functions from the 'sp' package create a polygon object from the pinn.txt coordinates

# create a spatial polygon class object
# NOTE(review): 'pinn' should be a 2-column coordinate matrix read from pinn.txt -- confirm
pinn.poly <- SpatialPolygons(list(Polygons(list(Polygon( pinn )), "x")))

# inspect this new object with str()
# total area of all polygons
pinn.poly@polygons[[1]]@area

# coordinates of first polygon: this is rough syntax!
pinn.poly@polygons[[1]]@Polygons[[1]]@coords

Generate a sampling plan for 50 sites using regular grid, non-aligned grid, random, and random stratified approaches

# generate sample points within the pinnacles boundary, one set per design
p.regular <- spsample(pinn.poly, n = 50, "regular")
p.nalign <- spsample(pinn.poly, n = 50, "nonaligned")
p.random <- spsample(pinn.poly, n = 50, "random")
p.stratified <- spsample(pinn.poly, n = 50, "stratified")

# setup plot environment: 2x2 panel of designs
par(mfrow=c(2,2))

# each of the sampling designs
plot(pinn.poly)
title("Regular")
points(p.regular, pch=16, col='red', cex=0.3)

plot(pinn.poly)
title("Nonaligned")
points(p.nalign, pch=16, col='red', cex=0.3)

plot(pinn.poly)
title("Random")
points(p.random, pch=16, col='red', cex=0.3)

plot(pinn.poly)
title("Stratified")
points(p.stratified, pch=16, col='red', cex=0.3)

Convert 'sp' class objects to 'spatstat' analogues note the use of 'slots'

# pinn boundary:
# extract coordinates: and get a length - 1 value
p.temp <- pinn.poly@polygons[[1]]@Polygons[[1]]@coords
n <- length(p.temp[,1]) - 1

# create two vectors: x and y
# these will contain the reversed vertices, minus the last point
# in order to adhere to the spatstat specs: no repeating points, in counter-clockwise order
x <- rev(p.temp[,1][1:n])
y <- rev(p.temp[,2][1:n])

# make a list of coordinates: note that we are removing the last vertex
p.list <- list(x=x,y=y)

# make a spatstat window object from the polygon vertices
W <- owin(poly=p.list)

# pedons with their 'marks' i.e. pedon type, and the pinn boundary as the 'window'
pedons.ppp <- ppp(pedons$x, pedons$y, xrange=c(min(pedons$x), max(pedons$x)), yrange=c(min(pedons$y), max(pedons$y)) , window=W, marks=pedons$type)

Plot and summarize the new combined set of pedon data

# plot and summarize the pedons data:
# note the method used to subset the two 'marks'
par(mfrow=c(1,2))
plot(density.ppp(pedons.ppp[pedons.ppp$marks == 'M']), main="Mollic Point Density")
points(pedons.ppp[pedons.ppp$marks == 'M'], cex=0.2, pch=16)

plot(density.ppp(pedons.ppp[pedons.ppp$marks == 'A']), main="Argillic Point Density")
points(pedons.ppp[pedons.ppp$marks == 'A'], cex=0.2, pch=16)

summary(pedons.ppp)

Marked planar point pattern: 151 points
Average intensity 1.38e-06 points per unit area
Marks:
frequency proportion intensity
A 62 0.411 5.67e-07
M 89 0.589 8.14e-07

Window: polygonal boundary
single connected closed polygon with 309 vertices
enclosing rectangle: [ 657228.3125 , 670093.8125 ] x [ 4030772.75 , 4047986.25 ]
Window area = 109337135.585938

Convert the sampling design points (from above) to 'spatstat' objects and plot their density

# convert the random datasets: using the same window:
# (all four designs share the pinnacles boundary window W defined above)
ppp.regular <- ppp(p.regular@coords[,1], p.regular@coords[,2], window=W)
ppp.nalign <- ppp(p.nalign@coords[,1], p.nalign@coords[,2], window=W)
ppp.random <- ppp(p.random@coords[,1], p.random@coords[,2], window=W)
ppp.stratified <- ppp(p.stratified@coords[,1], p.stratified@coords[,2], window=W)

# visually check density of random points:
par(mfrow=c(2,2))
plot(density.ppp(ppp.regular), main="Regular Sampling")
points(ppp.regular, pch=16, cex=0.2)

plot(density.ppp(ppp.nalign), main="Non-Aligned Sampling")
points(ppp.nalign, pch=16, cex=0.2)

plot(density.ppp(ppp.random), main="Random Sampling")
points(ppp.random, pch=16, cex=0.2)

plot(density.ppp(ppp.stratified), main="Stratified Sampling")
points(ppp.stratified, pch=16, cex=0.2)

Simple, and probably flawed, attempt to use a point-process model for the pedon data. Third order polynomial model for the distribution of pedons with a mollic epipedon. See the manual page for ppm() for detailed examples.

# model the spatial occurrence of Mollic epipedons with a 3rd-order polynomial,
# using Poisson process theory
fit <- ppm( unmark(pedons.ppp[pedons.ppp$marks == 'M']), ~polynom(x,y,3), Poisson())

# view the fitted model
par(mfcol=c(2,2))
plot(unmark(pedons.ppp[pedons.ppp$marks == 'M']), main="Mollic Pedons")
plot(fit)

# plot some diagnostics on the fitted model: Pearson residuals (see references)
diagnose.ppm(fit, type="pearson")

# another example using a built-in dataset: the Lansing Forest
# fit nonstationary marked Poisson process
# with different log-cubic trend for each species
data(lansing)
fit <- ppm(lansing, ~ marks * polynom(x,y,3), Poisson())
plot(fit)

Point-process model diagnostic references from the spatstat manual
Baddeley, A., Turner, R., Moller, J. and Hazelton, M. (2005) Residual analysis for spatial point processes. Journal of the Royal Statistical Society, Series B 67, 617–666.
Stoyan, D. and Grabarnik, P. (1991) Second-order characteristics for stochastic structures connected with Gibbs point processes. Mathematische Nachrichten, 151:95–100.

# Simple Map Creation

library(maps)

# county outlines for California, with scale bar and axes
map('county', 'ca', interior=TRUE)
map.scale()
map.axes()

## add some user data in lon/lat format:
## NOTE(review): x, y, z are user-supplied point datasets, not defined here
points(x, pch=4, cex=0.5, col=1)
points(y, pch=4, cex=0.5, col=2)
points(z, pch=4, cex=0.5, col=3)

Example Map

# Some Ideas on Interpolation of Categorical Data

## Premise

Wanted to make something akin to an interpolated surface for some spatially auto-correlated categorical data (presence/absence). I quickly generated some fake spatially auto-correlated data to work with using r.surf.fractal in GRASS. These data were converted into a binary map using an arbitrary threshold that looked about right-- splitting the data into something that looked 'spatially clumpy'.

Fig. 1: Simulated auto-correlated, categorical variable, with sampling points and derived voronoi polygons.

I had used voronoi polygons in the past to display connectivity of categorical data recorded at points, even though sparsely sampled areas tend to be over emphasized. Figure 1 shows the fake spatially auto-correlated data (grey = presence /white = not present), sample points (yellow boxes), and voronoi polygons. The polygons with thicker, red boundaries represent the "voronoi interpolation" of the categorical feature.

## Interpolation by RST

Wanting something a little more interesting, I tried interpolating the presence/absence data via RST. Numerical interpolation of categorical data is usually not preferred as it creates a continuum between discrete classes-- i.e. values that do not have a sensible interpretation. Throwing that aside for the sake of making a neat map, a color scheme was selected to emphasize the categorical nature of the interpolated surface (Figure 2).

Fig. 2: RST interpolation of 0-1 continuum: red=1, blue=0.

## Conditional Indicator Simulation

Finally, I gave conditional indicator simulation a try-- this required two steps: 1) fitting a model variogram, 2) simulation. This approach generates different output on each simulation, however, the output represents the original spatial pattern and variability. A more interesting map could be generated by running 1000 simulations and converting them into a single probability map.

Indicator Variogram: Empirical semi-variogram for indicator=1, with spherical model fit.

Categorical Interpolation 3: Single conditional indicator simulation.

## Code Snippets

Generate Some Data in GRASS

# set a reasonable resolution
g.region res=10 -ap

# simulate some spatially auto-correlated data
# and convert to categorical map (arbitrary threshold at 0)
r.surf.fractal --o dimension=2.5 out=fractal
r.mapcalc "fractal.bin = if(fractal > 0, 1, 0)"
r.colors fractal.bin color=rules <<EOF
0 white
1 grey
EOF

# sample the categorical map at 100 random points
v.random --o out=v n=100
v.db.addcol map=v column="fractal double, class integer"
v.what.rast vect=v rast=fractal column=fractal
v.what.rast vect=v rast=fractal.bin column=class

# simplest approach
v.voronoi --o in=v out=v_vor

# try interpolation of classes...
v.surf.rst --o in=v zcol=class elev=v.interp
r.colors map=v.interp color=rules <<EOF
0% blue
0.5 white
100% red
EOF

Perform Indicator Simulation in R

# indicator simulation
library(spgrass6)
library(gstat)

# NOTE(review): 'd' is the point vector from the GRASS steps above --
# presumably d <- readVECT6('v'); confirm

# convert class to factor
d@data <- transform(d@data, class=factor(class))

# inspect variogram of d$class == 1
plot(v <- variogram(I(class == 1) ~ 1, data = d))

# fit a spherical variogram with nugget
# note: range must be numeric (range='250' in the original was a typo),
# and the nugget is named explicitly for clarity
v.fit <- fit.variogram(v, vgm(psill=1, model='Sph', range=250, nugget=1))

# png(file='indicator_variogram.png', width=400, height=400, bg='white')
plot(v, model=v.fit)
# dev.off()

# make a grid to predict onto, based on the current GRASS region
G <- gmeta6()
grd <- gmeta2grd()

# new grid of 1's carrying the source data's projection
new_data <- SpatialGridDataFrame(grd, data=data.frame(k=rep(1,G$cols*G$rows)), proj4string=CRS(d@proj4string@projargs))

# conditional indicator simulation:
# make more simulations for an estimated probability
p <- krige(I(class == 1) ~ 1, d, new_data, v.fit, nsim=1, indicators=TRUE, nmax=40)

# write one back to GRASS
writeRAST6(p, 'indicator.sim', zcol='sim1')

Make Some Maps in GRASS

# fix colors of the simulated map
r.colors map=indicator.sim color=rules << EOF
0 white
1 grey
EOF

# simple maps
# (each block: clear display, draw raster, overlay points / voronoi polygons,
#  then dump the display to a file)
d.erase
d.rast v.interp
d.vect v icon=basic/box  size=7 fcol=yellow
d.vect v_vor type=area fcol=none where=class=0
d.vect v_vor type=area fcol=none width=2 where=class=1
d.out.file --o out=example1

d.erase
d.rast fractal.bin
d.vect v icon=basic/box  size=7 fcol=yellow
d.vect v_vor type=area fcol=none where=class=0
d.vect v_vor type=area fcol=none col=red width=2 where=class=1
d.out.file --o out=example2

d.erase
d.vect v_vor type=area fcol=white where=class=0
d.vect v_vor type=area fcol=grey where=class=1
d.vect v icon=basic/box  size=7 fcol=yellow
d.out.file --o out=example3

d.erase
d.rast indicator.sim
d.vect v icon=basic/box size=7 fcol=yellow
d.out.file --o out=example4

# Target Practice and Spatial Point Process Models

Overview:
Simple application of spatial point-process models to spread patterns after some backyard target practice. Note that only a cereal box and 2 sheets of graph paper were injured in this exercise. Data files are attached at the bottom of this page; all distance units are in cm.

A simple experiment was conducted, solely for the purpose of collecting semi-random coordinates on a plane, where a target was hit with 21 shots from a distance of 15 and 30 feet. The ppm() function (spatstat package) in R was used to create point density maps, along with a statistical description of the likelihood of where each target would be hit were the experiment to be conducted again (via point-process modeling). While normally used to model the occurrence of natural phenomena or biological entities, point-process models can be used to analyze one's relative accuracy at set target distances. One more way in which remote sensing or GIS techniques can be applied to smaller, non-georeferenced coordinate systems.

Density Comparison: pattern densities from the two experiments, 30 and 15 feet from the target.

Load Data and Compute Density Maps:

library(spatstat)
library(RColorBrewer)

## an initial plot
## (t_30 / t_15 hold the shot coordinates from the attached data files,
##  loaded earlier; all units are cm)
plot(t_30, xlim=c(0,35), ylim=c(0,50))
points(t_15, col='red')

## convert to spatstat objects
## (observation window is the 0-35 x 0-50 cm extent of the target)
t_30.ppp <- ppp(t_30\$x, t_30\$y, xrange=c(0,35), yrange=c(0,50) )
t_15.ppp <- ppp(t_15\$x, t_15\$y, xrange=c(0,35), yrange=c(0,50) )

## check via plot
plot(t_30.ppp)
points(t_15.ppp, col='red')

Fit Point-Process Models:

## fit point-process model
## (third-order polynomial trend in x,y; Poisson() = no interaction
##  between points assumed)
t_30_fit <- ppm(t_30.ppp, ~polynom(x,y,3), Poisson())
t_15_fit <- ppm(t_15.ppp, ~polynom(x,y,3), Poisson())

## plot density comparisons between two ranges
## (kernel density surfaces side-by-side, shots overplotted as crosses)
par(mfcol=c(1,2))
plot( density(t_30.ppp), col=brewer.pal('Blues', n=9), main="30 Feet")
points(t_30.ppp, pch=4, cex=1)

plot( density(t_15.ppp), col=brewer.pal('Oranges', n=9), main="15 Feet")
points(t_15.ppp, pch=4, cex=1)

##
## plot a fit of the 30 foot pattern
## (2x2 panel: kernel density, then the fitted trend rendered as
##  image, contour, and perspective views)
##
par(mfcol=c(2,2))
plot( density(t_30.ppp), col=brewer.pal('Blues', n=9), main="30 Feet")
points(t_30.ppp, pch=4, cex=1)

plot(t_30_fit, col=brewer.pal('Blues', n=9), trend=TRUE, cif=FALSE, pause=FALSE, how="image")
plot(t_30_fit, trend=TRUE, cif=FALSE, pause=FALSE, how="contour")
plot(t_30_fit, colmap=brewer.pal('Blues', n=9), trend=TRUE, cif=FALSE, pause=FALSE, how="persp", theta=0, phi=45)

##
## plot a fit of the 15 foot pattern
## (same 2x2 layout as the 30 foot figure)
##
par(mfcol=c(2,2))
plot( density(t_15.ppp), col=brewer.pal('Oranges', n=9), main="15 Feet")
points(t_15.ppp, pch=4, cex=1)

plot(t_15_fit, col=brewer.pal('Oranges', n=9), trend=TRUE, cif=FALSE, pause=FALSE, how="image")
plot(t_15_fit, trend=TRUE, cif=FALSE, pause=FALSE, how="contour")
plot(t_15_fit, colmap=brewer.pal('Oranges', n=9), trend=TRUE, cif=FALSE, pause=FALSE, how="persp", theta=0, phi=45)

30 Foot PPM

15 Foot PPM

Tidy-up:

##
## convert to png:
## (rasterize each PDF at 300 dpi, then shrink the PNGs in place)
for i in *.pdf ; do convert -density 300 +antialias \$i `basename \$i .pdf`.png ; done
# fixed typo: '-reisize' is not a mogrify option; the correct flag is '-resize'
for i in *.png ; do mogrify -resize 25% \$i ; done

# Visual Interpretation of Principal Coordinates (of) Neighbor Matrices (PCNM)

Principal Coordinates (of) Neighbor Matrices (PCNM) is an interesting algorithm, developed by D. Borcard and P. Legendre at the University of Montreal, for the multi-scale analysis of spatial structure. This algorithm is typically applied to a distance matrix, computed from the coordinates where some environmental data were collected. The resulting "PCNM vectors" are commonly used to describe variable degrees of possible spatial structure and its contribution to variability in other measured parameters (soil properties, species distribution, etc.)-- essentially a spectral decomposition of spatial connectivity. This algorithm has recently been updated and released as part of the PCNM package for R. Several other implementations of the algorithm exist, however this seems to be the most up-to-date.

Related Presentations and Papers on PCNM

• http://biol09.biol.umontreal.ca/ESA_SS/Borcard_&_PL_talk.pdf
• Borcard, D. and Legendre, P. 2002. All-scale spatial analysis of ecological data by means of principal coordinates of neighbour matrices. Ecological Modelling 153: 51-68.
• Borcard, D., P. Legendre, Avois-Jacquet, C. & Tuomisto, H. 2004. Dissecting the spatial structures of ecological data at all scales. Ecology 85(7): 1826-1832.

I was interested in using PCNM vectors for soils-related studies, however I encountered some difficulty interpreting what they really meant when applied to irregularly-spaced site locations. As a demonstration, I generated several (25 to be exact) PCNM vectors from a regular grid of points. Using an example from the PCNM manual page, I have plotted the values of the PCNM vectors at the grid nodes (below). The interpretation of the PCNM vectors derived from a 2D, regular grid is fairly simple: lower order vectors represent regional-scale groupings, higher order vectors represent more local-scale groupings. One thing to keep in mind is that these vectors give us a multi-scale metric for grouping sites, and are not computed by any properties that may have been measured at the sites. The size of the symbols is proportional to the PCNM vectors and the color represents the sign.

PCNM - Regular Grid

Soil survey operations are rarely conducted on a regular grid, so I re-computed PCNM vectors from the same simulated grid, but after randomly perturbing each site. The resulting map of PCNM vectors is presented below. The patterns are a little complex, but quickly decipherable with guidance from the PCNM vectors derived from a regular grid. Neat!

PCNM - Randomly Perturbed Regular Grid

R code used to make figures

library(PCNM)

# fake data
# (regular 10 x 10 grid of site coordinates)
g <- expand.grid(x=1:10, y=1:10)
x.coords <- data.frame(x=g\$x, y=g\$y)

# PCNM
# (Euclidean distance matrix between sites -> PCNM eigenvectors;
#  dbMEM=TRUE presumably selects the distance-based MEM variant --
#  confirm against the PCNM package manual)
x.sub.dist <- dist(x.coords[,c('x','y')])
x.sub.pcnm <- PCNM(x.sub.dist, dbMEM=TRUE)

# plot first 25 PCNM vectors
# (s.value: symbol size proportional to vector magnitude, color gives sign)
pdf(file='PCNM-grid-example.pdf', width=10, height=10)

par(mfrow=c(5,5))
for(i in 1:25)
s.value(x.coords[,c('x','y')], x.sub.pcnm\$vectors[,i], clegend=0, sub=paste("PCNM", i), csub=1.5, addaxes=FALSE, origin=c(1,1))

dev.off()

# jitter the same input and try again
# (randomly perturb each grid node to mimic irregular sampling)
x.coords <- data.frame(x=jitter(g\$x, factor=2), y=jitter(g\$y, factor=2))
x.sub.dist <- dist(x.coords[,c('x','y')])
x.sub.pcnm <- PCNM(x.sub.dist, dbMEM=TRUE)

# plot first 25 PCNM vectors
pdf(file='PCNM-jittered_grid-example.pdf', width=10, height=10)

par(mfrow=c(5,5))
for(i in 1:25)
s.value(x.coords[,c('x','y')], x.sub.pcnm\$vectors[,i], clegend=0, sub=paste("PCNM", i), csub=1.5, addaxes=FALSE, origin=c(1,1))

dev.off()

# Visualizing Random Fields and Select Components of Spatial Autocorrelation

Premise
I have always had a hard time thinking about various parameters associated with random fields and empirical semi-variograms. The gstat package for R has an interesting interface for simulating random fields, based on a semi-variogram model. It is possible to quickly visualize the effect of altering semi-variogram parameters, by "seeding" the random number generator with the same value at each iteration. Of primary interest was the visualization of the principal axis of anisotropy, the semi-variogram sill, and the semi-variogram range. The code used to produce the images is included below. For more information on the R implementation of gstat, see the R-sig-GEO mailing list.

Setup

library(gstat)

# prediction grid: 100 x 100 regular lattice of cell centers
xy <- setNames(expand.grid(1:100, 1:100), c("x", "y"))

Demonstration of Anisotropy Direction

Demonstrate Anisotropy Direction

# base case: isotropic exponential model, unit sill, range 15
# (dummy=TRUE with beta=0 gives an unconditional simulation of a
#  zero-mean field; set.seed(1) before every run so all panels share
#  the same underlying random draw and differ only in the model)
var.model <- vgm(psill=1, model="Exp", range=15)
set.seed(1)
sim <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata = xy, nsim = 1)

# anisotropy at 0 degrees, minor/major range ratio 0.5
var.model <- vgm(psill=1, model="Exp", range=15, anis=c(0, 0.5))
set.seed(1)
sim\$sim2 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# anisotropy at 45 degrees
var.model <- vgm(psill=1, model="Exp", range=15, anis=c(45, 0.5))
set.seed(1)
sim\$sim3 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# anisotropy at 90 degrees
var.model <- vgm(psill=1, model="Exp", range=15, anis=c(90, 0.5))
set.seed(1)
sim\$sim4 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# anisotropy at 135 degrees
var.model <- vgm(psill=1, model="Exp", range=15, anis=c(135, 0.5))
set.seed(1)
sim\$sim5 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# promote to SP class object
gridded(sim) = ~x+y

# one panel per simulation, labeled by anisotropy direction
new.names <- c('iso', 'aniso 0 deg', 'aniso 45 deg', 'aniso 90 deg', 'aniso 135 deg')
p1 <- spplot(sim, names.attr=new.names, col.regions=topo.colors(100), as.table=TRUE, main="Demonstration of Anisotropy")

Demonstration of Range Parameter

Demonstrate Range Parameter

# range = 1: correlation decays almost immediately -> near-white noise
# (same simulation recipe as the anisotropy demo: unconditional
#  zero-mean field, re-seeded before each run so only range differs)
var.model <- vgm(psill=1, model="Exp", range=1)
set.seed(1)
sim <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata = xy, nsim = 1)

# range = 5
var.model <- vgm(psill=1, model="Exp", range=5)
set.seed(1)
sim\$sim2 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# range = 15
var.model <- vgm(psill=1, model="Exp", range=15)
set.seed(1)
sim\$sim3 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# range = 30: broad, smooth spatial structure
var.model <- vgm(psill=1, model="Exp", range=30)
set.seed(1)
sim\$sim4 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# promote to SP class object
gridded(sim) = ~x+y

# panel labels match the simulated ranges
# (fixed: third panel was labeled 'range = 10' but simulated with range=15)
new.names <- c('range = 1', 'range = 5', 'range = 15', 'range = 30')
p2 <- spplot(sim, names.attr=new.names, col.regions=topo.colors(100), as.table=TRUE, main="Demonstration of Range Parameter")

Demonstration of Sill Parameter

Demonstrate Sill Parameter

# sill = 0.5
# (same simulation recipe as above: unconditional zero-mean field,
#  re-seeded before each run so only the sill -- i.e. the overall
#  variance of the field -- differs between panels)
var.model <- vgm(psill=0.5, model="Exp", range=15)
set.seed(1)
sim <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata = xy, nsim = 1)

# sill = 1
var.model <- vgm(psill=1, model="Exp", range=15)
set.seed(1)
sim\$sim2 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# sill = 2
var.model <- vgm(psill=2, model="Exp", range=15)
set.seed(1)
sim\$sim3 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# sill = 4
var.model <- vgm(psill=4, model="Exp", range=15)
set.seed(1)
sim\$sim4 <- predict(gstat(formula=z~1, locations= ~x+y, dummy=TRUE, beta=0, model=var.model, nmax=20), newdata=xy, nsim=1)\$sim1

# promote to SP class object
gridded(sim) = ~x+y

# one panel per simulation, labeled by sill value
new.names <- c('sill = 0.5', 'sill = 1', 'sill = 2', 'sill = 4')
p3 <- spplot(sim, names.attr=new.names, col.regions=topo.colors(100), as.table=TRUE, main="Demonstration of Sill Parameter")