X Tutup
#' --- #' title: "Introduction to Raster Package" #' --- #' #' #'
#' [Open presentation in a new tab](presentations/PS_06_raster.html){target="_blank"} #'
#'
#' #' _Click on presentation and then use the space bar to advance to the next slide or escape key to show an overview._ #'
#'
#'
#' #' #' ## Download #' #' | [
R Script](`r output_nocomment`) | [
Commented R Script](`r output`) | [
Rmd Script](`r fullinput`)| #' |:--:|:-:|:-:| #' #' ## Libraries #' ## ----message=F,warning=FALSE, results='hide'----------------------------- library(dplyr) library(tidyr) library(sp) library(ggplot2) library(rgeos) library(maptools) # load data for this course # devtools::install_github("adammwilson/DataScienceData") library(DataScienceData) # New libraries library(raster) library(rasterVis) #visualization library for raster #' #' # Raster Package #' #' ## `getData()` #' #' Raster package includes access to some useful (vector and raster) datasets with `getData()`: #' #' * Elevation (SRTM 90m resolution raster) #' * World Climate (Tmin, Tmax, Precip, BioClim rasters) #' * Countries from CIA factsheet (vector!) #' * Global Administrative boundaries (vector!) #' #' `getData()` steps for GADM: #' #' 1. _Select Dataset_: ‘GADM’ returns the global administrative boundaries. #' 2. _Select country_: Country name of the boundaries using its ISO A3 country code #' 3. _Specify level_: Level of of administrative subdivision (0=country, 1=first level subdivision). #' #' ## GADM: Global Administrative Areas #' Administrative areas in this database are countries and lower level subdivisions. #' #' alt text #' #' Divided by country (see website for full dataset). Explore country list: ## ------------------------------------------------------------------------ getData("ISO3")%>% as.data.frame%>% filter(NAME=="South Africa") #' #' Download data for South Africa ## ---- eval=F------------------------------------------------------------- ## za=getData('GADM', country='ZAF', level=1) #' #' Or use the version in the DataScienceData ## ------------------------------------------------------------------------ data(southAfrica) za=southAfrica # rename for convenience #' #' ## ---- eval=F------------------------------------------------------------- ## plot(za) #' Danger: `plot()` works, but can be slow for complex polygons. If you want to speed it up, you can plot a simplified version as follows: #' #' ## ------------------------------------------------------------------------ za %>% gSimplify(0.01) %>% plot() #' #' #' ### Check out attribute table #' ## ------------------------------------------------------------------------ za@data #' #' #' Plot a subsetted region: ## ------------------------------------------------------------------------ subset(za,NAME_1=="Western Cape") %>% gSimplify(0.01) %>% plot() #' #' #'
#' ## Your turn #' #' Use the method above to download and plot the boundaries for a country of your choice. #' #' #'
#' #'
#'
#' #' #' #' #' # Raster Data #' #' ## Raster introduction #' #' Spatial data structure dividing region ('grid') into rectangles (’cells’ or ’pixels’) storing one or more values each. #' #' Some examples from the [Raster vignette](http://cran.r-project.org/web/packages/raster/vignettes/Raster.pdf) by Robert J. Hijmans. #' #' * `rasterLayer`: 1 band #' * `rasterStack`: Multiple Bands #' * `rasterBrick`: Multiple Bands of _same_ thing. #' #' ## ---- echo=TRUE---------------------------------------------------------- x <- raster() x #' ## ------------------------------------------------------------------------ str(x) #' #' #' ## ---- echo=T, results=T-------------------------------------------------- x <- raster(ncol=36, nrow=18, xmn=-1000, xmx=1000, ymn=-100, ymx=900) res(x) res(x) <- 100 res(x) ncol(x) #' #' #' ## ------------------------------------------------------------------------ # change the numer of columns (affects resolution) ncol(x) <- 18 ncol(x) res(x) #' #' ## Raster data storage #' ## ------------------------------------------------------------------------ r <- raster(ncol=10, nrow=10) ncell(r) #' But it is an empty raster ## ------------------------------------------------------------------------ hasValues(r) #' #' #' #' Use `values()` function: ## ------------------------------------------------------------------------ values(r) <- 1:ncell(r) hasValues(r) values(r)[1:10] #' #' #'
#' ## Your turn #' #' Create and then plot a new raster with: #' #' 1. 100 rows #' 2. 50 columns #' 3. Fill it with random values (`rnorm()`) #' #' #'
#' #'
#'
#' #' #' #' #' Raster memory usage #' ## ------------------------------------------------------------------------ inMemory(r) #' > You can change the memory options using the `maxmemory` option in `rasterOptions()` #' #' ## Raster Plotting #' #' Plotting is easy (but slow) with `plot`. #' ## ------------------------------------------------------------------------ plot(r, main='Raster with 100 cells') #' #' #' #' ### ggplot and rasterVis #' #' rasterVis package has `gplot()` for plotting raster data in the `ggplot()` framework. #' ## ------------------------------------------------------------------------ gplot(r,maxpixels=50000)+ geom_raster(aes(fill=value)) #' #' #' Adjust `maxpixels` for faster plotting of large datasets. #' ## ------------------------------------------------------------------------ gplot(r,maxpixels=10)+ geom_raster(aes(fill=value)) #' #' #' #' Can use all the `ggplot` color ramps, etc. #' ## ------------------------------------------------------------------------ gplot(r)+geom_raster(aes(fill=value))+ scale_fill_distiller(palette="OrRd") #' #' #' ## Spatial Projections #' #' Raster package uses standard [coordinate reference system (CRS)](http://www.spatialreference.org). #' #' For example, see the projection format for the [_standard_ WGS84](http://www.spatialreference.org/ref/epsg/4326/). ## ------------------------------------------------------------------------ projection(r) #' #' ## Warping rasters #' #' Use `projectRaster()` to _warp_ to a different projection. #' #' `method=` `ngb` (for categorical) or `bilinear` (continuous) #' ## ------------------------------------------------------------------------ r2=projectRaster(r,crs="+proj=sinu +lon_0=0",method = "ngb") par(mfrow=c(1,2));plot(r);plot(r2) #' #' #' # WorldClim #' #' ## Overview of WorldClim #' #' Mean monthly climate and derived variables interpolated from weather stations on a 30 arc-second (~1km) grid. #' See [worldclim.org](http://www.worldclim.org/methods) #' #' #' #' ## Bioclim variables #' #' #' #' Variable Description #' - - #' BIO1 Annual Mean Temperature #' BIO2 Mean Diurnal Range (Mean of monthly (max temp – min temp)) #' BIO3 Isothermality (BIO2/BIO7) (* 100) #' BIO4 Temperature Seasonality (standard deviation *100) #' BIO5 Max Temperature of Warmest Month #' BIO6 Min Temperature of Coldest Month #' BIO7 Temperature Annual Range (BIO5-BIO6) #' BIO8 Mean Temperature of Wettest Quarter #' BIO9 Mean Temperature of Driest Quarter #' BIO10 Mean Temperature of Warmest Quarter #' BIO11 Mean Temperature of Coldest Quarter #' BIO12 Annual Precipitation #' BIO13 Precipitation of Wettest Month #' BIO14 Precipitation of Driest Month #' BIO15 Precipitation Seasonality (Coefficient of Variation) #' BIO16 Precipitation of Wettest Quarter #' BIO17 Precipitation of Driest Quarter #' BIO18 Precipitation of Warmest Quarter #' BIO19 Precipitation of Coldest Quarter #' #' #' #' #' ## Download climate data #' #' Download the data: #' ## ---- eval=F------------------------------------------------------------- ## clim=raster::getData('worldclim', var='bio', res=10) #' #' `res` is resolution (0.5, 2.5, 5, and 10 minutes of a degree) #' #' Instead of downloading the full dataset, we'll use the copy in the `DataScienceData` package: #' ## ------------------------------------------------------------------------ data(worldclim) #rename for convenience clim=worldclim #' #' #' ### Gain and Offset #' ## ------------------------------------------------------------------------ clim #' #' Note the min/max of the raster. What are the units? Always check metadata, the [WorldClim temperature dataset](http://www.worldclim.org/formats) has a `gain` of 0.1, meaning that it must be multipled by 0.1 to convert back to degrees Celsius. We'll set the temperature variables (see table above) to 0.1 and leave the others at 1. #' ## ------------------------------------------------------------------------ gain(clim)=c(rep(0.1,11),rep(1,7)) #' #' #' #' ### Plot with `plot()` #' ## ------------------------------------------------------------------------ plot(clim) #' #' #' #' ## Faceting in ggplot #' #' Or use `rasterVis` methods with gplot ## ------------------------------------------------------------------------ gplot(clim[[13:19]])+geom_raster(aes(fill=value))+ facet_wrap(~variable)+ scale_fill_gradientn(colours=c("brown","red","yellow","darkgreen","green"),trans="log10")+ coord_equal() #' #' #' #' Let's dig a little deeper into the data object: #' ## ------------------------------------------------------------------------ ## is it held in RAM? inMemory(clim) ## How big is it? object.size(clim) ## can we work with it directly in RAM? canProcessInMemory(clim) #' #' #' ## Subsetting and spatial cropping #' #' Use `[[1:3]]` to select raster layers from raster stack. #' ## ------------------------------------------------------------------------ ## crop to a latitude/longitude box r1 <- raster::crop(clim[[1]], extent(10,35,-35,-20)) ## Crop using a Spatial polygon r1 <- raster::crop(clim[[1]], bbox(za)) #' #' #' ## ------------------------------------------------------------------------ r1 plot(r1) #' #' ## Spatial aggregation ## ------------------------------------------------------------------------ ## aggregate using a function aggregate(r1, 3, fun=mean) %>% plot() #' #'
#' ## Your turn #' Create a new raster by aggregating to the minimum (`min`) value of `r1` within a 10 pixel window #' #' #'
#' #'
#'
#' #' #' #' ## Focal ("moving window") ## ------------------------------------------------------------------------ ## apply a function over a moving window focal(r1, w=matrix(1,3,3), fun=mean) %>% plot() #' #' #' ## ------------------------------------------------------------------------ ## apply a function over a moving window rf_min <- focal(r1, w=matrix(1,11,11), fun=min) rf_max <- focal(r1, w=matrix(1,11,11), fun=max) rf_range=rf_max-rf_min ## or do it all at once range2=function(x,na.rm=F) { max(x,na.rm)-min(x,na.rm) } rf_range2 <- focal(r1, w=matrix(1,11,11), fun=range2) plot(rf_range) plot(rf_range2) #' #' #'
#' ## Your turn #' #' Plot the focal standard deviation of `r1` over a 3x3 window. #' #' #'
#' #'
#'
#' #' #' #' #' ## Raster calculations #' #' the `raster` package has many options for _raster algebra_, including `+`, `-`, `*`, `/`, logical operators such as `>`, `>=`, `<`, `==`, `!` and functions such as `abs`, `round`, `ceiling`, `floor`, `trunc`, `sqrt`, `log`, `log10`, `exp`, `cos`, `sin`, `max`, `min`, `range`, `prod`, `sum`, `any`, `all`. #' #' So, for example, you can ## ------------------------------------------------------------------------ cellStats(r1,range) ## add 10 s = r1 + 10 cellStats(s,range) #' #' #' ## ------------------------------------------------------------------------ ## take the square root s = sqrt(r1) cellStats(s,range) # round values r = round(r1) cellStats(r,range) # find cells with values less than 15 degrees C r = r1 < 15 plot(r) #' #' #' #' ### Apply algebraic functions ## ------------------------------------------------------------------------ # multiply s times r and add 5 s = s * r1 + 5 cellStats(s,range) #' #' ## Extracting Raster Data #' #' * points #' * lines #' * polygons #' * extent (rectangle) #' * cell numbers #' #' Extract all intersecting values OR apply a summarizing function with `fun`. #' #' #' ### Point data #' #' `sampleRandom()` generates random points and automatically extracts the raster values for those points. Also check out `?sampleStratified` and `sampleRegular()`. #' #' Generate 100 random points and the associated climate variables at those points. ## ------------------------------------------------------------------------ ## define a new dataset of points to play with pts=sampleRandom(clim,100,xy=T,sp=T) plot(pts);axis(1);axis(2) #' #' ### Extract data using a `SpatialPoints` object #' Often you will have some locations (points) for which you want data from a raster* object. You can use the `extract` function here with the `pts` object (we'll pretend it's a new point dataset for which you want climate variables). ## ------------------------------------------------------------------------ pts_data=raster::extract(clim[[1:4]],pts,df=T) head(pts_data) #' > Use `package::function` to avoid confusion with similar functions. #' #' #' ### Plot the global dataset with the random points ## ------------------------------------------------------------------------ gplot(clim[[1]])+ geom_raster(aes(fill=value))+ geom_point( data=as.data.frame(pts), aes(x=x,y=y),col="red")+ coord_equal() #' #' ### Summarize climate data at point locations #' Use `gather()` to reshape the climate data for easy plotting with ggplot. #' ## ----warning=F----------------------------------------------------------- d2=pts_data%>% gather(ID) colnames(d2)[1]="cell" head(d2) #' #' And plot density plots (like histograms). ## ------------------------------------------------------------------------ ggplot(d2,aes(x=value))+ geom_density()+ facet_wrap(~cell,scales="free") #' #' #' ### Lines #' #' Extract values along a transect. ## ------------------------------------------------------------------------ transect = SpatialLinesDataFrame( SpatialLines(list(Lines(list(Line( rbind(c(19, -33.5),c(26, -33.5)))), ID = "ZAF"))), data.frame(Z = c("transect"), row.names = c("ZAF"))) # OR transect=SpatialLinesDataFrame( readWKT("LINESTRING(19 -33.5,26 -33.5)"), data.frame(Z = c("transect"))) gplot(r1)+geom_tile(aes(fill=value))+ geom_line(aes(x=long,y=lat),data=fortify(transect),col="red") #' #' #' #' ### Plot Transect #' ## ------------------------------------------------------------------------ trans=raster::extract(x=clim[[12:14]], y=transect, along=T, cellnumbers=T)%>% data.frame() head(trans) #' #' #### Add other metadata and reshape ## ------------------------------------------------------------------------ trans[,c("lon","lat")]=coordinates(clim)[trans$cell,] trans$order=as.integer(rownames(trans)) head(trans) #' ## ------------------------------------------------------------------------ transl=group_by(trans,lon,lat)%>% gather(variable, value, -lon, -lat, -cell, -order) head(transl) #' ## ------------------------------------------------------------------------ ggplot(transl,aes(x=lon,y=value, colour=variable, group=variable, order=order))+ geom_line() #' #' #' #' ### _Zonal_ statistics #' Calculate mean annual temperature averaged by province (polygons). #' ## ------------------------------------------------------------------------ rsp=raster::extract(x=r1, y=gSimplify(za,0.01), fun=mean, sp=T) #spplot(rsp,zcol="bio1") #' ## ------------------------------------------------------------------------ ## add the ID to the dataframe itself for easier indexing in the map rsp$id=as.numeric(rownames(rsp@data)) ## create fortified version for plotting with ggplot() frsp=fortify(rsp,region="id") ggplot(rsp@data, aes(map_id = id, fill=bio1)) + expand_limits(x = frsp$long, y = frsp$lat)+ scale_fill_gradientn( colours = c("grey","goldenrod","darkgreen","green"))+ coord_map()+ geom_map(map = frsp) #' #' > For more details about plotting spatialPolygons, see [here](https://github.com/hadley/ggplot2/wiki/plotting-polygon-shapefiles) #' #' ## Example Workflow #' #' #' 1. Download the Maximum Temperature dataset using `getData()` #' 2. Set the gain to 0.1 (to convert to degrees Celcius) #' 2. Crop it to the country you downloaded (or ZA?) #' 2. Calculate the overall range for each variable with `cellStats()` #' 3. Calculate the focal median with an 11x11 window with `focal()` #' 4. Create a transect across the region and extract the temperature data. #' ## ------------------------------------------------------------------------ country=getData('GADM', country='TUN', level=1)%>%gSimplify(0.01) tmax=getData('worldclim', var='tmax', res=10) gain(tmax)=0.1 names(tmax) #' #' Default layer names can be problematic/undesirable. ## ------------------------------------------------------------------------ sort(names(tmax)) ## Options month.name month.abb sprintf("%02d",1:12) sprintf("%04d",1:12) #' See `?sprintf` for details #' ## ------------------------------------------------------------------------ names(tmax)=sprintf("%02d",1:12) tmax_crop=crop(tmax,country) tmaxave_crop=mean(tmax_crop) # calculate mean annual maximum temperature tmaxavefocal_crop=focal(tmaxave_crop, fun=median, w=matrix(1,11,11)) #' #' > Only a few datasets are available usig `getData()` in the raster package, but you can download almost any file on the web with `file.download()`. #' #' Report quantiles for each layer in a raster* object ## ------------------------------------------------------------------------ cellStats(tmax_crop,"quantile") #' #' #' ## Create a Transect (SpatialLinesDataFrame) ## ------------------------------------------------------------------------ transect=SpatialLinesDataFrame( readWKT("LINESTRING(8 36,10 36)"), data.frame(Z = c("T1"))) #' #' #' ## Plot the timeseries of climate data ## ----fig.width=20,fig.height=20------------------------------------------ gplot(tmax_crop)+ geom_tile(aes(fill=value))+ scale_fill_gradientn( colours=c("brown","red","yellow","darkgreen","green"), name="Temp")+ facet_wrap(~variable)+ ## now add country overlays geom_path(data=fortify(country), mapping=aes(x=long,y=lat, group=group, order=order))+ # now add transect line geom_line(aes(x=long,y=lat), data=fortify(transect),col="red",size=3)+ coord_map() #' #' #' ## Extract and clean up the transect data ## ------------------------------------------------------------------------ trans=raster::extract(tmax_crop, transect, along=T, cellnumbers=T)%>% as.data.frame() trans[,c("lon","lat")]=coordinates(tmax_crop)[trans$cell] trans$order=as.integer(rownames(trans)) head(trans) #' #' Reformat to 'long' format. ## ------------------------------------------------------------------------ transl=group_by(trans,lon,lat)%>% gather(variable, value, -lon, -lat, -cell, -order)%>% separate(variable,into = c("X","month"),1)%>% mutate(month=as.numeric(month),monthname=factor(month.name[month],ordered=T,levels=month.name)) head(transl) #' #' ## Plot the transect data ## ------------------------------------------------------------------------ ggplot(transl, aes(x=lon,y=value, colour=month, group=month, order=order))+ ylab("Maximum Temp")+ scale_color_gradientn( colors=c("blue","green","red"), name="Month")+ geom_line() #' #' Or the same data in a levelplot: ## ------------------------------------------------------------------------ ggplot(transl, aes(x=lon,y=monthname, fill=value))+ ylab("Month")+ scale_fill_distiller( palette="PuBuGn", name="Tmax")+ geom_raster() #' #' #' ## Raster Processing #' #' Things to consider: #' #' * RAM limitations #' * Disk space and temporary files #' * Use of external programs (e.g. GDAL) #' * Use of external GIS viewer (e.g. QGIS)
X Tutup