Assumption:
You are familiar with the basics of R: data import, general plots, and some analysis. However, no prior knowledge of ggplot is needed.
This requires the “ggplot2” package. Please install it first if you have not done so yet.
Then load the library:
library(ggplot2)
Here I have used two different data sets. The first is from Sharma et al. (2016), Do composition and richness of woody plants vary between gaps and closed canopy patches in subtropical forests?, Journal of Vegetation Science, 10.1111/jvs.12445. URL: http://onlinelibrary.wiley.com/doi/10.1111/jvs.12445/full
Please download the data from here: https://drive.google.com/open?id=0BzXwqrXOWFTtaGZwN0VicWZsbTA
# Read the vegetation plot data. Replace the placeholder path with the
# location of the downloaded CSV file.
dt.df <- read.csv("...../myData.csv", header = TRUE)
# Preview the first rows.
head(dt.df)
## plot.no site sites.code habitat habitat.code woody.richness
## 1 S1 Simaldhap 1 Canopy 0 17
## 2 S2 Simaldhap 1 Gap 1 23
## 3 S3 Simaldhap 1 Canopy 0 28
## 4 S4 Simaldhap 1 Gap 1 24
## 5 S5 Simaldhap 1 Canopy 0 24
## 6 S6 Simaldhap 1 Canopy 0 16
## tree.richness Herb.cover
## 1 11 35
## 2 16 60
## 3 18 40
## 4 16 80
## 5 17 35
## 6 11 58
Check the data by calling
str(dt.df)
## 'data.frame': 128 obs. of 8 variables:
## $ plot.no : Factor w/ 128 levels "K1","K10","K11",..: 65 76 87 98 109 120 126 127 128 66 ...
## $ site : Factor w/ 2 levels "Kasara","Simaldhap": 2 2 2 2 2 2 2 2 2 2 ...
## $ sites.code : int 1 1 1 1 1 1 1 1 1 1 ...
## $ habitat : Factor w/ 2 levels "Canopy","Gap": 1 2 1 2 1 1 2 2 2 1 ...
## $ habitat.code : int 0 1 0 1 0 0 1 1 1 0 ...
## $ woody.richness: int 17 23 28 24 24 16 24 17 31 17 ...
## $ tree.richness : int 11 16 18 16 17 11 16 11 21 11 ...
## $ Herb.cover : int 35 60 40 80 35 58 55 50 22 43 ...
dim(dt.df)
## [1] 128 8
How ggplot is structured?
- ggplot syntax works in different layers
- like in GIS
- each layer is a component of a final plot
- we can add one by one or all at once
- first define basic part and save with a name (e.g. gp1)
- then add required layers
- it gives more control in some respects, and less in others, compared to base plot
- the “+” sign must be at the end of a line, not at the beginning.
Structure of syntax
- let's dissect the ggplot syntax
# Base layer only: the data plus the x/y mapping, with no geometry yet.
gp1 <- ggplot(
  data = dt.df,
  mapping = aes(x = site, y = Herb.cover)
)
- here, the basic plot is saved in gp1; it will not be displayed until it is called
gp1
- The plot is blank, there is no geometry added
- lets add another layer
Update previous figure
- here we add the point geometry. The empty parentheses mean that the X and Y axis data are the same as defined above.
# Add a point layer; it reuses the x/y mapping already stored in gp1.
gp1.p <- gp1 +
  geom_point()
gp1.p
Boxplot
- Normal boxplot
# One box per site, using the mapping stored in gp1.
gp1.bn <- gp1 +
  geom_boxplot()
gp1.bn
- Clustered boxplot
# Mapping habitat to the outline colour splits each site into one box per
# habitat.
gp1.bc <- gp1 +
  geom_boxplot(mapping = aes(colour = habitat))
gp1.bc
Barplot
- Count bar
# geom_bar() counts the rows for each x value, so only x is mapped here.
gp1.br <- ggplot(data = dt.df, mapping = aes(x = woody.richness)) +
  geom_bar()
gp1.br
Histogram plot
# Histogram of tree richness; adjust `bins` until the shape is clear.
gp1.h <- ggplot(data = dt.df, mapping = aes(x = tree.richness)) +
  geom_histogram(bins = 15)
gp1.h
Scatter plot
- It is simple X Y scatter plot
# Plain scatter plot: tree richness on x, herb cover on y.
gp1.sc <- ggplot(
  data = dt.df,
  mapping = aes(x = tree.richness, y = Herb.cover)
) +
  geom_point()
gp1.sc
Scatter plot with regression line
- We can fit the regression model in ggplot
# Overlay a fitted straight line; `lm` is the linear-model fitting function.
gp1.scl <- gp1.sc +
  geom_smooth(method = lm)
gp1.scl
- Without the confidence interval
# Same fitted line, with the confidence band switched off. Note that this
# overwrites the gp1.scl object created in the previous step.
gp1.scl <- gp1.sc +
  geom_smooth(method = lm, se = FALSE)
gp1.scl
Smoothing line
- Just a simple smoothing line (loess is used by default here) instead of a straight regression line
# Scatter plot with a default smoother; `shape` selects the point symbol.
gp1.scs <- ggplot(
  data = dt.df,
  mapping = aes(x = tree.richness, y = Herb.cover)
) +
  geom_point(shape = 4) +
  geom_smooth()
gp1.scs
## `geom_smooth()` using method = 'loess'
Path or Line plot
- It is a line that connects the points
- Better example will be monthly temperature or precipitation data
- download the data from here URL: https://drive.google.com/open?id=0BzXwqrXOWFTtVEhGdkd6T2FUeHc
# Read the monthly mean temperature table. Replace the placeholder path
# with the location of the downloaded CSV file.
t.df <- read.csv("....../tmean.csv", header = TRUE)
t.df[1:4, 1:3]
## month X1977 X1978
## 1 Jan 10.3 10.3
## 2 Feb 11.8 8.0
## 3 Mar 15.4 10.8
## 4 Apr 18.7 16.9
# Fix the factor levels to their order of first appearance so that the
# months are not sorted alphabetically in the plot.
t.df$month <- factor(
  t.df$month,
  levels = unique(as.character(t.df$month))
)
- ggplot works best when the data are in the long format (as below) rather than the wide format (as above)
- Let’s transform the data first
library(tidyr) # to transform the data from wide to long format
# Stack the selected year columns into a `year` / `temp` pair of columns.
# Only a few columns are used for this demonstration.
df1 <- gather(
  t.df,
  key = "year", value = "temp",
  X1977, X1978, X1979
)
# Inspect a sample of rows from each stacked year.
df1[c(1:3, 11:14, 23:26), 1:7]
## month X1980 X1981 X1982 X1983 X1984 X1985
## 1 Jan 8.8 8.1 7.9 7.1 5.5 10.3
## 2 Feb 10.6 13.2 7.4 10.5 11.3 11.8
## 3 Mar 13.4 13.8 10.0 13.1 16.9 15.4
## 11 Nov 10.6 13.3 13.3 13.9 13.9 13.6
## 12 Dec 5.6 10.2 9.2 11.4 12.0 12.8
## 13 Jan 8.8 8.1 7.9 7.1 5.5 10.3
## 14 Feb 10.6 13.2 7.4 10.5 11.3 11.8
## 23 Nov 10.6 13.3 13.3 13.9 13.9 13.6
## 24 Dec 5.6 10.2 9.2 11.4 12.0 12.8
## 25 Jan 8.8 8.1 7.9 7.1 5.5 10.3
## 26 Feb 10.6 13.2 7.4 10.5 11.3 11.8
- Let’s do some housekeeping on the data before making the ggplot
- Remove the ‘X’ from Year column, it should be done for each year
# Strip the leading "X" that read.csv() added to the year column names.
# A single pattern replacement handles every year at once, and the column
# stays character (e.g. "1977"), as before.
df1$year <- sub("^X", "", df1$year)
- Let’s make a ggplot of the Monthly Mean Temperature
# Monthly mean temperature, one line per year. Colour, shape and linetype
# are all mapped to `year`.
ggplot(df1, aes(x = month, y = temp, colour = year)) +
  geom_point(aes(shape = year)) +
  geom_line(aes(linetype = year, group = year)) +
  labs(x = "Month", y = "Mean Temperature") +
  theme(legend.position = "right") +
  # The plot maps `colour`, not `fill`, so the manual palette has to be set
  # on the colour scale to take effect.
  scale_colour_manual(
    labels = c("1977", "1978", "1979"),
    breaks = c("1977", "1978", "1979"),
    values = c("1977" = "red", "1978" = "green", "1979" = "blue")
  ) +
  scale_linetype_manual(values = c("1977" = 1, "1978" = 1, "1979" = 2)) +
  scale_shape_manual(values = c("1977" = 16, "1978" = 17, "1979" = 18))
Multiple plot (panel plot)
- Run the command from the CookBook to create a function. Then run the function to make a panel plot.
# The code is from CookBook
# Draw several plot objects on one page in a grid of panels.
#
# Args:
#   ...      : plot objects to draw.
#   plotlist : optional list of further plot objects, appended after `...`.
#   file     : not used by this function; kept so existing calls still work.
#   cols     : number of columns in the automatically generated layout.
#   layout   : optional matrix of plot indices; the cell(s) holding value i
#              are where plot i is drawn. When supplied, `cols` is ignored.
#
# Returns:
#   Called for its drawing side effect. With no plots at all it returns
#   NULL invisibly and does not touch the graphics device.
multiplot <- function(..., plotlist = NULL, file, cols = 1, layout = NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)

  # Nothing to draw. Without this guard a `1:numPlots` loop would run over
  # c(1, 0) and fail on plots[[1]].
  if (numPlots == 0) {
    return(invisible(NULL))
  }

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Rows needed for the requested number of columns; matrix() fills the
    # indices column by column.
    n_rows <- ceiling(numPlots / cols)
    layout <- matrix(seq(1, cols * n_rows), ncol = cols, nrow = n_rows)
  }

  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page. grid functions are namespace-qualified so the
    # function does not attach a package as a side effect.
    grid::grid.newpage()
    grid::pushViewport(
      grid::viewport(layout = grid::grid.layout(nrow(layout), ncol(layout)))
    )

    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the row/column positions of the cells that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(
        plots[[i]],
        vp = grid::viewport(
          layout.pos.row = matchidx$row,
          layout.pos.col = matchidx$col
        )
      )
    }
  }
}
- Now let's make a panel plot of ggplots using the above function
multiplot(gp1.sc, gp1.scl, gp1.scs, cols=3) # cols = defines number of columns in panel plot
## `geom_smooth()` using method = 'loess'
Draw polygon around clusters
- Let's try to draw polygons around clusters of data. The cluster is the third dimension in the graph
- Please download data from here. URL: https://drive.google.com/open?id=16WdEgsJPR8qAtmBrPDJoeCVrSkvYAQKoKk4sioL9IoM
# Read the species occurrence data. Replace the placeholder path with the
# location of the downloaded CSV file.
a.df <- read.csv("......./rhodendron.csv", header = TRUE)
# Preview the first rows.
head(a.df)
## X species long lat bio09 bio17
## 1 1 lepidotum 87.96667 27.70000 -13 10
## 2 2 lepidotum 87.96667 27.70000 -13 10
## 3 3 lepidotum 86.58333 27.66667 2 14
## 4 4 lepidotum 86.58333 27.66667 2 14
## 5 5 lepidotum 86.58333 27.66667 2 14
## 6 6 lepidotum 86.58333 27.66667 2 14
library(ggalt)
library(dplyr)
library(plyr)
# Return the rows of `a.df` that lie on the convex hull of its points in
# the (bio09 / 10, bio17) plane, in the order reported by chull().
p.func <- function(a.df) {
  hull_rows <- chull(a.df$bio09 / 10, a.df$bio17)
  a.df[hull_rows, ]
}
# Apply p.func to each species' rows to get one hull outline per species.
a.poly <- ddply(a.df, "species", p.func)
# Points coloured by species, with each species' hull drawn as an unfilled
# polygon (rough polygon).
ggplot(a.df, aes(x = bio09 / 10, y = bio17, colour = species)) +
  geom_point() +
  labs(x = "Temperature", y = "Precipitation") +
  geom_polygon(data = a.poly, fill = NA) +
  theme_bw()
# Same scatter plot, with an ellipse drawn around each species' points
# instead of the hull polygon (smooth ellipse).
ggplot(a.df, aes(x = bio09 / 10, y = bio17, colour = species)) +
  geom_point() +
  labs(x = "Temperature", y = "Precipitation") +
  stat_ellipse() +
  theme_bw()
No comments:
Post a Comment