lattice
包可以很方便地作出 trellis 图。
library(lattice)
首先看一个简单的例子,考虑这个包自带的纽约合唱团歌手身高数据,
str(singer)
## 'data.frame': 235 obs. of 2 variables:
## $ height : num 64 62 66 65 60 61 65 66 65 63 ...
## $ voice.part: Factor w/ 8 levels "Bass 2","Bass 1",..: 8 8 8 8 8 8 8 8 8 8 ...
可以用下列代码展示不同声部的歌手的身高分布,
histogram(~height | voice.part, data = singer,
main = "Distribution of Heights by Voice Pitch",
xlab = "Height (inches)")
其中 voice.part
被称为 conditioning variable,而 height
为 dependent variable。
在 trellis 图中,conditioning variable 的每个 level 都会创建一个独立的 panel。如果有多个 conditioning variable,则 (factor) level 的每个组合都会有一个 panel。每个 panel 会有一个标签,被称之为 strip。用户可以控制
lattice
可以作出
~x | A
:dot plots, kernel density plots, histograms, bar charts, box plotsy~x | A
:scatter plots, strip plots, parallel box plotsz~x*y | A
:3D plots, scatter plot matrices下面用 mtcars
数据集进行详细说明,
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
attach(mtcars)
gear = factor(gear, levels = c(3, 4, 5),
labels = c("3 gears", "4 gears", "5 gears"))
cyl = factor(cyl, levels = c(4, 6, 8),
labels = c("4 cylinders", "6 cylinders", "8 cylinders"))
densityplot(~mpg, main = "Density Plot", xlab = "Miles Per Gallon")
densityplot(~mpg | cyl, main = "Density Plot by Numbers of Cylinders", xlab = "Miles Per Gallon")
bwplot(cyl ~ mpg | gear, main = "Box Plots by Cylinders and Gears",
xlab = "Miles Per Gallon", ylab = "Cylinders")
xyplot(mpg ~ wt | cyl * gear, main = "Scatter Plots by Cylinders and Gears",
xlab = "Car Weight", ylab = "Miles Per Gallon")
cloud(mpg ~ wt * qsec | cyl, main = "3D Scatter Plots by Cylinders")
dotplot(cyl ~ mpg | gear,
main = "Dot Plots by Number of Gears and Cylinders", # gear x cyl!!
xlab = "Miles Per Gallon")
splom(mtcars[c(1, 3, 4, 5, 6)], main = "Scatter Plot Matrix for mtcars Data")
detach(mtcars)
与基本作图系统不同的是,lattice
可以将图保存为一个变量,而且可以后续更改,
mygraph = densityplot(~height | voice.part, data = singer)
update(mygraph, col = "red", pch = 16, cex = 0.8, jitter = 0.05, lwd = 2)
一般地,conditioning variables 是 factors。对于连续变量,可以通过 cut()
函数将其转换为离散变量。不过 lattice
将连续变量转换成所谓的 shingle
结构。具体地,连续变量被划分成若干个(可能)重叠的区域。比如,
displacement = equal.count(mtcars$disp, number = 3, overlap = 0)
会将连续变量 mtcars$disp
划分成 3
个区间,有 proportion=0
的重叠部分,每个区间观测值的个数相等,然后可以将其作为 conditioning variable 进行作图,
xyplot(mpg ~ wt | displacement, data = mtcars,
main = "Miles Per Gallon vs. Weight by Engine Displacement",
xlab = "Weight", ylab = "Miles Per Gallon",
layout = c(3, 1), # 3 columns
aspect = 1.5 # height / width
)
其中 strip 的深色部分则表示连续变量作为 conditioning variable 的取值范围。
注意 proportion 是相对于每个区间的比例,而非整个长度的比例,所以确定每个区间观测值个数的公式为
\[ \frac{N}{n(1-\text{overlap}) + \text{overlap}} \]
具体代码为
co.intervals
## function (x, number = 6, overlap = 0.5)
## {
## x <- sort(x[!is.na(x)])
## n <- length(x)
## r <- n/(number * (1 - overlap) + overlap)
## ii <- 0:(number - 1) * (1 - overlap) * r
## x1 <- x[round(1 + ii)]
## xr <- x[round(r + ii)]
## keep <- c(TRUE, diff(x1) > 0 | diff(xr) > 0)
## j.gt.0 <- 0 < (jump <- diff(x))
## eps <- 0.5 * if (any(j.gt.0))
## min(jump[j.gt.0])
## else 0
## cbind(x1[keep] - eps, xr[keep] + eps)
## }
## <bytecode: 0x564f95b5a490>
## <environment: namespace:graphics>
对于每个 high-level 画图函数,graph_function
,其默认的 panel 函数即为 panel.graph_function
。比如 xyplot
的 panel 函数为 panel.xyplot
。
我们可以定义自己的 panel 函数使得每个 panel 可以加入更多的东西,比如回归直线。
# custom panel function
mypanel = function(x, y) {
panel.xyplot(x, y, pch = 19)
panel.rug(x, y)
panel.grid(h = -1, v = -1) # use negative numbers forces the grid to line up with the axis labels
panel.lmline(x, y, col = "red", lwd = 1, lty = 2)
}
xyplot(mpg ~ wt | displacement, data = mtcars,
main = "Miles Per Gallon vs. Weight by Engine Displacement",
xlab = "Weight", ylab = "Miles Per Gallon",
layout = c(3, 1), # 3 columns
aspect = 1.5, # height / width
panel = mypanel
)
考虑另一个例子,
mtcars$transmission = factor(mtcars$am, levels = c(0, 1), labels = c("Automatic", "Manual"))
panel.smoother = function(x, y) {
panel.grid(h=-1, v=-1)
panel.xyplot(x, y)
panel.loess(x, y)
panel.abline(h = mean(y), lwd = 2, lty = 2, col = "green")
}
xyplot(mpg ~ disp | transmission, data = mtcars,
scales = list(cex = .8, col = "red"), # scale annotations, or separately list(x=list(), y=list())
panel = panel.smoother,
xlab = "Displacement",
ylab = "Miles per Gallon",
main = "MGP vs Displacement by Transmission Type",
sub = "Dotted lines are Group Means", aspect = 1)
如果不想分开展示数据,而是叠在一起,则可以用 group variable。
densityplot(~mpg, data = mtcars,
group = transmission,
main = "MPG Distribution by Transimission Type",
xlab = "Miles per Gallon",
auto.key = T,
#auto.key = list(space = "right", columns = 1, title = "Transmission")
)
图例可以进一步自定义,
colors = c("red", "blue")
lines = c(1, 2)
points = c(16, 17)
key.trans = list(title = "Transmission",
space = "bottom", columns = 2,
text = list(levels(mtcars$transmission)),
points = list(pch = points, col = colors),
lines = list(col = colors, lty = lines),
cex.title = 1, cex = .9)
densityplot(~mpg, data = mtcars,
group = transmission,
main = "MPG Distribution by Transimission Type",
xlab = "Miles per Gallon",
pch = points, lty = lines, col = colors,
lwd = 2, jitter = 0.005,
key = key.trans
)
下面考虑同时使用 conditioning variable 和 grouping variable。数据集 CO2
描述了 12 种植物在 7 种二氧化碳含量的环境 conc
中对二氧化碳的吸收率 uptake
,其中 6 种植物来自 Quebec,6 种来自 Mississippi,即 Type
变量,而每个地区各有三种在不同的 Treatment
(chilled/nonchilled) 进行研究,
str(CO2)
## Classes 'nfnGroupedData', 'nfGroupedData', 'groupedData' and 'data.frame': 84 obs. of 5 variables:
## $ Plant : Ord.factor w/ 12 levels "Qn1"<"Qn2"<"Qn3"<..: 1 1 1 1 1 1 1 2 2 2 ...
## $ Type : Factor w/ 2 levels "Quebec","Mississippi": 1 1 1 1 1 1 1 1 1 1 ...
## $ Treatment: Factor w/ 2 levels "nonchilled","chilled": 1 1 1 1 1 1 1 1 1 1 ...
## $ conc : num 95 175 250 350 500 675 1000 95 175 250 ...
## $ uptake : num 16 30.4 34.8 37.2 35.3 39.2 39.7 13.6 27.3 37.1 ...
## - attr(*, "formula")=Class 'formula' language uptake ~ conc | Plant
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "outer")=Class 'formula' language ~Treatment * Type
## .. ..- attr(*, ".Environment")=<environment: R_EmptyEnv>
## - attr(*, "labels")=List of 2
## ..$ x: chr "Ambient carbon dioxide concentration"
## ..$ y: chr "CO2 uptake rate"
## - attr(*, "units")=List of 2
## ..$ x: chr "(uL/L)"
## ..$ y: chr "(umol/m^2 s)"
colors = "darkgreen"
symbols = c(1:12)
linetype = c(1:3)
key.species = list(title = "Plant",
space = "right",
text = list(levels(CO2$Plant)),
points = list(pch = symbols, col = colors))
xyplot(uptake ~ conc | Type * Treatment, data = CO2,
group = Plant,
type = "o",
pch = symbols, col = colors, lty = linetype,
main = "Carbon Dioxide Uptake \nin Grass Plants",
ylab = expression(paste("Uptake ",
bgroup("(", italic(frac("umol", "m"^2)), ")"))), # bgroup?
xlab = expression(paste("Concentration ",
bgroup("(", italic(frac(mL, L)), ")"))),
sub = "Grass Species: Echinochloa crus-galli",
key = key.species)
类似基本作图的 par()
,lattice
也有类似的设置 graphic parameters 的命令,
show.settings()
mysettings = trellis.par.get()
mysettings$superpose.symbol
## $alpha
## [1] 1 1 1 1 1 1 1
##
## $cex
## [1] 0.8 0.8 0.8 0.8 0.8 0.8 0.8
##
## $col
## [1] "#0080ff" "#ff00ff" "darkgreen" "#ff0000" "orange" "#00ff00"
## [7] "brown"
##
## $fill
## [1] "#CCFFFF" "#FFCCFF" "#CCFFCC" "#FFE5CC" "#CCE6FF" "#FFFFCC" "#FFCCCC"
##
## $font
## [1] 1 1 1 1 1 1 1
##
## $pch
## [1] 1 1 1 1 1 1 1
默认时 pch
均为 1,将其改成对应的序号,
mysettings$superpose.symbol$pch = c(1:10)
trellis.par.set(mysettings)
show.settings()
同上,par
在 lattice
不起作用,也就无法设置 mfrow
或者 mfcol
,但这可以通过 split
进行实现,
split = c(placement row, placement column, total number of rows, total number of columns)
比如
graph1 = histogram(~height|voice.part, data = singer,
main = "Heights of Choral Singers by Voice Part")
graph2 = densityplot(~height, data = singer, group = voice.part,
plot.points = F, auto.key = list(columns = 4))
plot(graph1, split=c(1, 1, 1, 2))
plot(graph2, split=c(1, 2, 1, 2), newpage=FALSE)
另外也可以通过指定 position = c(xmin, ymin, xmax, ymax)
,取值为 0 到 1 直接的比例数,原点在左下角,如
plot(graph1, position = c(0, 0.3, 1, 1))
plot(graph2, position = c(0, 0, 1, 0.3), newpage = FALSE)
一幅 lattice 图中 panel 的顺序可以通过 index.cond =
进行指定,比如将“1”声部放在一行,而“2”声部放在另一行,
levels(singer$voice.part)
## [1] "Bass 2" "Bass 1" "Tenor 2" "Tenor 1" "Alto 2" "Alto 1"
## [7] "Soprano 2" "Soprano 1"
update(graph1, index.cond = list(c(2, 4, 6, 8, 1, 3, 5, 7)))
Tricks on including external R scripts: https://stackoverflow.com/questions/52397430/include-code-from-an-external-r-script-run-in-display-both-code-and-output, and also note that imgs
and rmds
are on the same folder level, so the read.csv()
with relative path works well.
data = read.csv("../data/Ozone/ozone.csv", sep = "\t")
# calculate the overlap 4*0.4-3x=1 => x = 0.2
# the overlap in w.r.t. each interval instead of the whole interval
# prop = 0.2 / 0.4 = 0.5
Wind = equal.count(data$wind, number = 4, overlap = 0.5)
Temp = equal.count(data$temp, number = 4, overlap = 0.5)
mypanel = function(x, y) {
panel.xyplot(x, y)
panel.grid()
panel.loess(x, y)
}
xyplot(I(ozone^(1/3)) ~ radiation | Temp * Wind, data = data,
panel = mypanel,
xlab = "Solar Radiation (langleys)",
ylab = "Cube Root Ozone (cube root ppb)")
coplot(I(ozone^(1/3)) ~ radiation | temperature * wind, data = data,
number = 4, overlap = 0.5)
Copyright © 2016-2021 weiya