A Diary of Charts
Some code sketches for Base R plotting
My love for the base R plotting system has been growing since I started dealing with complicated charts. Complexity in these charts can arise for many reasons. For one, it might simply result from the data structure when working with complex statistical models (e.g., multilevel models). Conventions in the workplace could be another source of complexity. For instance, tweaking a chart in R to replicate the one originally created in Excel could be extremely difficult, and it is nearly impossible to do so with higher-level plotting packages such as ggplot2
. To approach the look of a chart created outside of R, life would be much easier if lower-level frameworks such as the base R plotting system were utilized.
Below is just a cumulation of charts I’ve created. It is intended to help me search and locate the code for plotting certain features in base R. The remainder of the post is divided into sections by charts, each of which consists of (1) the code, (2) the output chart, and (3) a list of features presented in the chart.
Scatter plots
Features
- Text on plot margins:
mtext()
#### Data ####
set.seed(2023)
subj_idx = 1:30
gender = rep(1:2, each=15)
heights = c(160, 175)[gender] + rnorm(30, sd=15)
#### Plot ####
ylim = c( min(heights)-5, max(heights)+5 )
plot( 1, type="n", xlim=c(.5,30.5), ylim=ylim,
xlab="Subject Index", ylab="Height (cm)" )
points( 1:15, heights[ 1:15], col=2, pch=19 )
points( 16:30, heights[16:30], col=4, pch=19 )
abline( v=15.5, col="grey", lty="dashed" )
mtext( c("Girls","Boys"), at=c(7,23), col=c(2,4), padj=-.5 )
Bar Charts
Features
- custom axis:
axis()
- axis label rotation (horizontal):
las = 1
- axis label padding adjustment:
hadj
,padj
- number of digits:
spintf("%.2f", yseq)
Code & Plot
yseq = seq(0, 1, .25)
plot(1, type="n", xaxt='n', yaxt='n',
ylab = "Probability", xlab="",
xlim=c(-.7, 1.7), ylim=c(0, 1) )
lines( c(0, 0), c(0, 0.25), lwd=12, col=2 )
lines( c(1, 1), c(0, 0.75), lwd=12, col=2 )
axis( 2, at=yseq, labels=sprintf("%.2f", yseq), las=1, hadj = .85 )
axis( 1, at=0:1, padj = .5,
labels = c("0\n(tail)", "1\n(head)"),
)
Code & Plot
set.seed(100)
get_pois = function(lambda) {
x = rpois(1e5, lambda)
y = table(x)
x = names(y) |> as.integer()
y = as.vector(y / sum(y))
return(
list(x=x, y=y)
)
}
lambdas = c(.8, 2.0, 3.0)
dat = lapply(lambdas, \(lambda) get_pois(lambda))
x.fct = c(-1, 0, 1) * .21
cols = stom::vals2cols(seq(2,12,length=3))
plot(1, type="n", xlim=range(dat[[3]]$x), ylim=range(dat[[1]]$y), las=1,
xlab = "Count", ylab="Probability")
for (j in seq(dat)) {
x = dat[[j]]$x
y = dat[[j]]$y
for (i in seq(x)) {
x_ = rep(x[i], 2) + x.fct[j]
y_ = c(0, y[i])
lines(x_, y_, col=cols[j], lwd=8)
}
}
legend(9.8, .45, paste0("λ = ", lambdas), col=cols, lwd=8 )
title(main = "Poisson Distributions")
Interaction Plots
Features
- Legend outside of plotting region:
par()
,legend()
,mar
,xpd
,inset
- Shaded region:
polygon()
- Custom axis (categorical axis):
axis()
- text / label:
text()
Code & Plot
library(stom)
library(dplyr)
#### Data ####
d = iris |>
group_by(Species) |>
summarise(
S.L = mean(Sepal.Length),
S.W = mean(Sepal.Width),
P.L = mean(Petal.Length),
P.W = mean(Petal.Width)
)
d
Species | S.L | S.W | P.L | P.W |
---|---|---|---|---|
setosa | 5.006 | 3.428 | 1.462 | 0.246 |
versicolor | 5.936 | 2.770 | 4.260 | 1.326 |
virginica | 6.588 | 2.974 | 5.552 | 2.026 |
#### Annotations ####
(LABELS = colnames(d)[-1])
[1] "S.L" "S.W" "P.L" "P.W"
#### Plot ####
# c( b, l, t, r )
par( mar=c(5.1, 4.1, 4.1, 8.1), xpd=F ) # Larger right margin for legend
plot( 1, type="n", xlim=c(.5, 4.5), ylim=c(0,7),
xlab="", ylab="Mean",
xaxt="n", yaxt="n") # disable x/y-axis
# Shaded region (coordinates are specified (counter-)clockwise)
polygon( c(0,4.9,4.9,0),c(2,2,5,5), col=col.alpha("grey",.15), lty=2, border="grey" )
# Auxiliary lines
for ( h in 0:7 )
abline( h=h, col=col.alpha("grey") )
# Lines & Points
for ( i in 1:nrow(d) ) {
lines ( 1:4, d[i,-1], col=i, lwd=4 )
points( 1:4, d[i,-1], col=i, lwd=4, pch=2+i )
}
# Labels
for ( i in 1:nrow(d) ) {
if ( i != 2 ) next
text( 1:4-.03, d[i,-1]-.45, labels = d[i,-1], cex=.9, col=i )
}
# Axis
axis( 1, at=1:4, tck=-.02, labels=LABELS )
axis( 2, at=0:7, labels=sprintf("%.1f", 0:7), las=1 ) # rotated y-axis labels
# Legend (outside of plotting region)
legend("right", legend=d$Species, inset=c(-.17,0), xpd=TRUE,
col=1:nrow(d), pch=1:nrow(d) + 2, lwd=4, cex=.9,
y.intersp=1.8, box.col="transparent", bg="transparent" )
Density Plots
Features
- Line segments:
segments()
- Density:
density()
- text / label:
text()
Code & Plot
library(stom)
library(dplyr)
#### Data ####
set.seed(2020)
d = data.frame(
x = rnorm(2000)
)
#### Annotations ####
X = d$x
AVG = mean(X)
SD = sd(X)
#### Plot ####
plot( density(X), ylim=c(0,.46),
main = "A Standard Normal", xlab = "X" )
# Mean line segment
segments( x0=AVG, y0=0, y1=.43, col=2, lwd=3 )
text( AVG, .46, labels = paste("AVG:",round(AVG,3)), col=2, cex=.8 )
# SD line segment
segments( x0=AVG+.06, x1=AVG+SD, y0=.02, lwd=2, col=4 )
segments( x0=AVG+.06, y0=.01, y1=.03, lwd=2, col=4 )
segments( x0=AVG+SD, y0=.01, y1=.03, lwd=2, col=4 )
text( .5*(AVG+SD+.06), .035,
labels = paste("SD =",round(SD,3)), col=4, cex=.8 )
Combining Plots
Features
- Plot panels:
par()
+mfrow()
Code & Plot
inner = c(4.1, 4.1, 1.8, .5)
par( mfrow=c(2,2), oma=c(0,0,0,0), mar=inner )
for ( i in 1:4 ) {
plot( 1, type="n", xlim=0:1, ylim=0:1, xlab="", ylab="" )
text( .5, .5, labels=paste("Plot",i), cex=2 )
}