Visualization in R

R Workshop 2024 in UTokyo

SUN Yufei(Adrian)

UTokyo & Tsinghua University

Course Homepage

https://adriansun.drhuyue.site/course/r-workshop-tokyo-2024.html

Email: sunyf20@mails.tsinghua.edu.cn

Visualization Engines in R

  • Built-in: Base R Graphics

  • lattice: Trellis graphics, with an emphasis on multivariate data

  • ggplot2: based on The Grammar of Graphics

Scatter Plot

RQ: Do people of different age groups have different views on income distribution? (WVS7 Q106)

Scale: 1 = "Incomes should be made more equal" … 10 = "There should be greater incentives for individual effort"

# First look with base R graphics: education (y) against family income (x).
plot(x = wvs$incomeLevel, y = wvs$education)

Why does this graph only show a bunch of small circles?

# To write the figure to a PNG file instead of the screen, uncomment the
# png() call before the plot and the dev.off() call after it.
# png("scatterBasic.png")

# Same scatter plot as before, now with a title and readable axis labels.
plot(
  x = wvs$incomeLevel,
  y = wvs$education,
  xlab = "Family Income",
  ylab = "Education Level",
  main = "Family Income and Education"
)

# dev.off()
  1. Open the graphics device with png(): this sets the save path and format.
  2. Draw the plot.
  3. Close the device with dev.off() so the file is written.
# Basic lattice scatter plot: education (y) against family income (x).
xyplot(education ~ incomeLevel, data = wvs)

# Same relationship with observations distinguished by `female`.
# type: "p" draws the points, "g" a reference grid, "smooth" a smoothed fit.
# auto.key = TRUE adds a legend for the groups.
xyplot(
  education ~ incomeLevel,
  # Full argument name; `group` only worked through partial matching.
  groups = female,
  type = c("p", "g", "smooth"),
  main = "Family Income on Education",
  xlab = "Income",
  ylab = "Education",
  data = wvs,
  auto.key = TRUE
)

# As above, but conditioned on `religious`: the `| religious` term in the
# formula draws one panel per value of that variable.
xyplot(
  education ~ incomeLevel | religious,
  groups = female,
  type = c("p", "g", "smooth"),
  main = "Family Income on Education",
  xlab = "Income",
  ylab = "Education",
  data = wvs,
  auto.key = TRUE
)

Tip

Please create a scatter plot of education level versus income, grouped by country

# Education against family income with one panel per country.
# layout = c(5, 3) arranges the panels in 5 columns and 3 rows.
# No `groups` argument is used, so there is no legend to draw and auto.key
# is omitted.
xyplot(
  education ~ incomeLevel | country,
  layout = c(5, 3),
  type = c("p", "g"),
  main = "Family Income on Education",
  xlab = "Income",
  ylab = "Education",
  data = wvs
)

ggplot2: Grammar of Graphics

A figure is broken down into individual components: data, aesthetics, geometries, and statistics.

# Step 1: data and aesthetic mappings only. No geom layer has been added yet,
# so nothing is drawn inside the plotting area.
ggplot(
  data = wvs,
  aes(x = incomeLevel, y = education, color = religious)
)

# Step 2: add layers, labels and a theme to the same base plot.
ggplot(
  data = wvs,
  aes(x = incomeLevel, y = education, color = religious)
) +
  # Semi-transparent points so overlapping observations remain visible.
  geom_point(size = 2, alpha = 0.3) +
  # color = NULL drops the inherited color mapping for this layer, so the
  # smoother is not split by `religious`; se = TRUE draws the confidence band.
  geom_smooth(aes(color = NULL), se = TRUE) +
  labs(
    title = "Economy on Education",
    # The layers are points plus a smoother; there is no density layer.
    subtitle = "Scatter plot + smoothed trend",
    x = "Family Income",
    y = "Education"
  ) +
  theme_minimal()

Universal formula for plotting with ggplot

# Load the ggplot2 package
library(ggplot2)

# General ggplot template
# NOTE: this is a fill-in-the-blanks skeleton, not runnable code. Every
# <placeholder> in angle brackets must be replaced with a real object, column
# name or value before the call will parse. Mappings and arguments that are
# not needed for a given plot can simply be deleted.
ggplot(data = <your_data>, aes(x = <x_variable>, y = <y_variable>, fill = <fill_variable>, color = <color_variable>)) +
  <geom_function>(aes(group = <group_variable>), 
                  size = <size_value>, 
                  shape = <shape_value>, 
                  alpha = <alpha_value>) +
  labs(title = "<Your Plot Title>", 
       subtitle = "<Subtitle>", 
       x = "<X-axis Label>", 
       y = "<Y-axis Label>", 
       fill = "<Legend Title for Fill>", 
       color = "<Legend Title for Color>") +
  theme_minimal() +  # Use theme of your choice
  theme(plot.title = element_text(hjust = 0.5),  # Center the title
        legend.position = "<legend_position>") +  # Legend position options: "none", "left", "right", "bottom", "top"
  scale_color_manual(values = <custom_color_palette>) +  # Custom color palette (optional)
  scale_fill_manual(values = <custom_fill_palette>)  # Custom fill palette (optional)
  • ggplot() initializes the plot.
  • aes() defines the aesthetic mappings (like x, y, color, and fill).
  • geom_*() specifies the type of plot (scatter, bar, line, etc.).
  • labs() adds titles and labels.
  • theme() customizes the plot’s appearance.
  • scale_*() modifies color and fill scales.

Bar Plot

# Cross-tabulate income level (rows) against education (columns).
income_education_counts <- table(wvs$incomeLevel, wvs$education)

# Row-wise proportions (margin = 1): within each income level the education
# shares sum to 1.
income_education_props <- prop.table(income_education_counts, margin = 1)

# Grouped bar chart of the proportions.
# barplot() draws one group of bars per column of its input, so the table is
# transposed: each income level becomes a group and each education level a
# bar within it.
barplot(
  t(income_education_props),
  main = "Proportion of Education Levels by Income Level",
  xlab = "Income Level",
  ylab = "Proportion",
  # One color per education level (the columns of the untransposed table).
  col = rainbow(ncol(income_education_props)),
  # Full argument name; `legend` only worked through partial matching.
  legend.text = colnames(income_education_props),
  beside = TRUE
)

# lattice provides barchart()
library(lattice)

# Row-wise proportions of education within each income level, reshaped to a
# long data frame. as.data.frame() on the unnamed two-way table produces the
# columns Var1 (income level), Var2 (education) and Freq (proportion).
prop_data <- as.data.frame(
  prop.table(
    table(wvs$incomeLevel, wvs$education),
    margin = 1
  )
)

# One panel per income level (Var1), with education levels (Var2) on the
# x-axis and vertical bars showing the proportion.
barchart(
  x = Freq ~ Var2 | Var1,
  data = prop_data,
  horizontal = FALSE,
  col = "lightgreen",
  scales = list(x = list(rot = 45)),
  main = "Proportion of Education Levels by Income Level",
  xlab = "Education Level",
  ylab = "Proportion"
)

# Load ggplot2 for plotting and dplyr for the data summary
library(ggplot2)
library(dplyr)

# Share of each education level within every income level: count the
# (incomeLevel, education) combinations, then divide each count by the total
# of its income level.
prop_data <- wvs %>%
  group_by(incomeLevel, education) %>%
  # "drop_last" keeps the incomeLevel grouping that the next step relies on,
  # stated explicitly instead of depending on summarise()'s default.
  summarise(count = n(), .groups = "drop_last") %>%
  mutate(proportion = count / sum(count)) %>%
  # Return an ungrouped table so later operations are not silently grouped.
  ungroup()

# Dodged bar chart: income level on the x-axis, one bar per education level.
ggplot(
  data = prop_data,
  aes(x = incomeLevel, y = proportion, fill = factor(education))
) +
  # stat = "identity" plots the precomputed proportions as bar heights.
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Proportion of Education Levels by Income Level",
    x = "Income Level",
    y = "Proportion",
    fill = "Education Level"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  scale_fill_brewer(palette = "Set3")