这个方法有点取巧(hacky),但我认为下面的做法可以达到目的。遗憾的是,抖动高度(height)似乎无法映射到美学属性(aesthetic)上,不过您或许可以找到其他办法,让抖动高度取决于矩形的高度。
# Example data: one row per person, with an age bracket and an ethnicity.
df <- tribble(
  ~id, ~agegroup, ~ethnicity,
  #--|----------|------------------
  "a", "20s",     "African Descent",
  "b", "30s",     "White",
  "c", "50s",     "White",
  "d", "40s",     "Hispanic",
  "e", "20s",     "White",
  "f", "30s",     "Hispanic",
  "g", "20s",     "Hispanic",
  "h", "30s",     "White",
  "i", "20s",     "African Descent",
  "j", "30s",     "White",
  "k", "50s",     "White",
  "l", "20s",     "White",
  "m", "30s",     "Hispanic",
  "n", "20s",     "Hispanic",
  "o", "30s",     "White",
)
# One row per (agegroup, ethnicity) cell with the vertical extent of its
# rectangle in a stacked bar: within each age group the cells are stacked in
# the order produced by count(), so `top_rect` is the running total of `n`
# and `bottom_rect` is the previous cell's top (0 for the first cell).
df_2 <- df %>%
  count(agegroup, ethnicity) %>%
  group_by(agegroup) %>%
  mutate(
    top_rect = cumsum(n),
    bottom_rect = lag(top_rect, default = 0)
  ) %>%
  # Drop the grouping once the per-group running totals are computed so that
  # later verbs applied to df_2 do not silently operate per age group.
  ungroup()
# Expand the per-cell summary back to one row per person: each row of df_2 is
# repeated `n` times (and the `n` column is dropped), so every point to be
# drawn carries the bounds of the rectangle it belongs to.
df_2_uncounted <- uncount(ungroup(df_2), weights = n)
# Stacked "bar" chart drawn with rectangles, with one jittered point per
# person placed around the vertical centre of that person's rectangle.
# Age groups are placed on a continuous x axis via their factor codes
# (1, 2, ...) so that the rectangles can be given an explicit width.
ggplot(df_2) +
  geom_rect(
    aes(
      xmin = as.numeric(as.factor(agegroup)) - 0.45,
      xmax = as.numeric(as.factor(agegroup)) + 0.45,
      ymin = bottom_rect,
      ymax = top_rect,
      fill = ethnicity
    )
  ) +
  geom_jitter(
    data = df_2_uncounted,
    aes(
      x = as.numeric(as.factor(agegroup)),
      y = (bottom_rect + top_rect) / 2
    ),
    # Jitter amounts are fixed constants; they cannot be mapped per rectangle.
    width = 0.3,
    height = 0.5
  ) +
  scale_x_continuous(
    # Factor code i corresponds to level i, so build the breaks from the
    # levels themselves; this keeps breaks and labels paired even when the
    # rows of df_2 are not sorted by agegroup.
    breaks = seq_along(levels(as.factor(df_2$agegroup))),
    labels = levels(as.factor(df_2$agegroup))
  ) +
  labs(
    x = "Age Group",
    y = paste0("Population (total = ", nrow(df_2_uncounted), ")")
  )
更新:
现在每个点都带有标签(id):
# Rebuild the one-row-per-person table, this time attaching each person's
# `id` so the points can be labelled. Both sides are sorted the same way and
# numbered within each (agegroup, ethnicity) cell (`id2`); joining on that
# running number pairs the k-th expanded row of a cell with the k-th person
# of that cell in `df`.
df_2_uncounted <- df_2 %>%
  ungroup() %>%
  uncount(n) %>%
  arrange(agegroup, ethnicity) %>%
  group_by(agegroup, ethnicity) %>%
  # row_number() instead of 1:n(): same values, but safe on empty input.
  mutate(id2 = row_number()) %>%
  ungroup() %>%
  left_join(
    df %>%
      arrange(agegroup, ethnicity) %>%
      group_by(agegroup, ethnicity) %>%
      mutate(id2 = row_number()) %>%
      ungroup(),
    by = c("agegroup", "ethnicity", "id2")
  )
# Same rectangle chart as before, now with each point labelled by `id`.
# The point layer and the text layer use the same data and the same
# position_jitter() seed, so each label receives the same random offset as
# its point.
ggplot(df_2) +
  geom_rect(
    aes(
      xmin = as.numeric(as.factor(agegroup)) - 0.45,
      xmax = as.numeric(as.factor(agegroup)) + 0.45,
      ymin = bottom_rect,
      ymax = top_rect,
      fill = ethnicity
    )
  ) +
  geom_jitter(
    data = df_2_uncounted,
    aes(
      x = as.numeric(as.factor(agegroup)),
      y = (bottom_rect + top_rect) / 2
    ),
    position = position_jitter(seed = 1, height = 0.5)
  ) +
  geom_text(
    data = df_2_uncounted,
    aes(
      x = as.numeric(as.factor(agegroup)),
      y = (bottom_rect + top_rect) / 2,
      label = id
    ),
    position = position_jitter(seed = 1, height = 0.5)
  ) +
  scale_x_continuous(
    # Factor code i corresponds to level i, so build the breaks from the
    # levels themselves; this keeps breaks and labels paired even when the
    # rows of df_2 are not sorted by agegroup.
    breaks = seq_along(levels(as.factor(df_2$agegroup))),
    labels = levels(as.factor(df_2$agegroup))
  ) +
  labs(
    x = "Age Group",
    y = paste0("Population (total = ", nrow(df_2_uncounted), ")")
  )