在Seurat的輸出結果中,有一個展示表達量變化最大10個基因的圖表令人印象深刻,如下圖。關於該圖表的具體內容參考之前《單細胞轉錄組學習筆記之Seurat 3.0(一)》一文。那麼,常規散點圖能不能畫成這樣效果?
如果仍然使用針對單細胞數據高度定製的Seurat,顯然是非常麻煩的。而使用ggplot2繪製類似這樣的多標籤圖表,又容易出現「數據標籤重疊」、超出繪圖區域的標籤「顯示不全」和鄰近數據點「指示不明」三大難題。
於是ggrepel這個R包應運而生,它讓文字標籤周圍產生「力場」,相互排斥,相互靠近的點會添加「指引線」避免指示不明。
整個R包主要有兩個用法幾乎一樣的函數geom_text_repel()和geom_label_repel(),可以把它們視作ggplot2中geom_text()的加強版,很顯然,Seurat集成了這個R包。下面就仍以上圖的數據為例,一起看下具體如何使用ggrepel吧!
https://www.omicshare.com/forum/thread-6312-1-1.html
#讀入數據:
# Read the per-gene variability table (tab-separated, with a header row).
# stringsAsFactors = FALSE keeps the gene column as character on R < 4.0;
# with a factor column the label assignment below would store the integer
# factor codes instead of the gene names.
var.df <- read.table(
  "var.genes.xls",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
head(var.df, 10)

# Load dplyr for arrange() and desc().
library(dplyr)

# Sort the genes by the vst.variance.standardized column, highest first.
var.genes <- arrange(var.df, desc(vst.variance.standardized))
head(var.genes, 10)

# Take the top-n gene names. n is capped at the number of rows so that a
# table shorter than 15 genes cannot make the label assignment run past the
# end of the column.
n <- min(15, nrow(var.genes))
topn <- as.character(head(var.genes$gene, n))
topn

# Add a label column: the gene name for the top-n rows, "" for all others.
var.genes$labels <- ""
var.genes$labels[seq_len(n)] <- topn
head(var.genes, 10)

# Re-sort ascending by standardized variance, which moves the labelled genes
# to the last rows (presumably so they are drawn last -- confirm intent).
var.genes <- arrange(var.genes, vst.variance.standardized)
# Plotting packages: ggplot2 supplies the base layers, ggrepel the repelling
# text/label geoms.
library(ggplot2)
library(ggrepel)

# Base scatter plot reused below: log10 of vst.mean on x, standardized
# variance on y, point colour taken from vst.variable. The label aesthetic is
# mapped here so that text layers added later inherit it.
p1 <- ggplot(
  data = var.genes,
  mapping = aes(
    x = log10(vst.mean),
    y = vst.variance.standardized,
    color = vst.variable,
    label = labels
  )
) +
  geom_point()

# Reference rendering with the ordinary geom_text() layer.
p1 + geom_text(size = 3, color = "grey20")

# Same labels through geom_text_repel(). The RNG is seeded first --
# presumably because ggrepel's label placement is randomised; confirm in the
# ggrepel documentation.
set.seed(100)
p2 <- p1 +
  geom_text_repel(size = 3, color = "grey20", point.padding = NA)
p2

# Variant with a larger repulsion force, labels nudged by 1 along y, and thin
# open-headed arrows on the connecting segments.
p3 <- p1 +
  geom_text_repel(
    size = 3,
    color = "grey20",
    force = 20,
    nudge_y = 1,
    hjust = 0.5,
    point.padding = 0.5,
    segment.color = "grey20",
    segment.size = 0.2,
    segment.alpha = 0.8,
    arrow = arrow(ends = "last", type = "open", length = unit(0.01, "npc"))
  )
p3

# p3 with presentation styling: fixed y range and breaks, axis titles, a
# two-colour manual palette, classic theme, and the legend hidden.
p3 +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3)) +
  labs(x = "Average expression", y = "Standardized variance") +
  scale_color_manual(values = c("#a2a9af", "#a8df65")) +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(1, 1, 0.5, 0.5), "cm"),
    axis.line = element_line(colour = "grey30", size = 0.5),
    axis.text = element_text(size = rel(1)),
    axis.title = element_text(size = rel(1.2))
  )
# Split the genes at log10(vst.mean) == 0 so each side can get its own label
# layer: rows at or above 0 go to one layer, rows below 0 to the other.
high.mean.genes <- subset(var.genes, log10(vst.mean) >= 0)
low.mean.genes <- subset(var.genes, log10(vst.mean) < 0)

# Scatter plot with text labels in two layers. Each layer's nudge_x is
# (target - log10(vst.mean)), i.e. the offset from every point's x position
# to x = 3 (first layer) or x = -3 (second layer); direction = "y" is passed
# so that, per ggrepel's option of that name, repulsion is restricted to the
# y axis.
p4 <- ggplot(
  data = var.genes,
  mapping = aes(
    x = log10(vst.mean),
    y = vst.variance.standardized,
    color = vst.variable,
    label = labels
  )
) +
  geom_point() +
  geom_text_repel(
    data = high.mean.genes,
    nudge_x = 3 - log10(high.mean.genes$vst.mean),
    direction = "y",
    hjust = 1,
    size = 3,
    color = "grey20",
    point.padding = NA,
    segment.color = "grey50",
    segment.size = 0.2
  ) +
  geom_text_repel(
    data = low.mean.genes,
    nudge_x = -3 - log10(low.mean.genes$vst.mean),
    direction = "y",
    hjust = 0,
    size = 3,
    color = "grey20",
    point.padding = NA,
    segment.color = "grey50",
    segment.size = 0.2
  ) +
  scale_x_continuous(
    limits = c(-3.5, 3.5),
    breaks = seq(-3, 3, by = 1)
  )
p4

# p4 with presentation styling: fixed y range and breaks, axis titles, a
# two-colour manual palette, classic theme, and the legend hidden.
p4 +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3)) +
  labs(x = "Average expression", y = "Standardized variance") +
  scale_color_manual(values = c("#a2a9af", "#a8df65")) +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(1, 1, 0.5, 0.5), "cm"),
    axis.line = element_line(colour = "grey30", size = 0.5),
    axis.text = element_text(size = rel(1)),
    axis.title = element_text(size = rel(1.2))
  )
# Split the genes at log10(vst.mean) == 0 so each side can get its own label
# layer: rows at or above 0 go to one layer, rows below 0 to the other.
high.mean.genes <- subset(var.genes, log10(vst.mean) >= 0)
low.mean.genes <- subset(var.genes, log10(vst.mean) < 0)

# Same two-layer layout as a text-label plot, but drawn with
# geom_label_repel(): white text on a filled box, a different fill colour
# per layer. Each layer's nudge_x is (target - log10(vst.mean)), i.e. the
# offset from every point's x position to x = 3 or x = -3.
p5 <- ggplot(
  data = var.genes,
  mapping = aes(
    x = log10(vst.mean),
    y = vst.variance.standardized,
    color = vst.variable,
    label = labels
  )
) +
  geom_point() +
  geom_label_repel(
    data = high.mean.genes,
    nudge_x = 3 - log10(high.mean.genes$vst.mean),
    direction = "y",
    hjust = 1,
    size = 3,
    color = "white",
    fill = "#96C93D",
    alpha = 0.9,
    point.padding = 0.5,
    segment.color = "grey50",
    segment.size = 0.5
  ) +
  geom_label_repel(
    data = low.mean.genes,
    nudge_x = -3 - log10(low.mean.genes$vst.mean),
    direction = "y",
    hjust = 0,
    size = 3,
    color = "white",
    fill = "#9881F5",
    alpha = 0.9,
    point.padding = 0.5,
    segment.color = "grey50",
    segment.size = 0.5
  ) +
  scale_x_continuous(
    limits = c(-3.5, 3.5),
    breaks = seq(-3, 3, by = 1)
  )
p5

# p5 with presentation styling: fixed y range and breaks, axis titles, a
# two-colour manual palette, classic theme, and the legend hidden.
p5 +
  scale_y_continuous(limits = c(0, 12), breaks = seq(0, 12, by = 3)) +
  labs(x = "Average expression", y = "Standardized variance") +
  scale_color_manual(values = c("#a2a9af", "#ff85cb")) +
  theme_classic() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(1, 1, 0.5, 0.5), "cm"),
    axis.line = element_line(colour = "grey30", size = 0.5),
    axis.text = element_text(size = rel(1)),
    axis.title = element_text(size = rel(1.2))
  )