# A tibble: 5 x 2
Term Genes
<chr> <chr>
1 phosphorylation ACVRL1,FGF2,MAP2K1
2 vasculature development CXCR4,RECK,EFNB2,PNPLA6,THY1,CAV1,CDH2,CDH5,ECSCR,HB~
3 blood vessel development CXCR4,RECK,PNPLA6,THY1,CAV1,CDH2,CDH5,ECSCR,HBEGF,AM~
4 cell adhesion THY1,CDH2,CDH5
5 plasma membrane CXCR4,RECK,EFNB2,PNPLA6,THY1,CAV1,CDH2,CDH5,ECSCR,HB~
R语言如何绘制富集弦图
前言
本篇是R包GOplot绘制富集弦图的教程,GOplot包的版本是V1.0.2,安装方式为 devtools::install_github(“wencke/wencke.github.io”),不是V1.0.2的版本会报错。
什么是富集弦图
富集弦图:分别展示出每个Term富集到哪些基因以及这些基因的上下调情况。
左侧为差异基因,颜色变化表示logFC从小到大的变化,红色为上调,蓝色为下调;右侧为富集结果的Term,两者相连表示基因富集到该Term中。
富集弦图一般显示3-10个通路较为合适,太多通路,会导致太过密集,反而看不清。
绘图前的数据准备
需要2个文件
每个通路对应哪些基因
包含2列数据,第1列为通路名称,第2列为每个通路对应的基因,基因之间用”,“或”, “或”|“分隔开。
demo数据可以从这下载:https://www.r2omics.cn/res/demodata/enrichChord/data.txt
每个基因对应的logFC(可选)
第一列为基因名称,第二列为logFC。注意基因要和第一个文件对应。空着的话,基因的方格会显示黑色。
demo数据可以从这下载:https://www.r2omics.cn/res/demodata/enrichChord/gene2FC.txt
# A tibble: 14 x 2
Gene logFC
<chr> <dbl>
1 CXCR4 -0.653
2 RECK 0.371
3 EFNB2 2.65
4 PNPLA6 0.870
5 THY1 -2.56
6 CAV1 3.69
7 CDH2 -1.00
8 CDH5 0.891
9 ECSCR -0.554
10 HBEGF -0.957
11 AMOT 1.28
12 ACVRL1 -0.515
13 FGF2 -1.48
14 MAP2K1 -1.12
R语言如何绘制富集弦图
需要加载的程序
注意:GOplot包默认的效果感觉不是很好,字体重叠,且不圆,黑边太粗,图例只能在下面,所以我把源码改了改。运行正式的程序之前先把下面的代码加载了。适用于GOplot包的版本是V1.0.2,安装方式为 devtools::install_github(“wencke/wencke.github.io”),不是V1.0.2的版本会报错。
# 代码来源:https://www.r2omics.cn/
# 运行正式的程序之前先把下面的代码加载了
library(GOplot)
<- theme(axis.line = element_blank(), axis.text.x = element_blank(),
theme_blank axis.text.y = element_blank(), axis.ticks = element_blank(), axis.title.x = element_blank(),
axis.title.y = element_blank(), panel.background = element_blank(), panel.border = element_blank(),
panel.grid.major = element_blank(), panel.grid.minor = element_blank(), plot.background = element_blank())
<- function(mat, limit){
check_chord
if(all(colSums(mat) >= limit[2]) & all(rowSums(mat) >= limit[1])) return(mat)
<- mat[(rowSums(mat) >= limit[1]),]
tmp <- tmp[,(colSums(tmp) >= limit[2])]
mat
<- check_chord(mat, limit)
mat return(mat)
}
# Bezier function for drawing ribbons
<- function(data, process.col){
bezier <- c()
x <- c()
y <- c()
Id <- seq(0, 1, by = 0.01)
sequ <- dim(data)[1]
N <- seq(1, N, by = 2)
sN if (process.col[1] == '') col_rain <- grDevices::rainbow(N) else col_rain <- process.col
for (n in sN){
<- c(); xval2 <- c(); yval <- c(); yval2 <- c()
xval for (t in sequ){
<- (1 - t) * (1 - t) * data$x.start[n] + t * t * data$x.end[n]
xva <- c(xval, xva)
xval <- (1 - t) * (1 - t) * data$x.start[n + 1] + t * t * data$x.end[n + 1]
xva2 <- c(xval2, xva2)
xval2 <- (1 - t) * (1 - t) * data$y.start[n] + t * t * data$y.end[n]
yva <- c(yval, yva)
yval <- (1 - t) * (1 - t) * data$y.start[n + 1] + t * t * data$y.end[n + 1]
yva2 <- c(yval2, yva2)
yval2
}<- c(x, xval, rev(xval2))
x <- c(y, yval, rev(yval2))
y <- c(Id, rep(n, 2 * length(sequ)))
Id
}<- data.frame(lx = x, ly = y, ID = Id)
df return(df)
}
<-function(chord, process, metric, clust, clust.by, nlfc, lfc.col, lfc.min, lfc.max, lfc.space, lfc.width, term.col, term.space, term.width,process.label){
GOCluster<- y <- xend <- yend <- width <- space <- logFC <- NULL
x if (missing(metric)) metric<-'euclidean'
if (missing(clust)) clust<-'average'
if (missing(clust.by)) clust.by<-'term'
if (missing(nlfc)) nlfc <- 0
if (missing(lfc.col)) lfc.col<-c('firebrick1','white','dodgerblue')
if (missing(lfc.min)) lfc.min <- -3
if (missing(lfc.max)) lfc.max <- 3
if (missing(process.label)) process.label <- 11
if (missing(lfc.space)) lfc.space <- (-0.5) else lfc.space <- lfc.space*(-1)
if (missing(lfc.width)) lfc.width <- (-1.6) else lfc.width <- lfc.space-lfc.width-0.1
if (missing(term.col)) term.col <- brewer.pal(length(process), 'Set3')
if (missing(term.space)) term.space <- lfc.space + lfc.width else term.space <- term.space*(-1)
if (missing(term.width)) term.width <- lfc.width + term.space else term.width <- term.width*(-1)+term.space
if (clust.by == 'logFC') distance <- stats::dist(chord[,dim(chord)[2]], method=metric)
if (clust.by == 'term') distance <- stats::dist(chord, method=metric)
<- stats::hclust(distance ,method=clust)
cluster <- dendro_data(cluster)
dendr <- range(dendr$segments$y)
y_range <- data.frame(x=dendr$label$x, label=as.character(dendr$label$label))
x_pos <- as.data.frame(chord)
chord $label <- as.character(rownames(chord))
chord<- merge(x_pos, chord, by='label')
all # print(all)
$label <- as.character(all$label)
allif (nlfc != 0){
<- ncol(all)-2-nlfc
num <- seq(lfc.space, lfc.width, length = num)
tmp <- data.frame(x=numeric(),width=numeric(),space=numeric(),logFC=numeric())
lfc for (l in 1:nlfc){
<- data.frame(x = all$x, width=tmp[l+1], space = tmp[l], logFC = all[,ncol(all)-nlfc+l])
tmp_df <-rbind(lfc,tmp_df)
lfc
}else{
}<- all[,c(2, dim(all)[2])]
lfc $space <- lfc.space
lfc$width <- lfc.width
lfc
}<- all[,c(2:(length(process)+2))]
term # print(dim(term)[1])
<-NULL;termx<-NULL;tspace<-NULL;twidth<-NULL
colorfor (row in 1:dim(term)[1]){
<- which(term[row,-1] != 0)
idx if(length(idx) != 0){
<-c(termx,rep(term[row,1],length(idx)))
termx<-c(color,term.col[idx])
color<-seq(term.space,term.width,length=length(idx)+1)
tmp<-c(tspace,tmp[1:(length(tmp)-1)])
tspace<-c(twidth,tmp[2:length(tmp)])
twidth
}
}<- sapply(lfc$logFC, function(x) ifelse(x > lfc.max, lfc.max, x))
tmp <- sapply(tmp, function(x) ifelse(x < lfc.min, lfc.min, x))
logFC $logFC <- logFC
lfc<- data.frame(x = termx, width = twidth, space = tspace, col = color)
term_rect # width_x = (max(all$x)-min(all$x))/nrow(all)
# print(width_x)
<- data.frame(x = 1:length(process),label = process)
legend
ggplot()+
geom_segment(data=segment(dendr), aes(x=x, y=y, xend=xend, yend=yend))+
geom_rect(data=lfc,aes(xmin=x-0.5,xmax=x+0.5,ymin=width,ymax=space,fill=logFC))+
scale_fill_gradient2('logFC', space = 'Lab', low=lfc.col[3],mid=lfc.col[2],high=lfc.col[1],
guide=guide_colorbar(title.position='top',title.hjust=0.5,barwidth=1,barheight = 5,label.hjust = 1,label.vjust = 0.5,direction="vertical"),
# breaks = c(lfc.min,lfc.max),labels = c(lfc.min,lfc.max)
breaks=c(min(lfc$logFC),max(lfc$logFC)),labels=c(round(min(lfc$logFC),1),round(max(lfc$logFC),1))
+
)geom_rect(data=term_rect,aes(xmin=x-0.5,xmax=x+0.5,ymin=width,ymax=space),fill=term_rect$col)+
# geom_rect(data=term_rect,aes(xmin=x-width_x-1,xmax=x+width_x+1,ymin=width,ymax=space),fill=term_rect$col)+
geom_point(data=legend,aes(x=x,y=0.1,size=factor(label,levels=label),shape=NA))+
guides(size=guide_legend("Terms",ncol=1,byrow=T,override.aes=list(shape=22,fill=term.col,size = 8),title.position="top",title.hjust = 0.5,order = 1))+
coord_polar()+
scale_y_reverse()+
theme(legend.position='right',legend.text = element_text(size = process.label),legend.background = element_rect(fill='transparent'),legend.box='horizontal',legend.direction='horizontal')+
theme_blank
}# GOCluster_(chord = chord,process=process,clust.by = "logFC")
<-function(data, title, space, gene.order, gene.size, gene.space, nlfc = 1, lfc.col, lfc.min, lfc.max, ribbon.col, border.size, process.label, limit, scale.title,colorBar=T) {
GOChord<- id <- xpro <- ypro <- xgen <- ygen <- lx <- ly <- ID <- logFC <- NULL
y <- dim(data)[2]
Ncol if (missing(title)) title <- ""
if (missing(scale.title)) scale.title <- "logFC"
if (missing(space)) space = 0
if (missing(gene.order)) gene.order <- "none"
if (missing(gene.size)) gene.size <- 3
if (missing(gene.space)) gene.space <- 0.2
if (missing(lfc.col)) lfc.col <- c("brown1", "azure", "cornflowerblue")
if (missing(lfc.min)) lfc.min <- -3
if (missing(lfc.max)) lfc.max <- 3
if (missing(border.size)) border.size <- 0.5
if (missing(process.label)) process.label <- 11
if (missing(limit)) limit <- c(0, 0)
if (gene.order == "logFC") data <- data[order(data[, Ncol], decreasing = T), ]
if (gene.order == "alphabetical") data <- data[order(rownames(data)), ]
if (sum(!is.na(match(colnames(data), "logFC"))) > 0) {
if (nlfc == 1) {
<- check_chord(data[, 1 : (Ncol - 1)], limit)
cdata <- sapply(rownames(cdata), function(x) data[match(x, rownames(data)), Ncol])
lfc else {
} <- check_chord(data[, 1 : (Ncol - nlfc)], limit)
cdata <- sapply(rownames(cdata), function(x) data[, (Ncol - nlfc + 1)])
lfc
}else {
} <- check_chord(data, limit)
cdata <- 0
lfc
}if (missing(ribbon.col)) colRib <- grDevices::rainbow(dim(cdata)[2]) else colRib <- ribbon.col
<- colSums(cdata)
nrib <- rowSums(cdata)
ngen <- dim(cdata)[2]
Ncol <- dim(cdata)[1]
Nrow <- c()
colRibb for (b in 1 : length(nrib)) colRibb <- c(colRibb, rep(colRib[b], 202 * nrib[b]))
<- 1
r1 <- r1 + 0.1
r2 <- c()
xmax <- 0
x for (r in 1 : length(nrib)) {
<- nrib[r] / sum(nrib)
perc <- c(xmax, (pi * perc) - space)
xmax if (length(x) <= Ncol - 1) x <- c(x, x[r] + pi * perc)
}<- c()
xp <- c()
yp <- 50
l for (s in 1 : Ncol) {
<- seq(x[s], x[s] + xmax[s], length = l)
xh <- c(xp, r1 * sin(x[s]), r1 * sin(xh), r1 * sin(x[s] + xmax[s]), r2 * sin(x[s] + xmax[s]), r2 * sin(rev(xh)), r2 * sin(x[s]))
xp <- c(yp, r1 * cos(x[s]), r1 * cos(xh), r1 * cos(x[s] + xmax[s]), r2 * cos(x[s] + xmax[s]), r2 * cos(rev(xh)), r2 * cos(x[s]))
yp
}<- data.frame(x = xp, y = yp, id = rep(c(1 : Ncol), each = 4 + 2 * l))
df_process <- c()
xp <- c()
yp <- NULL
logs <- seq(0 - space, -pi - (-pi / Nrow) - space, length = Nrow)
x2 <- rep(-pi / Nrow + space, length = Nrow)
xmax2 for (s in 1 : Nrow) {
<- seq(x2[s], x2[s] + xmax2[s], length = l)
xh if (nlfc <= 1) {
<- c(xp, (r1 + 0.05) * sin(x2[s]), (r1 + 0.05) * sin(xh), (r1 + 0.05) * sin(x2[s] + xmax2[s]), r2 * sin(x2[s] + xmax2[s]), r2 * sin(rev(xh)), r2 * sin(x2[s]))
xp <- c(yp, (r1 + 0.05) * cos(x2[s]), (r1 + 0.05) * cos(xh), (r1 + 0.05) * cos(x2[s] + xmax2[s]), r2 * cos(x2[s] + xmax2[s]), r2 * cos(rev(xh)), r2 * cos(x2[s]))
yp else {
} <- seq(r1, r2, length = nlfc + 1)
tmp for (t in 1 : nlfc) {
<- c(logs, data[s, (dim(data)[2] + 1 - t)])
logs <- c(xp, (tmp[t]) * sin(x2[s]), (tmp[t]) * sin(xh), (tmp[t]) * sin(x2[s] + xmax2[s]), tmp[t + 1] * sin(x2[s] + xmax2[s]), tmp[t + 1] * sin(rev(xh)), tmp[t + 1] * sin(x2[s]))
xp <- c(yp, (tmp[t]) * cos(x2[s]), (tmp[t]) * cos(xh), (tmp[t]) * cos(x2[s] + xmax2[s]), tmp[t + 1] * cos(x2[s] + xmax2[s]), tmp[t + 1] * cos(rev(xh)), tmp[t + 1] * cos(x2[s]))
yp
}
}
}if (lfc[1] != 0) {
if (nlfc == 1) {
<- data.frame(x = xp, y = yp, id = rep(c(1 : Nrow), each = 4 + 2 * l), logFC = rep(lfc, each = 4 + 2 * l))
df_genes else {
} <- data.frame(x = xp, y = yp, id = rep(c(1 : (nlfc * Nrow)), each = 4 + 2 * l), logFC = rep(logs, each = 4 + 2 * l))
df_genes
}else {
} <- data.frame(x = xp, y = yp, id = rep(c(1 : Nrow), each = 4 + 2 * l))
df_genes
}<- seq(0, 180, length = length(x2))
aseq <- c()
angle for (o in aseq) if ((o + 270) <= 360) angle <- c(angle, o + 270) else angle <- c(angle, o - 90)
<- data.frame(xgen = (r1 + gene.space) * sin(x2 + xmax2 / 2), ygen = (r1 + gene.space) * cos(x2 + xmax2 / 2), labels = rownames(cdata), angle = angle)
df_texg <- data.frame(xpro = (r1 + 0.15) * sin(x + xmax / 2), ypro = (r1 + 0.15) * cos(x + xmax / 2), labels = colnames(cdata), stringsAsFactors = FALSE)
df_texp <- rep(colRib, each = 4 + 2 * l)
cols <- c()
x.end <- c()
y.end <- c()
processID for (gs in 1 : length(x2)) {
<- seq(x2[gs], x2[gs] + xmax2[gs], length = ngen[gs] + 1)
val <- which((cdata[gs, ] != 0) == T)
pros for (v in 1 : (length(val) - 1)) {
<- c(x.end, sin(val[v]), sin(val[v + 1]))
x.end <- c(y.end, cos(val[v]), cos(val[v + 1]))
y.end <- c(processID, rep(pros[v], 2))
processID
}
}<- data.frame(x.end = x.end, y.end = y.end, processID = processID)
df_bezier <- df_bezier[order(df_bezier$processID, -df_bezier$y.end), ]
df_bezier <- c()
x.start <- c()
y.start for (rs in 1 : length(x)) {
<- seq(x[rs], x[rs] + xmax[rs], length = nrib[rs] + 1)
val for (v in 1 : (length(val) - 1)) {
<- c(x.start, sin(val[v]), sin(val[v + 1]))
x.start <- c(y.start, cos(val[v]), cos(val[v + 1]))
y.start
}
}$x.start <- x.start
df_bezier$y.start <- y.start
df_bezier<- bezier(df_bezier, colRib)
df_path if (length(df_genes$logFC) != 0) {
<- sapply(df_genes$logFC, function(x) ifelse(x > lfc.max, lfc.max, x))
tmp <- sapply(tmp, function(x) ifelse(x < lfc.min, lfc.min, x))
logFC $logFC <- logFC
df_genes
}<- ggplot() +
g geom_polygon(data = df_process, aes(x, y, group = id), fill = "gray70", inherit.aes = F, color = "black",size = border.size) +
geom_polygon(data = df_process, aes(x, y, group = id), fill = cols, inherit.aes = F, alpha = 0.6, color = "black",size = border.size) +
geom_point(aes(x = xpro, y = ypro, size = factor(labels, levels = labels), shape = NA), data = df_texp) +
guides(size = guide_legend("Terms",ncol = 1, byrow = T, override.aes = list(shape = 22, fill = unique(cols), size = process.label/2),title.position = "top",title.hjust = 0,order = 1)) +
theme(legend.text = element_text(size = process.label)) +
geom_text(aes(xgen, ygen, label = labels, angle = angle), data = df_texg, size = gene.size) +
geom_polygon(aes(x = lx, y = ly, group = ID), data = df_path, fill = colRibb, color = "black", size = border.size, inherit.aes = F) +
labs(title = title) + coord_fixed()+
theme_blankif (colorBar) {
+ geom_polygon(data = df_genes, aes(x, y, group = id, fill = logFC), inherit.aes = F, color = "black",size = border.size) +
g scale_fill_gradient2(scale.title, space = "Lab", low = lfc.col[3], mid = lfc.col[2], high = lfc.col[1],
guide = guide_colorbar(title.position = "top",title.hjust=0.5,barwidth=process.label/10,barheight = process.label/2,label.hjust = 1,label.vjust = 0.5,direction="vertical"),
# breaks = c(lfc.min,lfc.max),labels = c(lfc.min,lfc.max)
breaks = c(min(df_genes$logFC), max(df_genes$logFC)), labels = c(round(min(df_genes$logFC),1), round(max(df_genes$logFC),1))
+
) theme(legend.position = "right",legend.title = element_text(size = process.label*4/3),legend.text = element_text(size = process.label), legend.background = element_rect(fill = "white"), legend.direction = "horizontal") # ,legend.box = "horizontal"
else {
}
+ geom_polygon(data = df_genes, aes(x, y, group = id), fill = "skyblue", inherit.aes = F, color = "black",size = border.size) #+
g #theme(legend.position = "bottom",legend.text = element_text(size = process.label), legend.background = element_rect(fill = "white"), legend.box = "horizontal", legend.direction = "horizontal")
}
}
正式程序
# 代码来源:https://www.r2omics.cn/
# 转载所需要的包
library(tidyverse) # 用于数据处理
library(GOplot) # 用于绘图 安装方式 devtools::install_github("wencke/wencke.github.io")
library(RColorBrewer) # 颜色
# 读取数据
= read.delim("https://www.r2omics.cn/res/demodata/enrichChord/data.txt")
dfTerm = read.delim("https://www.r2omics.cn/res/demodata/enrichChord/gene2FC.txt")
dfFC
# 整理数据
= dfTerm %>%
dfClean separate_rows(Genes,sep = ",") %>%
left_join(dfFC,by=c("Genes"="Gene"))
= chord_dat(data.frame(dfClean), process=unique(dfClean$Term))
dfPlot
# 绘图
GOChord(dfPlot,
border.size=0.1, # 线条黑边的粗细
space = 0.02, # 基因方格之间的空隙
gene.order = "logFC", # 基因的排序方式,可以按照"logFC", "alphabetical", "none",
gene.space = 0.3, # 基因标签离图案的距离
gene.size = 2, # 基因标签的大小
lfc.col=c('firebrick3', 'white','royalblue3'), # logFC图例的颜色
ribbon.col=brewer.pal(ncol(dfPlot)-1, "Set2"), # 条带颜色设置
process.label = 8 # 图例标签的大小
)