# 代码来源:https://www.r2omics.cn/
# Mfuzz包的安装方式为:BiocManager::install("Mfuzz")
library(Mfuzz)
library(tidyverse)
library(patchwork)
# 先自定一个函数,之后直接调用
= function(data,
mfuzzHeatmap clusterNum=6
){# data = df
# clusterNum=6
# 构建对象,填补缺失值,标准化等
<- data.matrix(data) # 数据框转换为矩阵
dm <- new("ExpressionSet",exprs = dm) # 构建对象
ESet <- filter.NA(ESet, thres=0.25) # 过滤缺失值超过“25%”的基因
ESet <- fill.NA(ESet,mode="knn") # knn算法填补缺失值
ESet <- filter.std(ESet,min.std=0,visu=F)# 根据标准差去除样本间差异太小的基因
ESet <- standardise(ESet) # 标准化
gene.s exprs(gene.s) # 查看处理后的数据
# 聚类
<- clusterNum # 设置聚类个数
c <- mestimate(gene.s) # 评估出最佳的m值
m set.seed(123) # 设置随机种子,防止每次聚类的结果都不一样,无法复现
<- mfuzz(gene.s, c = c, m = m)
cl # 查看每个基因聚到哪个类当中
cl $size # 查看每个cluster中的基因个数
cl$membership #查看基因和cluster之间的membership。如果两个基因对于一个特定的cluster都有高的membership score,那么他们通常来说表达模式是相似的
cl
# ggplot2绘图
# 绘制趋势分析
= exprs(gene.s) %>%data.frame()
dfMfuzz
dfMfuzz= cl$membership %>%
dfColor data.frame(check.names = F) %>%
::rownames_to_column("ID") %>%
tibblepivot_longer(-1,names_to = "cluster",values_to = "Membership")
# 计算color的范围,让多张图的图例保持相同
= dfColor %>%
colorLimit group_by(ID) %>%
summarise(max=max(Membership,na.rm = T)) %>%
ungroup()
= cl$cluster %>%
dfcluster data.frame() %>%
set_names("Cluster") %>%
::rownames_to_column("ID")
tibble
= dfcluster %>%
dfclusterSplit group_split(Cluster,.keep=T)
= imap(dfclusterSplit,function(dataItem,i){
mfuzzPlotList # dataItem = dfclusterSplit[[1]]
= dataItem$Cluster[[1]]
clusterName = dataItem %>% pull(ID)
clusterID = length(clusterID)
myN
= dfMfuzz[clusterID,] %>%
dfPlot ::rownames_to_column("ID") %>%
tibblepivot_longer(-1,names_to = "Sample",values_to = "Value") %>%
left_join(dfColor %>%
filter(cluster==clusterName)) %>%
arrange(Membership)
# dfPlot = dfPlot %>%
# filter(Membership>0.5)
= ggplot(dfPlot,aes(x=Sample,y=Value,group=factor(ID,levels = unique(ID)),color=Membership))+
p geom_line()+
scale_color_gradientn(colors =c("#5E4FA2","#3288BD","#66C2A5", # 设置渐变色
"#ABDDA4","#E6F598","#FFFFBF",
"#FEE08B", "#FDAE61","#F46D43",
"#D53E4F","#9E0142"),
breaks=seq(0,1,0.1),
limits=c(min(colorLimit$max,na.rm = T),max(colorLimit$max,na.rm = T))
+
)# scale_color_continuous(breaks=c(seq(0,1,0.1)),labels=c(seq(0,1,0.1)))+
theme_bw()+
labs(y=paste0("cluster ",clusterName,"\n","n=",myN),
x="")+
theme(legend.position = "top",
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
plot.margin = unit(c(0, 0.1, 0, 0), "inches")
+
)scale_x_discrete(expand = c(0.05,0.05))
pif(i==length(dfclusterSplit)){
else{
}= p+
p theme(axis.text.x = element_blank(), # 多张图是否只显示一个刻度
axis.ticks.x = element_blank(),
axis.title.x = element_blank())
}
p
})
# 绘制热图
= imap(dfclusterSplit,function(dataItem,i){
heatmapList # dataItem = dfclusterSplit[[1]]
= dataItem$Cluster[[1]]
clusterName = dataItem %>% pull(ID)
clusterID = length(clusterID)
myN
= t(data) %>%
dfMean.Zscore scale() %>%
t() %>%
data.frame()
= dfMean.Zscore[clusterID,] %>%
dfPlot ::rownames_to_column("ID") %>%
tibblepivot_longer(-1,names_to = "Sample",values_to = "Value")
dfPlot= ggplot(dfPlot,aes(x=Sample,y=ID,fill=Value))+
p geom_tile()+
scale_fill_gradient2(low="#0000ff", high="#ff0000", mid="#ffffff",
breaks=seq(-3,3,0.5),
limits=c(min(dfMean.Zscore,na.rm = T),max(dfMean.Zscore,na.rm = T))
+
)theme_bw()+
labs(y="",x="",fill="Expression")+
theme(legend.position = "top",
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
plot.margin = unit(c(0, 0, 0, 0), "inches")
+
)scale_x_discrete(expand = c(0,0))
if(i==length(dfclusterSplit)){
else{
}= p+
p theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),)
}
p
})
# 合并
= wrap_plots(c(mfuzzPlotList,heatmapList),byrow=F,ncol=2)+
p plot_layout(guides = 'collect') & theme(legend.position='top')
return(
list(p=p,
dfcluster=dfcluster)
) }
R语言做趋势分析+热图
前言
经常能在文献中看到趋势分析和热图的合并图,自己手搓了一个自定义函数,其实就是折线图和热图拼接起来。
绘图前的数据准备
demo数据可以在https://www.r2omics.cn/res/demodata/mfuzz.xls下载。
数据包含2个维度,数据通常来源于搜库结果定量表,且需要有时序关系。一般都是用预处理后的数据,组内取平均值或中位数。
R语言怎么做
已提供示例数据,可直接运行。
可以直接用本篇提供的mfuzzHeatmap函数绘制趋势分析和热图的组合图,目前提供的可改参数只有聚类个数,其他细节修改可以直接修改函数对应的部分。
自定义函数 mfuzzHeatmap
调用自定义函数
# 代码来源:https://www.r2omics.cn/
# 读取时间序列分析数据文件
= read.delim("https://www.r2omics.cn/res/demodata/mfuzz.xls",# 这里读取了网络上的demo数据,将此处换成你自己电脑里的文件
df row.names = 1
)= mfuzzHeatmap(
result data = df, # 数据
clusterNum = 6 # 聚类个数
)
211 genes excluded.
0 genes excluded.
# 绘图
$p result
# 查看基因被聚到哪类中
head(result$dfcluster)
ID Cluster
1 Gene1 5
2 Gene2 6
3 Gene4 4
4 Gene5 3
5 Gene8 2
6 Gene9 4