跟着Nature Communication学数据分析:R语言利用宏基因组的相对丰度数据做主坐标分析(PcoA))
论文
Microbiomes in the Challenger Deep slope and bottom-axis sediments
https://www.nature.com/articles/s41467-022-29144-4#code-availability
对应代码链接
https://github.com/ucassee/Challenger-Deep-Microbes
论文里提供了大部分图的数据和代码,很好的学习材料,感兴趣的同学可以找来参考,今天的推文重复一下论文中的Figure2b
部分数据集截图如下
相对丰度数据
分组数据
读取数据集
读取相对丰度数据
otu<-read.delim("data/20220602/dat01.txt",
sep="/t",
header = TRUE,
row.names = 1,
check.names = FALSE,
stringsAsFactors = FALSE)
dim(otu)
head(otu)
对数据转置
otu <- data.frame(t(otu))
otu[1:6,1:6]
读取分组数据
group<-read.delim("data/20220602/dat02.txt",
header=TRUE,
sep="/t",
stringsAsFactors = FALSE)
head(group)
这个分组数据和论文中提供的代码的分组信息还少一些内容,我们再给它增加几列
library(tidyverse)
group %>%
mutate(Site=case_when(
Group == "Slope" ~ "None-CD",
TRUE ~ "CD"
),
high=case_when(
`Depth (m)`< 6000 ~ '5k',
#`Depth (m)`>=6000 & `Depth (m)` < 7000 ~ '6k',
`Depth (m)`>=6000 & `Depth (m)` < 8000 ~ '7k',
`Depth (m)`>=8000 & `Depth (m)` < 9000 ~ '8k',
`Depth (m)`>=9000 & `Depth (m)` < 10000 ~ '9k',
`Depth (m)`>=10000 ~ '10k'
),
position=case_when(
`Depth (m)`< 8000 ~ 'North',
`Depth (m)`>= 8000 & `Depth (m)`< 10000 ~ 'South',
TRUE ~ 'axis'
)) -> new.group
这个分组信息可能和原文中有差别
主坐标分析代码
library(vegan)
distance <- vegdist(otu, method = 'bray')
pcoa <- cmdscale(distance, k = (nrow(otu) - 1), eig = T)
ordiplot(scores(pcoa)[ ,c(1, 2)], type = 't')
summary(pcoa)
构造作图数据
point <- data.frame(pcoa$point)
point %>% head()
species <- wascores(pcoa$points[,1:2], otu)
species %>% head()
pcoa_eig <- (pcoa$eig)[1:2] / sum(pcoa$eig)
pcoa_eig
sample_site <- data.frame({pcoa$point})[1:2]
sample_site$Sample <- rownames(sample_site)
names(sample_site)[1:2] <- c('PCoA1', 'PCoA2')
sample_site <- merge(sample_site, new.group, by = 'Sample', all.x = T)
sample_site %>% head()
给准备好的数据赋予因子水平
sample_site$Site <- factor(sample_site$Site,
levels = c('None-CD', 'CD'))
sample_site$high <- factor(sample_site$high,
levels = c('5k', '7k', '8k', '9k', '10k'))
sample_site$Position <- factor(sample_site$position,
levels = c('North', 'South', 'axis'))
构造画边界的数据
group_border <- plyr::ddply(sample_site, 'Site',
function(df) df[chull(df[[2]], df[[3]]), ])
画图代码
ggplot(sample_site, aes(PCoA1, PCoA2, group = Site)) +
theme(panel.grid = element_line(color = 'gray',
linetype = 2, size = 0.1),
panel.background = element_rect(color = 'black',
fill = 'transparent'),
legend.key = element_rect(fill = 'transparent')) + #去掉背景框
geom_vline(xintercept = 0, color = 'gray', size = 0.4) +
geom_hline(yintercept = 0, color = 'gray', size = 0.4) +
geom_polygon(data = group_border, aes(fill = Site),alpha=0.1) +
guides(fill = guide_legend(order = 1),
shape = guide_legend(order = 2),
color = guide_legend(order = 3)) +
scale_shape_manual(values = c(17, 16,15,12,10)) +
geom_point(aes(color = position, shape = high),
size = 2.5, alpha = 0.8) +
labs(x = paste('PCoA axis1: ',
round(100 * pcoa_eig[1], 2), '%'),
y = paste('PCoA axis2: ',
round(100 * pcoa_eig[2], 2), '%')) +
annotate('text', label = 'Slope',
x = -0.31, y = -0.15,
size = 5, colour = '#C673FF') +
annotate('text', label = 'Bottom-axis',
x = 0.32, y = 0.05,
size = 5, colour = '#FF985C')
论文中提供的代码出图效果如下
这个图和最终论文中的图还是有些差别的,主要是图例的位置和边框,如何用代码把图例的位置调整到右下角并添加一个边框,这个另起推文来介绍吧
示例数据和代码可以给推文点赞,然后点击在看,最后在公众号后台留言20220602获取
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!
共有 0 条评论