用小饼图代替点图里的点

0.起因

最近在群里看到一个图,棒棒糖图的点用饼图代替了。

然后又看到一个文章里,用饼图代替了富集分析的点图,妙啊~

这个地球上有什么漂亮的图能躲过我的小胖手呢?画他!

1.搜索

试了几个不同的关键词,最后这样搜到了。

答案指向一个包,名叫scatterpie。一看作者,啊,这不是Y叔嘛~

2. 跑示例代码

2.1 示例数据

set.seed(123)
long <- rnorm(50, sd=100)
lat <- rnorm(50, sd=50)
d <- data.frame(long=long, lat=lat)
d <- with(d, d[abs(long) < 150 & abs(lat) < 70,])
n <- nrow(d)
d$region <- factor(1:n)
d$A <- abs(rnorm(n, sd=1))
d$B <- abs(rnorm(n, sd=2))
d$C <- abs(rnorm(n, sd=3))
d$D <- abs(rnorm(n, sd=4))
d[1, 4:7] <- d[1, 4:7] * 3
head(d)

##          long        lat region          A        B        C        D
## 1  -56.047565  12.665926      1 2.13121969 8.663359 3.928711 8.676792
## 2  -23.017749  -1.427338      2 0.25688371 1.403569 1.375096 4.945092
## 4    7.050839  68.430114      3 0.24669188 0.524395 3.189978 5.138863
## 5   12.928774 -11.288549      4 0.34754260 3.144288 3.789556 2.295894
## 8 -126.506123  29.230687      5 0.95161857 3.029335 1.048951 2.471943
## 9  -68.685285   6.192712      6 0.04502772 3.203072 2.596539 4.439393

2.2 示例图片

library(ggplot2)
library(scatterpie)
ggplot() + 
  geom_scatterpie(aes(x=long, 
                      y=lat), 
                  data=d,
                  cols=LETTERS[1:4]) + 
  coord_equal()

3.仿造示例数据并画图

3.1找个数据做差异分析

library(tinyarray)
library(dplyr)
library(org.Hs.eg.db)
gse = "GSE42872"
geo = geo_download(gse)
Group = rep(c("control","treat"),each = 3)
Group = factor(Group)
find_anno(geo$gpl)

## [1] "`library(hugene10sttranscriptcluster.db);ids <- toTable(hugene10sttranscriptclusterSYMBOL)` and `ids <- AnnoProbe::idmap('GPL6244')` are both avaliable"

ids <- AnnoProbe::idmap(geo$gpl,destdir = tempdir())
deg = get_deg(geo$exp,Group,ids,logFC_cutoff = 2)
head(deg)

##       logFC   AveExpr         t      P.Value    adj.P.Val        B probe_id
## 1  5.780170  7.370282  82.94833 3.495205e-12 1.163798e-07 16.32898  8133876
## 2 -4.212683  9.106625 -68.40113 1.437468e-11 2.393169e-07 15.71739  7965335
## 3  5.633027  8.763220  57.61985 5.053466e-11 4.431880e-07 15.04752  7972259
## 4 -3.801663  9.726468 -57.21112 5.324059e-11 4.431880e-07 15.01709  7972217
## 5  3.263063 10.171635  50.51733 1.324638e-10 8.821294e-07 14.45166  8129573
## 6 -3.843247  9.667077 -45.87910 2.681063e-10 1.487856e-06 13.97123  8015806
##   symbol change ENTREZID
## 1   CD36     up      948
## 2  DUSP6   down     1848
## 3    DCT     up     1638
## 4  SPRY2   down    10253
## 5  MOXD1     up    26002
## 6   ETV4   down     2118

table(deg$change)

## 
##   down stable     up 
##     48  18092     82

3.2 用差异基因做富集分析

gene_up = deg$symbol[deg$change=="up"]
gene_down = deg$symbol[deg$change=="down"]
gene_diff = c(gene_up,gene_down)
length(gene_diff)

## [1] 130

head(gene_diff)

## [1] "CD36"       "DCT"        "MOXD1"      "NUPR1"      "ST6GALNAC2"
## [6] "TXNIP"

g = quick_enrich(gene_diff,kkgo_file = "kk_gop.Rdata")

dat = g$go@result %>% 
  arrange(pvalue) %>% 
  filter(ONTOLOGY=="BP") %>% 
  head(10) %>% 
  mutate(Description = factor(Description,levels = Description))

p1 = ggplot(dat)+
  geom_point(aes(x = -log10(pvalue),y = Description))+
  theme_bw()
p1

3.3 计算每条term的上下调基因数量

library(stringr)
p = str_split(dat$geneID,"/")
names(p) = dat$ID
get_num = function(x){
  c(sum(x %in% gene_up),sum(x %in% gene_down))
}
re = sapply(p, get_num) %>% t() %>% as.data.frame()
rownames(re) = names(p)
colnames(re) = c("up","down")

3.4 仿制示例数据并画图

试了一下,如果纵坐标是分类变量是会报错的。。。

所以曲线救国,先生成一组连续型数据,然后改刻度值。

而需要保持饼图的形状是圆的,又需要固定坐标系,横纵坐标刻度是一样的。

这时候难度就来了,需要生成的纵坐标得是和横坐标范围一致的等差数列,用seq函数。

dp = cbind(lp = -log10(dat$pvalue),
           Description = dat$Description,
           re)
dp$lab = seq(min(dp$lp), max(dp$lp), length.out = nrow(dp))
head(dp)

##                  lp                                            Description up
## GO:0071900 7.264160 regulation of protein serine/threonine kinase activity  5
## GO:0050673 6.929666                          epithelial cell proliferation  8
## GO:0043405 6.351294                      regulation of MAP kinase activity  5
## GO:0009314 5.748852                                  response to radiation  6
## GO:0010165 5.691160                                      response to X-ray  0
## GO:0071214 5.635287                  cellular response to abiotic stimulus  6
##            down      lab
## GO:0071900   10 5.525307
## GO:0050673    8 5.718513
## GO:0043405    5 5.911719
## GO:0009314    8 6.104925
## GO:0010165    5 6.298131
## GO:0071214    6 6.491337

p2 = ggplot() + 
  geom_scatterpie(aes(x=lp, 
                      y=lab), 
                  data=dp,
                  cols=colnames(re),
                  pie_scale = 1.5) +
  theme_bw()+
  scale_fill_manual(values = c("#f87669", "#2874C5"))+
  scale_y_continuous(labels = dp$Description,breaks = dp$lab)+
  labs(x = "-log10(pvalue)",y = "Description")+
  coord_equal()
p2

3.5 要棒棒糖还不简单嘛

p2 + geom_segment(data = dp, aes(x=as.integer(min(dp$lp)), xend=lp, y=lab, yend=lab))

版权声明:
作者:Alex
链接:https://www.techfm.club/p/45159.html
来源:TechFM
文章版权归作者所有,未经允许请勿转载。

THE END
分享
二维码
< <上一篇
下一篇>>