差异基因 p log2foldchange_拟南芥的基因ID批量转换?差异基因,GO/KEGG数据库注释(转录组直接送你全套流程)...
新手遇到的問題都是類似的,比如批量ID轉(zhuǎn)換
雖然我寫過大量的教程:ID轉(zhuǎn)換大全? ?不過都需要R基礎(chǔ),因?yàn)槭谴笈哭D(zhuǎn)換啊!
但熱心腸的植物生物信息學(xué)教學(xué)大佬還是友善的給出了解決方案
我也狗尾續(xù)貂制作了一個(gè)網(wǎng)頁工具教程:
簡(jiǎn)單的3個(gè)步驟,不會(huì)代碼也可以很容易把ID批量轉(zhuǎn)換啦!
不過有趣的是我搜索電腦資料,看到了2年前寫的擬南芥教程。
不過我為什么會(huì)花時(shí)間寫擬南芥教程呢?
1 首先加載必要的包
library(ggplot2)library(stringr)
#?source("https://bioconductor.org/biocLite.R")
#?biocLite("clusterProfiler")
#?biocLite("org.At.tair.db")
library(clusterProfiler)
library(org.At.tair.db)
2 然后導(dǎo)入數(shù)據(jù)
deg1=read.table('https://raw.githubusercontent.com/jmzeng1314/my-R/master/DEG_scripts/tair/DESeq2_DEG.Day1-Day0.txt')deg1=na.omit(deg1)
head(deg1)
##??????????????baseMean?log2FoldChange????lfcSE??????stat???????pvalue
##?AT3G01430???22.908929??????18.989990?2.148261??8.839704?9.597263e-19
##?AT1G47405???20.709551??????20.958806?2.434574??8.608820?7.381677e-18
##?AT2G33830?1938.159722??????-2.560609?0.312663?-8.189678?2.619266e-16
##?AT5G28627????8.118376?????-21.131318?2.875691?-7.348257?2.008078e-13
##?AT2G33750????9.789740?????-19.989301?2.847513?-7.019915?2.220033e-12
##?AT3G54500?2238.314652???????2.720430?0.386305??7.042182?1.892517e-12
##???????????????????padj
##?AT3G01430?1.858318e-14
##?AT1G47405?7.146571e-14
##?AT2G33830?1.690562e-12
##?AT5G28627?9.720602e-10
##?AT2G33750?7.164418e-09
##?AT3G54500?7.164418e-09
prefix='Day1-Day0'
3 然后判斷顯著差異基因
很明顯,這個(gè)時(shí)候用padj來挑選差異基因即可,不需要看foldchange了。
table(deg1$padj<0.05)##?
##?FALSE??TRUE?
##?19166???197
table(deg1$padj<0.01)
##?
##?FALSE??TRUE?
##?19303????60
diff_geneList?=?rownames(deg1[deg1$padj<0.05,])
up_geneList?=?rownames(deg1[deg1$padj<0.05?&?deg1$log2FoldChange?>0,])
down_geneList?=?rownames(deg1[deg1$padj<0.05?&?deg1$log2FoldChange?<0,])
length(diff_geneList)
##?[1]?197
length(up_geneList)
##?[1]?89
length(down_geneList)
##?[1]?108
3.1 畫個(gè)火山圖看看挑選的差異基因合理與否
colnames(deg1)##?[1]?"baseMean"???????"log2FoldChange"?"lfcSE"??????????"stat"??????????
##?[5]?"pvalue"?????????"padj"
log2FoldChange_Cutof?=?0
##?這里我不準(zhǔn)備用log2FoldChange來挑選差異基因,僅僅是用padj即可
deg1$change?=?as.factor(ifelse(deg1$padj?0.05?&?
???????????????????????????????????????abs(deg1$log2FoldChange)?>?log2FoldChange_Cutof,
?????????????????????????????????????ifelse(deg1$log2FoldChange?>?log2FoldChange_Cutof?,'UP','DOWN'),'NOT'))
this_tile?'Cutoff?for?log2FoldChange?is?',round(log2FoldChange_Cutof,3),
????????????????????'\nThe?number?of?up?gene?is?',nrow(deg1[deg1$change?=='UP',])?,
????????????????????'\nThe?number?of?down?gene?is?',nrow(deg1[deg1$change?=='DOWN',])
)
g_volcano?=?ggplot(data=deg1,?aes(x=log2FoldChange,?y=-log10(padj),?color=change))?+
??geom_point(alpha=0.4,?size=1.75)?+
??theme_set(theme_set(theme_bw(base_size=20)))+
??xlab("log2?fold?change")?+?ylab("-log10?p-value")?+
??ggtitle(?this_tile??)?+?
??theme(plot.title?=?element_text(size=15,hjust?=?0.5))+
??scale_colour_manual(values?=?c('blue','black','red'))??##?corresponding?to?the?levels(res$change)
print(g_volcano)
4 GO/KEGG注釋
4.1 首先進(jìn)行KEGG注釋
diff.kk?<-?enrichKEGG(gene?????????=?diff_geneList,organism?????=?'ath',pvalueCutoff?=?0.99,qvalueCutoff=0.99)kegg_diff_dt?<-?as.data.frame(setReadable(diff.kk,org.At.tair.db,keytype?=?'TAIR'))up.kk?<-?enrichKEGG(gene?????????=?up_geneList,organism?????=?'ath',pvalueCutoff?=?0.99,qvalueCutoff=0.99
)kegg_up_dt?<-?as.data.frame(setReadable(up.kk,org.At.tair.db,keytype?=?'TAIR'))down.kk?<-?enrichKEGG(gene?????????=?down_geneList,organism?????=?'ath',pvalueCutoff?=?0.99,qvalueCutoff=0.99
)kegg_down_dt?<-?as.data.frame(setReadable(down.kk,org.At.tair.db,keytype?=?'TAIR'))
4.2 可視化看看KEGG注釋結(jié)果
##?KEGG?patheay?visulization:???down_kegg$pvalue<0.05,];down_kegg$group=-1
??up_kegg$pvalue<0.05,];up_kegg$group=1
??dat=rbind(up_kegg,down_kegg)
??colnames(dat)
##??[1]?"ID"??????????"Description"?"GeneRatio"???"BgRatio"?????"pvalue"?????
##??[6]?"p.adjust"????"qvalue"??????"geneID"??????"Count"???????"group"
??dat$pvalue?=?-log10(dat$pvalue)
??dat$pvalue=dat$pvalue*dat$group?
??dat=dat[order(dat$pvalue,decreasing?=?F),]
??g_kegg????geom_bar(stat="identity")?+?
????scale_fill_gradient(low="blue",high="red",guide?=?FALSE)?+?
????scale_x_discrete(name?="Pathway?names")?+
????scale_y_continuous(name?="-log10P-value")?+
????coord_flip()?+
????ggtitle("Pathway?Enrichment")
??print(g_kegg)
4.3 接著進(jìn)行GO注釋
for?(onto?in?c('CC','BP','MF')){??ego???????????????????OrgDb?????????=?org.At.tair.db,?
??????????????????keyType?=?'TAIR',
??????????????????ont???????????=??onto?,
??????????????????pAdjustMethod?=?"BH",
??????????????????pvalueCutoff??=?0.2,
??????????????????qvalueCutoff??=?0.9)
??ego?'TAIR')
??write.csv(as.data.frame(ego),paste0(prefix,"_",onto,".csv"))
??#ego2?
??ego2=ego
??pdf(paste0(prefix,"_",onto,'_barplot.pdf'))
??p=barplot(ego2,?showCategory=12)+scale_x_discrete(labels=function(x)?str_wrap(x,width=20))print(p)dev.off()?
}ggsave(filename?=?paste0(prefix,"_volcano_plot.pdf"),g_volcano)##?Saving?7?x?5?in?image
ggsave(filename?=?paste0(prefix,"_kegg_plot.pdf"),g_kegg)
##?Saving?7?x?5?in?image
write.csv(x?=?kegg_diff_dt,file?=?paste0(prefix,"_kegg_diff.csv"))
write.csv(x?=?kegg_up_dt,file?=?paste0(prefix,"_kegg_up.csv"))
write.csv(x?=?kegg_down_dt,file?=?paste0(prefix,"_kegg_down.csv"))
5 基因ID注釋
library(org.At.tair.db)ls('package:org.At.tair.db')
##??[1]?"org.At.tair"?????????????"org.At.tair.db"?????????
##??[3]?"org.At.tairARACYC"???????"org.At.tairARACYCENZYME"
##??[5]?"org.At.tairCHR"??????????"org.At.tairCHRLENGTHS"??
##??[7]?"org.At.tairCHRLOC"???????"org.At.tairCHRLOCEND"???
##??[9]?"org.At.tairENTREZID"?????"org.At.tairENZYME"??????
##?[11]?"org.At.tairENZYME2TAIR"??"org.At.tairGENENAME"????
##?[13]?"org.At.tairGO"???????????"org.At.tairGO2ALLTAIRS"?
##?[15]?"org.At.tairGO2TAIR"??????"org.At.tairMAPCOUNTS"???
##?[17]?"org.At.tairORGANISM"?????"org.At.tairPATH"????????
##?[19]?"org.At.tairPATH2TAIR"????"org.At.tairPMID"????????
##?[21]?"org.At.tairPMID2TAIR"????"org.At.tairREFSEQ"??????
##?[23]?"org.At.tairREFSEQ2TAIR"??"org.At.tairSYMBOL"??????
##?[25]?"org.At.tair_dbInfo"??????"org.At.tair_dbconn"?????
##?[27]?"org.At.tair_dbfile"??????"org.At.tair_dbschema"
##?Then?draw?GO/kegg?figures:
deg1$gene_id=rownames(deg1)
id2symbol=toTable(org.At.tairSYMBOL)?
deg1=merge(deg1,id2symbol,by='gene_id')
##?可以看到有一些ID是沒有g(shù)ene?symbol的,這樣的基因,意義可能不大,也有可能是大部分人漏掉了
head(deg1)
##?????gene_id???baseMean?log2FoldChange?????lfcSE???????stat????pvalue
##?1?AT1G01010???58.25657?????1.13105335?0.8000274??1.4137683?0.1574300
##?2?AT1G01010???58.25657?????1.13105335?0.8000274??1.4137683?0.1574300
##?3?AT1G01020??542.64394????-0.05745554?0.3721650?-0.1543819?0.8773086
##?4?AT1G01030???48.77442????-1.09845263?1.2875862?-0.8531100?0.3935983
##?5?AT1G01040?1708.68949?????0.32421734?0.2777530??1.1672865?0.2430947
##?6?AT1G01040?1708.68949?????0.32421734?0.2777530??1.1672865?0.2430947
##????????padj?change??symbol
##?1?0.6008903????NOT?ANAC001
##?2?0.6008903????NOT??NAC001
##?3?0.9805661????NOT????ARV1
##?4?0.8144858????NOT????NGA3
##?5?0.6992279????NOT????DCL1
##?6?0.6992279????NOT???EMB60
可以看到基因的ID和symbol的對(duì)應(yīng)關(guān)系就出來了,根使用網(wǎng)頁工具是類似的,感興趣的朋友可以試試看網(wǎng)頁工具和R代碼的ID批量轉(zhuǎn)換差別有多大。
■???■?? ■?
全國(guó)巡講約你
第1-10站北上廣深杭,西安,鄭州, 吉林,武漢和成都(全部結(jié)束)
七月份我們不外出,只專注單細(xì)胞!
系統(tǒng)學(xué)習(xí)單細(xì)胞分析,報(bào)名生信技能樹的線下培訓(xùn),手慢無。
一年一度的生信技能樹單細(xì)胞線下培訓(xùn)班火熱招生
全國(guó)巡講第11站-港珠澳專場(chǎng)(生信技能樹爆款入門課)
總結(jié)
以上是生活随笔為你收集整理的差异基因 p log2foldchange_拟南芥的基因ID批量转换?差异基因,GO/KEGG数据库注释(转录组直接送你全套流程)...的全部?jī)?nèi)容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: python直方图的拟合_从一组数据py
- 下一篇: java执行查询postgresql得到