領悟生信文章邏輯之美,小夥伴們大家好呀,我是風間琉璃。上一周我們品讀了2020年10月發表在《Frontiers in oncology》雜誌上的文章「A Novel Promoter CpG-Based Signature for Long-Term Survival Prediction of Breast Cancer Patients」。作者使用CHAMP包進行篩選差異的甲基化位點。還沒看上一期推文的同學們請看一遍,看過的同學可以複習一遍。「2020年了!!這樣的文章也能發近5分!」。因為接下來我們會使用CHAMP包來適當復現文章中的流程。很精彩,不要錯過喔~
library(tidyverse)data_beta=read_tsv(file = "TCGA-BRCA.methylation450.tsv")data_samp=read_tsv(file = "phenotype.tsv")dim(data_beta)dim(data_samp)class(data_beta)class(data_samp)table(data_samp$sample_type.samples)tissue=c("Primary Tumor","Solid Tissue Normal")data_samp=data_samp[data_samp$sample_type.samples%in%tissue,]pdata=data_samp[,c("submitter_id.samples" ,"sample_type.samples")]ID=intersect(pdata$submitter_id.samples, colnames(data_beta))pdata=pdata[pdata$submitter_id.samples%in%ID,] dim(pdata)names(pdata) <- c("sample_name","sample_group")data_samp <- pdatanames(data_beta)[1]="CpG"data_beta=column_to_rownames(data_beta,"CpG") data_beta=data_beta[,ID]save(data_beta,data_samp,file = "methylation.rds")接下來,我們需要開始將清潔數據導入CHAMP包中。#加載數據和包library(ChAMP)library(tidyverse)#######################load("methylation.rds")#data_samp$Sample_Group <- if_else(data_samp$sample_group=="Primary Tumor", data_samp$Sample_Group <- "T", data_samp$Sample_Group <- "C")data_samp <- as.data.frame(data_samp)#####data_order=data_beta[,data_samp$sample_name]data_order=as.matrix(data_order)sum(is.na(data_order))#data_order=data_order+0.00001#myLoad=champ.filter(beta = data_order,pd = data_samp)#############################################################save(myLoad,file="meth_load.rds")## # myNorm <- champ.norm(beta=myLoad$beta,arraytype="450K",cores=100)# # save(myNorm,file="meth_norm.rds")# ##champ.QC()#myDMP <- champ.DMP(beta = myLoad$beta ,pheno=myLoad$pd$Sample_Group)########################################查看我們的分析結果head(myDMP[[1]])##############################myDMR <- champ.DMR(beta=myLoad$beta,pheno=myLoad$pd$Sample_Group,method="Bumphunter")#查看差異甲基化區域#head(myDMR$BumphunterDMR)(1)adj.P<0.05,delta |β|>0.2(2)位於啟動子區域(5』-UTR, TSS200, TSS1500 and 1stExon),那我們開始把。feature_pro=c("1stExon","5'UTR","TSS1500","TSS200")data_dif <- myDMP[[1]] %>% filter(adj.P.Val<0.05&abs(deltaBeta)>0.2)data_dif <- data_dif[data_dif$feature%in%feature_pro,]最後我們一共得到8778個位點,和作者的10088個差異甲基化位點有1000多個甲基化位點的差異,這可能是因為我們並沒有做標準化這一步(數據量太大了,跑不了。。。。) 接下來我們繪製Figure 1B的熱圖。library(pheatmap)ann_col <- data_sampann_col$Sample_Group <- if_else(data_samp$sample_group=="Primary Tumor", data_samp$Sample_Group <- "Tumor", data_samp$Sample_Group <- "Normal")ann_col <- ann_col %>% column_to_rownames(var = "sample_name") %>% dplyr::select(-sample_group) %>%dplyr::select(-case_submitter_id) %>% arrange(Sample_Group)data_heat=data_beta[rownames(data_dif),rownames(ann_col)]pheatmap(data_heat,color =colorRampPalette(c("navy", "white",
"firebrick3"))(10),cluster_rows = F,cluster_cols = F, legend = F,show_rownames = F,show_colnames = F,annotation_col = ann_col)
歡迎大家關註解螺旋生信頻道-挑圈聯靠公號~