> setwd("C:\\Users\\Administrator\\Desktop\\24.clinicalCor")
> rt=read.table(inputFile,sep="\t",header=T,check.names=F) #读取输入文件
> clinicalNum=7 #定义临床性状个数
> pFilter=0.05 #是否绘图的过滤标准
>
> #临床相关性分析,输出表格和图形结果
> outTab=data.frame(gene=colnames(rt[,(clinicalNum+2):ncol(rt)]))
> for(clinical in colnames(rt[,2:(clinicalNum+1)])){
+ xlabel=vector()
+ tab1=table(rt[,clinical])
+ labelNum=length(tab1)
+ dotCol=c("blue","red")
+ if(labelNum==3){
+ dotCol=c(2,3,4)
+ }
+ if(labelNum==4){
+ dotCol=c(2,3,4,5)
+ }
+ if(labelNum>4){
+ dotCol=rainbow(labelNum)
+ }
+ for(i in 1:labelNum){
+ xlabel=c(xlabel,names(tab1[i]) )
+ }
+ clinicalPvalVector=c()
+ for(i in colnames(rt[,(clinicalNum+2):ncol(rt)])){
+ rt1=rbind(expression=rt[,i],clinical=rt[,clinical])
+ rt1=as.matrix(t(rt1))
+ if(labelNum==2){
+ cliTest<-t.test(expression ~ clinical, data=rt1)
+ }else{
+ cliTest<-kruskal.test(expression ~ clinical, data = rt1)}
+ pValue=cliTest$p.value
+ stat=round(cliTest$statistic,3)
+ pval=0
+ if(pValue<0.001){
+ pval=signif(pValue,4)
+ pval=format(pval, scientific = TRUE)
+ }else{
+ pval=sprintf("%.03f",pValue)
+ }
+ clinicalPvalVector=c(clinicalPvalVector,paste0(stat,"(",pval,")"))
+ if(pValue<pFilter){
+ b = boxplot(expression ~ clinical, data = rt1,outline = FALSE, plot=F)
+ yMin=min(b$stats)
+ yMax = max(b$stats/5+b$stats)
+ n = ncol(b$stats)
+ outPdf=paste0(i,".",clinical,".pdf")
+ pdf(file=outPdf,width = 7,height = 5)
+ par(mar = c(4.5,6,3,3))
+ ylab=ifelse(i=="riskScore","Risk score","Gene expression")
+ boxplot(expression ~ clinical, data = rt1,names=xlabel,
+ ylab = ylab,main=paste0(i," (p=",pval,")"),xlab=clinical,
+ cex.main=1.4, cex.lab=1.4, cex.axis=1.3,ylim=c(yMin,yMax),outline = FALSE)
+ beeswarm(expression ~ clinical, data = rt1, col =dotCol, lwd=0.1,
+ pch = 16, add = TRUE, corral="wrap")
+ dev.off()
+ }
+ }
+ outTab=cbind(outTab,clinicalPvalVector)
+ }
Error in if (stderr < 10 * .Machine$double.eps * max(abs(mx), abs(my))) stop("data are essentially constant") :
需要TRUE/FALSE值的地方不可以用缺少值
此外: Warning messages:
1: In mean.default(x) : argument is not numeric or logical: returning NA
2: In mean.default(y) : argument is not numeric or logical: returning NA
> colnames(outTab)=c("id",colnames(rt[,2:(clinicalNum+1)]))
Error in names(x) <- value : 'names'属性的长度[8]必需和矢量的长度[1]一样
> write.table(outTab,file="clinicalCor.xls",sep="\t",row.names=F,quote=F)
代码见上图
数据为
id age grade stage T M N HSP90AB1 MAP2K7 TBK1 VAMP7 DNAJB1 ATG5 GABARAPL2 riskScore
TCGA-VR-A8EO <=65 G1-2 Stage I&II T3-4 M0 N0 9.85772070349287 3.410769808467 2.71489119710381 4.78553039990027 6.92241877104248 3.5008407160015 4.01999809636905 2.11711815629978
TCGA-Z6-A8JD <=65 G3-4 Stage I&II T3-4 M0 N0 9.16949212799578 3.48978182369592 2.52814194694367 4.09671304643945 8.19368452866279 2.91494829820787 3.78880053705284 0.614947043988963
TCGA-L5-A4OX >65 G3-4 Stage I&II T1-2 M0 N1-3 9.19447271892246 3.09424958449011 3.47628187502368 4.66724838754228 7.74573750879616 3.6577737512425 3.54538648005706 3.57354236310584