R一些非常用函数
1. identical2.surv_cutpoint3.多个数据的交集4.首字母大写5.WGCNA6.创建统计表7.下载kegg所有通路的所有基因8.批量替换9.箱10.添加线以及查看默认ggplot画图的颜色11.导出图为pptx12.jimmy老师的GEO包的快捷查找探针基因方法13.ggplot 我的常用修饰14. 箱线图加蜂巢图15. R查找是否正态变量函数17.批量logistics回归18 关于多变量同时循环时19.取消科学计算法显示20.代码变得整洁好看21.当你的复制微信公众号的代码首行有数字或空格时22.查看源代码23.批量提取生存分析24.R语言表达式25 R版本更新后更新R包26 使用bedtools
1. identical
identical(x,y) ## 鉴别两个对象是否完全一致,等同于all(x==y)
2.surv_cutpoint
surv_cutpoint,生信分析中生存分析中使用的可以找到合适的划分点使得生存存在差异,
遗憾的是这个函数只能用于两组间的cutoff值,需配合surv_categorize(res.cut)食用
示例:# 0. Load some data
data(myeloma)
head(myeloma)
# 1. Determine the optimal cutpoint of variables
res.cut % as.data.frame()
group_2 = read_xlsx("37263分组.xlsx" ,sheet = 1)%>% as.data.frame()
con_gene = intersect(data_1[,1],data_2[,1])
rownames(data_1) = data_1[,1]
rownames(data_2) = data_2[,1]
data_1 = data_1[,-1]
data_2 = data_2[,-1]
data_all = cbind(data_1[con_gene,],data_2[con_gene,])
colnames(group_1) = colnames(group_2)
pheno = rbind(group_1,group_2)
batch = c(rep(1,dim(group_1)[1]),rep(2,dim(group_2)[1]))
modcombat = model.matrix(~1, data=pheno)
edata = data_all
edata = matrix(data = as.matrix(data_all), nrow=4748, ncol = 116)
mode(edata) = "numeric"
combat_edata = ComBat(dat=edata, batch=batch, mod=NULL, par.prior=TRUE, prior.plots=FALSE)
##导入数据##
colnames(combat_edata) = c(group_1$SAMPLE,group_2$SAMPLE)
rownames(combat_edata) = con_gene
library(WGCNA)
options(stringsAsFactors = FALSE)
enableWGCNAThreads()
##筛选方差前25%的基因##
expro = combat_edata
datExpr=as.data.frame(t(combat_edata));
nGenes = ncol(datExpr)
nSamples = nrow(datExpr)
##样本聚类检查离群值##
gsg = goodSamplesGenes(datExpr, verbose = 3);
gsg$allOK
sampleTree = hclust(dist(datExpr), method = "average")
plot(sampleTree, main = "Sample clustering to detect outliers"
, sub="", xlab="")
save(datExpr, file = "FPKM-01-dataInput.RData")
##没有离群值,不作处理##
##软阈值筛选##
powers = c(c(1:10), seq(from = 12, to=20, by=2))
sft = pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
par(mfrow = c(1,2));
cex1 = 0.9;
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"));
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
labels=powers,cex=cex1,col="red");
abline(h=0.90,col="red")
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
##作图##
betal = 10
dev.off()
k.dataOne 50,">50","=2")+
scale_x_discrete(
labels = c("CAL(-)(n=165)","CAL(+)(n=61)") # 设置y轴刻度
)+ ylim(-3,6)
p 0.05)){
nonvar = c(nonvar,i)
}
}
return(nonvar)
}
var_need_test = colnames(data_temp)[2:4]
group =data_temp$group_hunhe
var_normal = seek_normal(data_temp,var_need_test,group)
var_nonnormal = setdiff(var_need_test,var_normal)
myVars = colnames(data_temp)[-c(1,5)]
catVars = colnames(data_temp)[c(6,7)]
tab2 |