SCI图片复现:批量散点柱状图
本文代码为一组基因分别绘制带散点的表达量箱线图,并对各分组两两进行 t 检验。运行需要两个 CSV 文件:"Exp.csv"(表达量数据,行为样本、列为基因)和 "info.csv"(样本信息,包含 Sample 和 Type 两列)。
1. 生成模拟数据
# Simulate a small expression data set plus its sample annotation and write
# both to CSV files in the current working directory.
#
# NOTE: no setwd() call is made here. setwd("") always fails with
# "cannot change working directory"; start R in (or point your IDE at) the
# folder where the files should be written instead.

# Fix the RNG seed so the simulated values are reproducible.
set.seed(123)

# 20 samples (rows) x 10 genes (columns), values drawn uniformly from [0, 1].
Exp <- matrix(runif(200), nrow = 20)
colnames(Exp) <- c(
  "CD28", "CD3D", "CD8A", "LCK", "GATA3",
  "EOMES", "IL23A", "CXCL8", "IL1R2", "IL1R1"
)
rownames(Exp) <- paste0("Sample_", 1:20)

# Sample annotation: each sample is randomly assigned to one of four groups.
info <- data.frame(
  Sample = rownames(Exp),
  Type = sample(
    c("Asymptomatic", "Mild", "Severe", "Critical"),
    20,
    replace = TRUE
  )
)

# Persist both tables; sample IDs are kept as the row names of Exp.csv.
write.csv(Exp, "Exp.csv", row.names = TRUE)
write.csv(info, "info.csv", row.names = FALSE)

# Quick look at the generated data.
head(Exp)
head(info)
2. 修改代码以适应生成的数据
模拟数据由 runif() 生成,服从 [0,1] 上的均匀分布(并非正态分布),数值范围已经适合直接作图,因此这里不再执行对数转换。对真实表达量数据,请根据数据分布决定是否先做对数转换。
# Load the expression table and the sample annotation from CSV, keep the genes
# to plot, and attach each sample's group label as the factor column `sam`.

# The first CSV column holds the sample IDs and becomes the row names.
Exp <- read.csv("Exp.csv", header = TRUE, row.names = 1)
info <- read.csv("info.csv", header = TRUE)

# Genes to plot.
gene <- c(
  "CD28", "CD3D", "CD8A", "LCK", "GATA3",
  "EOMES", "IL23A", "CXCL8", "IL1R2", "IL1R1"
)

# Fail early with a readable message if a requested gene is not in the table.
missing_genes <- setdiff(gene, colnames(Exp))
if (length(missing_genes) > 0) {
  stop(
    "Genes not found in Exp.csv: ", paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}
# drop = FALSE keeps a data frame even when only one gene is requested.
Exp_plot <- Exp[, gene, drop = FALSE]

# Align expression rows and annotation rows by sample ID. Matching on the ID
# strings keeps the pairing explicit; subsetting with a factor would instead
# use the factor's integer codes and only work by coincidence.
shared <- intersect(rownames(Exp_plot), info$Sample)
if (length(shared) == 0) {
  stop("No sample IDs are shared between Exp.csv and info.csv", call. = FALSE)
}
Exp_plot <- Exp_plot[shared, , drop = FALSE]
info <- info[match(shared, info$Sample), , drop = FALSE]
# Sample stays a factor ordered like the expression table rows.
info$Sample <- factor(info$Sample, levels = rownames(Exp))

# Group label per sample, with the levels in the order used on the x axis.
Exp_plot$sam <- factor(
  info$Type,
  levels = c("Asymptomatic", "Mild", "Severe", "Critical")
)
# Continue with the plotting code below.
3. 代码优化及注释
# Build one jittered box plot per gene, label every pairwise group comparison
# with its t-test significance level, and tile the plots into a single grid.

# Packages that provide ggboxplot()/stat_compare_means(), theme()/ggtitle(),
# and plot_grid(); loaded here so this step runs on its own.
library(ggpubr)
library(ggplot2)
library(cowplot)

# One colour per group, in the same order as `groups`.
col <- c("#5CB85C", "#337AB7", "#F0AD4E", "#D9534F")

# Every unordered pair of groups (6 pairs for 4 groups).
groups <- c("Asymptomatic", "Mild", "Severe", "Critical")
comparisons <- combn(groups, 2, simplify = FALSE)

# Preallocate one slot per gene instead of growing the list in the loop.
plist2 <- vector("list", length(gene))

# seq_along() is safe when `gene` is empty (1:length(gene) would yield 1, 0).
for (i in seq_along(gene)) {
  # Two-column frame for the current gene: its expression and the group label.
  bar_tmp <- Exp_plot[, c(gene[i], "sam")]
  colnames(bar_tmp) <- c("Expression", "sam")

  # Box plot with jittered points, titled with the gene name.
  pb1 <- ggboxplot(
    bar_tmp,
    x = "sam", y = "Expression", color = "sam", add = "jitter",
    bxp.errorbar.width = 0.6, width = 0.4, size = 0.01,
    font.label = list(size = 30), palette = col
  ) +
    theme(
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.x = element_text(size = 15, angle = 45, vjust = 1, hjust = 1),
      axis.text.y = element_text(size = 15),
      plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
      legend.position = "none"
    ) +
    ggtitle(gene[i]) +
    # Pairwise t-tests; non-significant pairs are still labelled.
    stat_compare_means(
      method = "t.test", hide.ns = FALSE,
      comparisons = comparisons, label = "p.signif"
    )

  plist2[[i]] <- pb1
}

# Combine all per-gene plots into one figure with four columns.
pall <- plot_grid(plotlist = plist2, ncol = 4)
pall
4. 优化图片的美观度
# Polish the figure: apply a classic theme with larger black text to every
# per-gene plot, then rebuild the combined grid so that `pall` (the object
# that gets saved) actually carries the new styling. Restyling `pb1` alone
# would only touch the plot of the last gene and leave `pall` unchanged.
# NOTE(review): theme_classic() is a complete theme, so it is expected to
# reset earlier theme() tweaks such as legend.position = "none" - confirm
# that a legend and axis titles are wanted in every panel.
polished_theme <- theme_classic() +
  theme(
    axis.text.x = element_text(
      size = 15, angle = 45, vjust = 1, hjust = 1, color = "black"
    ),
    axis.text.y = element_text(size = 15, color = "black"),
    axis.title = element_text(size = 16, face = "bold"),
    plot.title = element_text(
      hjust = 0.5, size = 18, face = "bold", color = "black"
    ),
    legend.title = element_text(size = 14, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Restyle every stored plot.
plist2 <- lapply(plist2, function(p) p + polished_theme)

# `pb1` keeps referring to the (now restyled) plot of the last gene.
pb1 <- plist2[[length(plist2)]]

# Re-assemble the grid from the restyled plots.
pall <- plot_grid(plotlist = plist2, ncol = 4)
5. 保存生成的结果
# Write the combined figure to a 10 x 10 PDF.
ggsave(filename = "boxplots.pdf", plot = pall, width = 10, height = 10)
# Export the plotted table (expression values plus group column) as CSV,
# keeping the sample IDs as row names.
write.csv(x = Exp_plot, file = "Exp_plot.csv", row.names = TRUE)
完整代码
# 加载必要的包
library(RColorBrewer)
library(ggpubr)
library(ggplot2)
library(cowplot)
# Simulate a small expression data set plus its sample annotation and write
# both to CSV files in the current working directory.
#
# NOTE: no setwd() call is made here. A hard-coded, user-specific directory
# fails on any other machine; start R in (or point your IDE at) the folder
# where the files should be written instead.

# Fix the RNG seed so the simulated values are reproducible.
set.seed(123)

# 20 samples (rows) x 10 genes (columns), values drawn uniformly from [0, 1].
# nrow * ncol must equal the 200 simulated values, and ncol must equal the
# number of gene names assigned below.
Exp <- matrix(runif(200), nrow = 20, ncol = 10)
colnames(Exp) <- c(
  "CD28", "CD3D", "CD8A", "LCK", "GATA3",
  "EOMES", "IL23A", "CXCL8", "IL1R2", "IL1R1"
)
rownames(Exp) <- paste0("Sample_", 1:20)

# Sample annotation: each sample is randomly assigned to one of four groups.
info <- data.frame(
  Sample = rownames(Exp),
  Type = sample(
    c("Asymptomatic", "Mild", "Severe", "Critical"),
    20,
    replace = TRUE
  )
)

# Persist both tables; sample IDs are kept as the row names of Exp.csv.
write.csv(Exp, "Exp.csv", row.names = TRUE)
write.csv(info, "info.csv", row.names = FALSE)
# Load the expression table and the sample annotation from CSV, keep the genes
# to plot, and attach each sample's group label as the factor column `sam`.

# The first CSV column holds the sample IDs and becomes the row names.
Exp <- read.csv("Exp.csv", header = TRUE, row.names = 1)
info <- read.csv("info.csv", header = TRUE)

# Genes to plot.
gene <- c(
  "CD28", "CD3D", "CD8A", "LCK", "GATA3",
  "EOMES", "IL23A", "CXCL8", "IL1R2", "IL1R1"
)

# Fail early with a readable message if a requested gene is not in the table.
missing_genes <- setdiff(gene, colnames(Exp))
if (length(missing_genes) > 0) {
  stop(
    "Genes not found in Exp.csv: ", paste(missing_genes, collapse = ", "),
    call. = FALSE
  )
}
# drop = FALSE keeps a data frame even when only one gene is requested.
Exp_plot <- Exp[, gene, drop = FALSE]

# Align expression rows and annotation rows by sample ID. Matching on the ID
# strings keeps the pairing explicit; subsetting with a factor would instead
# use the factor's integer codes and only work by coincidence.
shared <- intersect(rownames(Exp_plot), info$Sample)
if (length(shared) == 0) {
  stop("No sample IDs are shared between Exp.csv and info.csv", call. = FALSE)
}
Exp_plot <- Exp_plot[shared, , drop = FALSE]
info <- info[match(shared, info$Sample), , drop = FALSE]
# Sample stays a factor ordered like the expression table rows.
info$Sample <- factor(info$Sample, levels = rownames(Exp))

# Group label per sample, with the levels in the order used on the x axis.
Exp_plot$sam <- factor(
  info$Type,
  levels = c("Asymptomatic", "Mild", "Severe", "Critical")
)
# Build one jittered box plot per gene, label every pairwise group comparison
# with its t-test significance level, and tile the plots into a single grid.

# One colour per group, in the same order as `groups`.
col <- c("#5CB85C", "#337AB7", "#F0AD4E", "#D9534F")

# Every unordered pair of groups (6 pairs for 4 groups).
groups <- c("Asymptomatic", "Mild", "Severe", "Critical")
comparisons <- combn(groups, 2, simplify = FALSE)

# Preallocate one slot per gene instead of growing the list in the loop.
plist2 <- vector("list", length(gene))

# seq_along() is safe when `gene` is empty (1:length(gene) would yield 1, 0).
for (i in seq_along(gene)) {
  # Two-column frame for the current gene: its expression and the group label.
  bar_tmp <- Exp_plot[, c(gene[i], "sam")]
  colnames(bar_tmp) <- c("Expression", "sam")

  # Box plot with jittered points, titled with the gene name.
  pb1 <- ggboxplot(
    bar_tmp,
    x = "sam", y = "Expression", color = "sam", add = "jitter",
    bxp.errorbar.width = 0.6, width = 0.4, size = 0.01,
    font.label = list(size = 30), palette = col
  ) +
    theme(
      panel.background = element_blank(),
      axis.line = element_line(colour = "black"),
      axis.title.x = element_blank(),
      axis.title.y = element_blank(),
      axis.text.x = element_text(
        size = 15, angle = 45, vjust = 1, hjust = 1, color = "black"
      ),
      axis.text.y = element_text(size = 15, color = "black"),
      plot.title = element_text(
        hjust = 0.5, size = 18, face = "bold", color = "black"
      ),
      legend.position = "none"
    ) +
    ggtitle(gene[i]) +
    # Pairwise t-tests; non-significant pairs are still labelled.
    stat_compare_means(
      method = "t.test", hide.ns = FALSE,
      comparisons = comparisons, label = "p.signif"
    )

  plist2[[i]] <- pb1
}

# Combine all per-gene plots into one figure with four columns.
pall <- plot_grid(plotlist = plist2, ncol = 4)
# Draw the combined figure explicitly so it also shows when the script is
# source()d rather than run line by line.
print(pall)
# Write the combined figure to a 10 x 10 PDF.
ggsave(filename = "boxplots.pdf", plot = pall, width = 10, height = 10)
# Export the plotted table (expression values plus group column) as CSV,
# keeping the sample IDs as row names.
write.csv(x = Exp_plot, file = "Exp_plot.csv", row.names = TRUE)
阅读剩余
THE END