1.差异基因曼哈顿图

#======================================
# 原创代码无删减，编写不易，若使用代码绘图，请引用：www.tcmbiohub.com

# 祝大家投稿顺利！

## RRA “伪曼哈顿图” + 渐变色（按 meanLogFC）完整脚本
## 输入文件（放在同一工作目录）：
##   up_all.txt, down_all.txt   （未筛选全量）
##   up.txt, down.txt           （可手动筛选，用于放大+标注）
##
## 横坐标含义：
##   idx = 按 Gene 字母排序后的顺序索引（1,2,3,...）
##   block = 把 idx 均分成 18 段的段号（0–17）

##=========

pkgs <- c("ggplot2", "ggrepel", "dplyr", "scales")
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(to_install) > 0) install.packages(to_install, dependencies = TRUE)
invisible(lapply(pkgs, library, character.only = TRUE))

## 1) 文件
up_all_file   <- "up_all.txt"
down_all_file <- "down_all.txt"
up_sel_file   <- "up.txt"
down_sel_file <- "down.txt"

out_tiff <- "RRA_block_plot_gradient_noGreyBand.tiff"
out_png  <- "RRA_block_plot_gradient_noGreyBand.png"
out_pdf  <- "RRA_block_plot_gradient_noGreyBand.pdf"   # 新增：PDF输出

n_block <- 18
label_n_each <- 15

## 2) 读入函数
read_rra_table <- function(file){
  if (!file.exists(file)) stop(paste0("找不到文件：", file))
  df <- read.delim(file, header = TRUE, sep = "\t",
                   stringsAsFactors = FALSE, check.names = FALSE)
  
  cn <- colnames(df)
  pick_col <- function(target){
    if (target %in% cn) return(target)
    hit <- cn[tolower(cn) == tolower(target)]
    if (length(hit) == 1) return(hit)
    return(NA_character_)
  }
  
  gene_col <- pick_col("Gene")
  p_col    <- pick_col("Pvalue")
  padj_col <- pick_col("adjPvalue")
  mfc_col  <- pick_col("meanLogFC")
  
  if (is.na(gene_col)) stop(paste0("File ", file, " 缺少 Gene 列"))
  if (is.na(p_col))    stop(paste0("File ", file, " 缺少 Pvalue 列"))
  if (is.na(padj_col)) stop(paste0("File ", file, " 缺少 adjPvalue 列"))
  if (is.na(mfc_col))  stop(paste0("File ", file, " 缺少 meanLogFC 列"))
  
  out <- df[, c(gene_col, p_col, padj_col, mfc_col)]
  colnames(out) <- c("Gene", "Pvalue", "adjPvalue", "meanLogFC")
  
  out <- out[!is.na(out$Gene) & out$Gene != "", ]
  out$Pvalue    <- suppressWarnings(as.numeric(out$Pvalue))
  out$adjPvalue <- suppressWarnings(as.numeric(out$adjPvalue))
  out$meanLogFC <- suppressWarnings(as.numeric(out$meanLogFC))
  out
}

read_gene_list <- function(file){
  if (!file.exists(file)) stop(paste0("找不到文件：", file))
  df <- read.delim(file, header = TRUE, sep = "\t",
                   stringsAsFactors = FALSE, check.names = FALSE)
  if ("Gene" %in% colnames(df)) return(unique(df$Gene))
  unique(df[[1]])
}

## 3) 读入数据
up_all   <- read_rra_table(up_all_file)
down_all <- read_rra_table(down_all_file)

up_genes   <- read_gene_list(up_sel_file)
down_genes <- read_gene_list(down_sel_file)

df_all <- dplyr::full_join(
  up_all   %>% dplyr::rename(Pvalue_up = Pvalue, adjPvalue_up = adjPvalue, meanLogFC_up = meanLogFC),
  down_all %>% dplyr::rename(Pvalue_down = Pvalue, adjPvalue_down = adjPvalue, meanLogFC_down = meanLogFC),
  by = "Gene"
)

df_all$meanLogFC <- ifelse(!is.na(df_all$meanLogFC_up), df_all$meanLogFC_up, df_all$meanLogFC_down)
df_all$Pvalue    <- ifelse(df_all$meanLogFC >= 0, df_all$Pvalue_up, df_all$Pvalue_down)
df_all$adjPvalue <- ifelse(df_all$meanLogFC >= 0, df_all$adjPvalue_up, df_all$adjPvalue_down)

df_plot <- df_all %>%
  dplyr::select(Gene, Pvalue, adjPvalue, meanLogFC) %>%
  dplyr::filter(!is.na(meanLogFC)) %>%
  dplyr::distinct(Gene, .keep_all = TRUE)

if (nrow(df_plot) == 0) stop("df_plot 是空的：请检查输入文件是否为空或 meanLogFC 是否全是 NA。")

df_plot$Selected <- ifelse(df_plot$Gene %in% up_genes | df_plot$Gene %in% down_genes, "Selected", "All")
df_plot$Selected <- factor(df_plot$Selected, levels = c("All", "Selected"))

## 4) 分块
df_plot <- df_plot %>% dplyr::arrange(Gene)
df_plot$idx <- seq_len(nrow(df_plot))

n_block_use <- min(n_block, nrow(df_plot))
if (n_block_use < 1) n_block_use <- 1

if (n_block_use == 1) {
  df_plot$block <- factor(0)
} else {
  df_plot$block <- cut(df_plot$idx, breaks = n_block_use,
                       labels = 0:(n_block_use - 1), include.lowest = TRUE)
}

block_df <- df_plot %>%
  dplyr::group_by(block) %>%
  dplyr::summarise(xmin = min(idx), xmax = max(idx), .groups = "drop") %>%
  dplyr::mutate(
    block_num = as.integer(as.character(block)),
    xmid = (xmin + xmax) / 2
  )

ymax_abs <- max(abs(df_plot$meanLogFC), na.rm = TRUE)
band_h <- max(0.15, 0.06 * ymax_abs)

block_cols <- scales::hue_pal()(n_block_use)
names(block_cols) <- as.character(0:(n_block_use - 1))

## 5) 标注
df_plot$ColorGroup <- ifelse(df_plot$meanLogFC >= 0, "UP", "DOWN")
label_df <- df_plot %>%
  dplyr::filter(Selected == "Selected") %>%
  dplyr::group_by(ColorGroup) %>%
  dplyr::arrange(dplyr::desc(abs(meanLogFC))) %>%
  dplyr::slice_head(n = label_n_each) %>%
  dplyr::ungroup()

## 6) 作图（已去掉背景灰条带那一层 geom_rect(bg1...)）
p <- ggplot(df_plot, aes(x = idx, y = meanLogFC)) +
  
  ## 0线彩色条（按 block 上色）
  geom_rect(
    data = block_df,
    aes(xmin = xmin - 0.5, xmax = xmax + 0.5, ymin = -band_h, ymax = band_h, fill = as.character(block)),
    inherit.aes = FALSE,
    alpha = 0.95,
    color = NA
  ) +
  
  ## 0线
  geom_hline(yintercept = 0, linewidth = 0.4) +
  
  ## 所有点：颜色按 meanLogFC 渐变
  geom_point(aes(color = meanLogFC), size = 1.1, alpha = 0.75) +
  
  ## 你筛选的点：再叠一层放大
  geom_point(
    data = df_plot %>% dplyr::filter(Selected == "Selected"),
    aes(color = meanLogFC),
    size = 2.2,
    alpha = 0.95
  ) +
  
  ## 标注
  ggrepel::geom_text_repel(
    data = label_df,
    aes(label = Gene, color = meanLogFC),
    size = 3.0,
    max.overlaps = Inf,
    box.padding = 0.35,
    point.padding = 0.15,
    segment.alpha = 0.35
  ) +
  
  ## 配色
  scale_fill_manual(values = block_cols, guide = "none") +
  scale_color_gradient2(
    low = "#2BB316",
    mid = "#FEFD04",
    high = "#E64B35",
    midpoint = 0,
    name = "meanLogFC"
  ) +
  
  scale_x_continuous(
    breaks = block_df$xmid,
    labels = as.character(block_df$block),
    expand = expansion(mult = c(0.01, 0.01))
  ) +
  
  labs(x = "Gene-order bins (0–17)", y = "average log2FC") +
  
  theme_classic(base_size = 12) +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x  = element_text(size = 10),
    legend.title = element_blank(),
    legend.position = "topleft"
  )

## 7) 导出（tiff + png + pdf）
ggsave(out_tiff, p, width = 8, height = 6.5, units = "in", dpi = 400, compression = "lzw")
ggsave(out_png,  p, width = 8, height = 6.5, units = "in", dpi = 300)

## 新增：输出PDF（推荐cairo_pdf，字体更稳定；若不支持则自动用pdf）
if (capabilities("cairo")) {
  ggsave(out_pdf, p, width = 8, height = 6.5, units = "in", device = cairo_pdf)
} else {
  ggsave(out_pdf, p, width = 8, height = 6.5, units = "in", device = "pdf")
}

print(p)

cat("完成：", out_tiff, "\n")
cat("完成：", out_png, "\n")
cat("完成：", out_pdf, "\n")

# 原创代码不易，使用代码绘图，请引用：www.tcmbiohub.com

# 祝大家投稿顺利！