自学内容网 自学内容网

R包:APAlyzer从RNA-seq数据计算APA表达丰度

在这里插入图片描述

介绍

今天安利APAlyzer工具,它是通过RNA-seq数据获取3′UTR APA, intronic APA等表达谱的R包。

APAlyzer将bam文件比对到PolyA-DB数据库识别APA。

Most eukaryotic genes produce alternative polyadenylation (APA) isoforms. APA is dynamically regulated under different growth and differentiation conditions. Here, we present a bioinformatics package, named APAlyzer, for examining 3′UTR APA, intronic APA and gene expression changes using RNA-seq data and annotated polyadenylation sites in the PolyA_DB database. Using APAlyzer and data from the GTEx database, we present APA profiles across human tissues.

在这里插入图片描述

教程

library(APAlyzer)
library(TBX20BamSubset)
library(Rsamtools)

# RNA-seq BAM files
flsall = getBamFileList()

# Genomic reference
library(repmis)
URL="https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
file="mm9_REF.RData"
source_data(paste0(URL,file,"?raw=True"))

# Building 3’UTR and intronic PAS reference region at once
refUTRraw=refUTRraw[which(refUTRraw$Chrom=='chr19'),]
dfIPAraw=dfIPA[which(dfIPA$Chrom=='chr19'),]
dfLEraw=dfLE[which(dfLE$Chrom=='chr19'),]   
PASREF=REF4PAS(refUTRraw,dfIPAraw,dfLEraw)
UTRdbraw=PASREF$UTRdbraw
dfIPA=PASREF$dfIPA
dfLE=PASREF$dfLE 

# Building 3’UTR PAS and IPA reference using GTF files
download.file(url='ftp://ftp.ensembl.org/pub/release-99/gtf/mus_musculus/Mus_musculus.GRCm38.99.gtf.gz',
          destfile='Mus_musculus.GRCm38.99.gtf.gz')           
GTFfile="Mus_musculus.GRCm38.99.gtf.gz" 
PASREFraw=PAS2GEF(GTFfile)  
refUTRraw=PASREFraw$refUTRraw
dfIPAraw=PASREFraw$dfIPA
dfLEraw=PASREFraw$dfLE
PASREF=REF4PAS(refUTRraw,dfIPAraw,dfLEraw)

# Building aUTR and cUTR references
refUTRraw=refUTRraw[which(refUTRraw$Chrom=='chr19'),]
UTRdbraw=REF3UTR(refUTRraw)

# Calculation of relative expression
DFUTRraw=PASEXP_3UTR(UTRdbraw, flsall, Strandtype="forward")

# Building intronic polyA references
URL="https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
file="mm9_REF.RData"
source_data(paste0(URL,file,"?raw=True"))

# Calculation of relative expression
dfIPA=dfIPA[which(dfIPA$Chrom=='chr19'),]
dfLE=dfLE[which(dfLE$Chrom=='chr19'),]
IPA_OUTraw=PASEXP_IPA(dfIPA, dfLE, flsall, Strandtype="forward", nts=1)

# Significance analysis of APA events
sampleTable1 = data.frame(samplename = c(names(flsall)),
                    condition = c(rep("NT",3),rep("KD",3)))

# Significantly regulated APA in 3’UTRs
test_3UTRsing=APAdiff(sampleTable2,DFUTRraw, 
                        conKET='NT',
                        trtKEY='KD',
                        PAS='3UTR',
                        CUTreads=0,
                        p_adjust_methods="fdr")
# Visualization of analysis results
APAVolcano(test_3UTRsing, PAS='3UTR', Pcol = "pvalue", top=5, main='3UTR APA')

实战案例

数据

下列样本存成bam_file.tsv

SampleIDBamPath
SRR316184/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316184.bam
SRR316185/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316185.bam
SRR316186/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316186.bam
SRR316187/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316187.bam
SRR316188/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316188.bam
SRR316189/Library/Frameworks/R.framework/Versions/4.1/Resources/library/TBX20BamSubset/extdata/SRR316189.bam

脚本

下列代码存成APAlyzer_Expression.R

suppressPackageStartupMessages({ 
  library(dplyr)
  library(tibble)
  library(optparse)
  library(data.table)
  library(APAlyzer)
  library(TBX20BamSubset)
  library(Rsamtools)
})


option_list <- list(
  make_option(c("-b", "--bam"), 
              type = "character",
              help = "bam csv file (1st column: sampleID; 2nd: bam path)", 
              metavar = "character"),
  make_option(c("-r", "--reference"), 
              type = "character", # RData/gtf
              help = "genomic reference type", 
              metavar = "character"),    
  make_option(c("-g", "--genome"), 
              type = "character",
              help = "genomic reference file", 
              metavar = "character"), 
  make_option(c("-c", "--chromosome"), 
              type = "character",
              default = "all", # chr19
              help = "chromosome to be selected", 
              metavar = "character"),  
  make_option(c("-e", "--expression"), 
              type = "character", 
              default = "all", # 3UTR/IPA
              help = "APA expression: 3UTR and intronic APA", 
              metavar = "character"),  
  make_option(c("-o", "--out"), 
              type = "character",
              help = "output file path", 
              metavar = "character")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# input parameters
bam_path <- opt$bam
ref_type <- opt$reference
ref_path <- opt$genome
chrom <- opt$chromosome
expr_type <- opt$expression
dir <- opt$out


# bam_path <- "bam_file.tsv"
# ref_type <- "RData"
# ref_path <- "mm9_REF.RData"
# chrom <- "chr19"
# expr_type <- "3UTR"
# dir <- "result"


# step1: bam file
bam_vector <- read.table("bam_file.tsv", header = TRUE)
bam_file <- bam_vector$BamPath
names(bam_file) <- bam_vector$SampleID

# step2: genomic reference
if (ref_type == "RData") {
  # data from built reference
  require(repmis)
  URL <- "https://github.com/RJWANGbioinfo/PAS_reference_RData/blob/master/"
  source_data(paste0(URL, ref_path, "?raw=True"))
  
  if (ref_path == "mm9_REF.RData") {
    refUTRraw_temp <- refUTRraw
    dfIPAraw_temp <- dfIPA
    dfLEraw_temp <- dfLE
  } else if (ref_path == "hg19_REF.RData") {
    refUTRraw_temp <- refUTRraw_hg19
    dfIPAraw_temp <- dfIPA_hg19
    dfLEraw_temp <- dfLE_hg19
  }
  
} else if (ref_type == "gtf") {
  # building reference from gtf file
  PASREFraw <- PAS2GEF(ref_path)  
  
  refUTRraw_temp <- PASREFraw$refUTRraw
  dfIPAraw_temp <- PASREFraw$dfIPA
  dfLEraw_temp <- PASREFraw$dfLE
}

# step3: whether to choose chromosome
if (chrom == "all") {
  UTRdbraw <- refUTRraw_temp
  dfIPAraw <- dfIPAraw_temp
  dfLEraw <- dfLEraw_temp   
} else {
  # multiple chromosome or not
  if (length(grep(":", chrom)) > 0) {
    chroms <- unlist(strsplit(chrom, ":"))
  } else {
    chroms <- chrom
  }
  UTRdbraw <- refUTRraw_temp[which(refUTRraw_temp$Chrom %in% chroms), ]
  dfIPAraw <- dfIPAraw_temp[which(dfIPAraw_temp$Chrom %in% chroms), ]
  dfLEraw <- dfLEraw_temp[which(dfLEraw_temp$Chrom %in% chroms), ]
}
## aUTR cUTR
PASREF_temp <- REF4PAS(UTRdbraw, dfIPAraw, dfLEraw)
UTRdb <- PASREF_temp$UTRdbraw
dfIPA <- PASREF_temp$dfIPA
dfLE <- PASREF_temp$dfLE  

# step4: APA expression (3UTR and IPA)
if (expr_type == "all") {
  # 3UTR
  UTR_APA_OUT <- PASEXP_3UTR(UTRdb, bam_file, Strandtype = "forward")
  # IPA
  IPA_OUT <- PASEXP_IPA(dfIPA, dfLE, bam_file, Strandtype = "invert", nts = 4)
  
  final_OUT <- list(UTR = UTR_APA_OUT,
                    IPA = IPA_OUT)
} else if (expr_type == "3UTR") { 
  # 3UTR
  final_OUT <- PASEXP_3UTR(UTRdb, bam_file, Strandtype = "forward")  
} else if (expr_type == "IPA") { 
  final_OUT <- PASEXP_IPA(dfIPA, dfLE, bam_file, Strandtype = "invert", nts = 4)
}

# step5: output
if (!dir.exists(dir)) {
  dir.create(dir, recursive = TRUE)
}

if (!is.data.frame(final_OUT)) {
  file_name <- paste0(dir, "/APA_Expr_", expr_type, ".RDS")
  saveRDS(final_OUT, file_name, compress = TRUE)
} else {
  file_name <- paste0(dir, "/APA_Expr_", expr_type, ".tsv")
  write.table(final_OUT, file_name, quote = F, row.names = F, sep = "\t")
}

print("Program Ended without Problems")

运行

在命令行模式下运行该命令

Rscript APAlyzer_Expression.R \
  -b bam_file.tsv \
  -r RData \
  -g mm9_REF.RData \
  -c chr19 \
  -e 3UTR \
  -o result

原文地址:https://blog.csdn.net/H20230717/article/details/142829351

免责声明:本站文章内容转载自网络资源,如本站内容侵犯了原著者的合法权益,可联系本站删除。更多内容请关注自学内容网(zxcms.com)!