# See: https://bioinformatics-core-shared-training.github.io/RNAseq-R/align-and-count.nb.html
# See: https://bioinf.wehi.edu.au/Rsubread/annot/
library(Rsubread)
library(edgeR)
library(limma)
library(annotables)
# Collect the gzipped FASTQ files to align. The dots in the pattern are
# escaped so only names that literally end in ".fastq.gz" are matched.
fastq.files <- list.files(
  path = "~/Desktop/RNAseq",
  pattern = "\\.fastq\\.gz$",
  full.names = TRUE
)
# Fail early with a clear message instead of building an index for nothing.
if (length(fastq.files) == 0) {
  stop("No .fastq.gz files found in ~/Desktop/RNAseq", call. = FALSE)
}

# Build the Subread index named "GRCh38" from the primary-assembly FASTA.
# (The closing quote of the reference path was a typographic quote, which
# left the string unterminated and made the script unparseable.)
buildindex(
  basename = "GRCh38",
  reference = "~/Downloads/GRCh38.primary_assembly.genome.fa.gz"
)

# Align every FASTQ file against that index. Only readfile1 is supplied and
# no output names are given, so output naming is left to align()'s defaults.
# NOTE(review): this treats the libraries as single-end -- confirm.
align(index = "GRCh38", readfile1 = fastq.files)

# Pick up the BAM files found in the same directory as the reads.
bam.files <- list.files(
  path = "~/Desktop/RNAseq",
  pattern = "\\.BAM$",
  full.names = TRUE
)

# Count reads per gene with the in-built annotation. annot.inbuilt expects a
# genome build code; the code for human GRCh38 is "hg38", not "GRCh38".
fc <- featureCounts(bam.files, annot.inbuilt = "hg38")
# The Ensembl and RefSeq-NCBI annotations were provided via the 'annot.ext' parameter. The RefSeq-Rsubread annotation was provided by specifying 'annot.inbuilt = "hg38"'. The 'useAnnotation' parameter was also set to 'TRUE' when an annotation was provided to the align function.
#The Ensembl gene annotation used in this study was generated in April 2020. Its version number is 100. It was downloaded from ftp://ftp.ensembl.org/pub/release-100/gtf/homo_sapiens/Homo_sapiens.GRCh38.100.gtf.gz.
# The recent RefSeq gene annotation used was released by the NCBI in August 2020. Its release number is 109.20200815 and it is part of the RefSeq release version 202. It was downloaded from the NCBI FTP site ftp://ftp.ncbi.nlm.nih.gov/refseq/H_sapiens/annotation/annotation_releases/109.20200815/GCF_000001405.39_GRCh38.p13/GCF_000001405.39_GRCh38.p13_genomic.gtf.gz. We refer to this RefSeq annotation as 'RefSeq-NCBI' in this study.
# Annotate the count table and write it to disk.

# Count matrix from featureCounts as a data frame (one row per feature).
UTcount <- as.data.frame(fc$counts)

# Annotation table from the annotables package as a plain data frame.
grch <- as.data.frame(grch38)

# Expose the feature identifiers (row names of the count table) as a column
# so they can serve as the join key.
# NOTE(review): presumably Entrez gene IDs, given the in-built annotation --
# confirm.
UTcount$entrez <- rownames(UTcount)

# Join the first three annotation columns onto the counts. The key is named
# explicitly: with an implicit key, merge() silently degrades to a Cartesian
# product if the two tables ever share no column name.
# NOTE(review): an identifier that maps to several annotation rows yields
# duplicated count rows -- confirm this is acceptable downstream.
UT <- merge(grch[, 1:3], UTcount, by = "entrez")

# Write the annotated counts to the working directory.
write.csv(UT, "UT.csv")