Get gene names from rs SNP ids

Gene to rs id

library(biomaRt)

## This may take a long time to run if the list contains many genes (>50).

## hgnc_gene_symbols.txt is a plain-text file listing the gene symbols, one per line.
## Read the gene symbols into a one-column data frame (column V1).
## colClasses = "character" stops read.table() from guessing the column type:
## without it a symbol such as "T" is parsed as logical TRUE, and on R < 4.0
## the column is turned into a factor.
genes <- read.table("~/hgnc_gene_symbols.txt", colClasses = "character")

## Open the two BioMart connections used below: the human gene dataset and
## the human SNP dataset.
## NOTE(review): recent Ensembl/biomaRt releases may expect the mart name
## "ENSEMBL_MART_SNP" instead of "snp" -- confirm with listMarts().
ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
dbsnp <- useMart("snp", dataset = "hsapiens_snp")

## Query the gene mart for coordinates, strand, Ensembl gene ID and RefSeq
## mRNA ID of every symbol in the first column of `genes`. The symbols are
## passed as a plain character vector so the query does not depend on how
## the data frame column was typed (character vs. factor).
hgnc_symbols <- as.character(genes[[1]])
getHGNC2ENSG <- getBM(
  attributes = c("chromosome_name", "start_position", "end_position",
                 "strand", "ensembl_gene_id", "hgnc_symbol", "refseq_mrna"),
  filters = "hgnc_symbol",
  values = hgnc_symbols,
  mart = ensembl
)

## Write the result as a tab-separated file. The separator must be the
## escape sequence "\t"; a bare "t" would join the fields with the letter t.
## Row names are omitted so the header line has the same number of fields
## as the data lines.
write.table(getHGNC2ENSG, file = "~/hgnc_gene_symbols.txt.ensg.coord.tsv",
            sep = "\t", col.names = TRUE, row.names = FALSE, append = FALSE,
            quote = FALSE)

## Query the SNP mart for the variants annotated to the genes found above.
## The "ensembl_gene" filter is given the Ensembl gene IDs returned by the
## gene-mart query (getHGNC2ENSG), not the HGNC symbols held in `genes`.
## IDs are de-duplicated because getHGNC2ENSG can contain several rows per
## gene (e.g. one per RefSeq mRNA); missing/empty IDs are dropped.
ensg_ids <- unique(as.character(getHGNC2ENSG$ensembl_gene_id))
ensg_ids <- ensg_ids[!is.na(ensg_ids) & nzchar(ensg_ids)]

getRSid4ENSG <- getBM(
  attributes = c("refsnp_id", "allele", "snp", "chr_name", "chrom_start",
                 "chrom_strand", "associated_gene", "ensembl_gene_stable_id",
                 "synonym_name", "consequence_type_tv"),
  filters = "ensembl_gene",
  values = ensg_ids,
  mart = dbsnp
)

## Write the result as a tab-separated file. The separator must be the
## escape sequence "\t"; a bare "t" would join the fields with the letter t.
## Row names are omitted so the header line has the same number of fields
## as the data lines.
write.table(getRSid4ENSG, file = "~/hgnc_gene_symbols.txt.ensg.RSid.coord.tsv",
            sep = "\t", col.names = TRUE, row.names = FALSE, append = FALSE,
            quote = FALSE)

Read more here: Source link