# Gene to rs id
library(biomaRt)
## This may take a long time to run if the list contains many genes (> 50).
## hgnc_gene_symbols.txt is a file listing the gene symbols, one per line.
genes <- read.table("~/hgnc_gene_symbols.txt")
ensembl = useMart("ensembl", dataset="hsapiens_gene_ensembl")
dbsnp = useMart("snp", dataset = "hsapiens_snp")
getHGNC2ENSG = getBM(attributes=c('chromosome_name', 'start_position',
'end_position', 'strand', 'ensembl_gene_id',
'hgnc_symbol', 'refseq_mrna'),
filters="hgnc_symbol", values = genes, mart = ensembl)
write.table(getHGNC2ENSG, file="~/hgnc_gene_symbols.txt.ensg.coord.tsv",
sep="t", col.names=T, row.names=T, append = F, quote=FALSE)
# Fetch the variants (rs IDs) annotated to each gene from the SNP mart.
# The "ensembl_gene" filter takes Ensembl gene IDs, so the IDs resolved by the
# HGNC -> Ensembl query (getHGNC2ENSG) are used here, not the raw HGNC symbols.
# unique() is applied because that query can return several rows per gene
# (one per RefSeq mRNA accession).
getRSid4ENSG <- getBM(
  attributes = c(
    "refsnp_id", "allele", "snp", "chr_name", "chrom_start",
    "chrom_strand", "associated_gene", "ensembl_gene_stable_id",
    "synonym_name", "consequence_type_tv"
  ),
  filters = "ensembl_gene",
  values = unique(getHGNC2ENSG$ensembl_gene_id),
  mart = dbsnp
)

# Write the result as a tab-separated file. With both row.names and col.names
# enabled, the header row has one field fewer than the data rows; read.table()
# reads that layout back using the first column as row names.
write.table(
  getRSid4ENSG,
  file = "~/hgnc_gene_symbols.txt.ensg.RSid.coord.tsv",
  sep = "\t",
  col.names = TRUE,
  row.names = TRUE,
  append = FALSE,
  quote = FALSE
)
# Read more here: Source link