Hi, I would use the pre-built Bioconductor annotation databases / packages for this array (I have used this array a few times over the years):
- mogene10sttranscriptcluster.db
- mogene10stprobeset.db
Most likely mogene10sttranscriptcluster.db
is what you want:
# Attach the annotation package. library() stops with an error when the
# package is not installed, whereas require() only returns FALSE.
library(mogene10sttranscriptcluster.db)
# List the annotation fields that can be requested from this database
columns(mogene10sttranscriptcluster.db)
[1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT" "ENSEMBLTRANS"
[6] "ENTREZID" "ENZYME" "EVIDENCE" "EVIDENCEALL" "GENENAME"
[11] "GO" "GOALL" "IPI" "MGI" "ONTOLOGY"
[16] "ONTOLOGYALL" "PATH" "PFAM" "PMID" "PROBEID"
[21] "PROSITE" "REFSEQ" "SYMBOL" "UNIGENE" "UNIPROT"
# Peek at the first few keys of the database (these are the IDs passed as
# PROBEID keys in the select() call below)
head(keys(mogene10sttranscriptcluster.db))
[1] "10338001" "10338002" "10338003" "10338004" "10338005" "10338006"
# Build the annotation table: look up every key in the database and return
# its gene symbol, Entrez gene ID and Ensembl gene ID.
# select() names this argument `columns` (plural); spelling it out avoids
# relying on partial argument matching.
annotTable <- select(
  mogene10sttranscriptcluster.db,
  keys = keys(mogene10sttranscriptcluster.db),
  columns = c("PROBEID", "SYMBOL", "ENTREZID", "ENSEMBL"),
  keytype = "PROBEID")
# Show the first rows; probes without annotation come back as NA
head(annotTable)
PROBEID SYMBOL ENTREZID ENSEMBL
1 10338001 <NA> <NA> <NA>
2 10338002 <NA> <NA> <NA>
3 10338003 <NA> <NA> <NA>
4 10338004 <NA> <NA> <NA>
5 10338005 <NA> <NA> <NA>
6 10338006 <NA> <NA> <NA>
# Rows x columns of the full table, including rows with no annotation
dim(annotTable)
[1] 53244 4
There is data there – don’t worry / no te preocupes / لا تقلق:
# Show the first rows that do have a gene symbol
head(annotTable[!is.na(annotTable$SYMBOL),])
PROBEID SYMBOL ENTREZID ENSEMBL
6618 10344624 Lypla1 18777 ENSMUSG00000025903
6619 10344633 Tcea1 21399 ENSMUSG00000033813
6620 10344637 Atp6v1h 108664 ENSMUSG00000033793
6621 10344653 Oprk1 18387 ENSMUSG00000025905
6622 10344658 Rb1cc1 12421 ENSMUSG00000025907
6623 10344674 Alkal1 620393 ENSMUSG00000087247
# Rows x columns after dropping rows without a gene symbol
dim(annotTable[!is.na(annotTable$SYMBOL),])
[1] 42056 4
# Keep only the rows that carry a gene symbol
annotTable.filt <- annotTable[!is.na(annotTable$SYMBOL), ]
# Select 10 random probes. seq_len() is the safe way to build the row index
# (1:nrow() would give c(1, 0) on an empty table).
probes <- annotTable.filt$PROBEID[sample(seq_len(nrow(annotTable.filt)), 10)]
# Look up the gene symbol for each sampled probe; the result is a character
# vector named by probe ID
mapIds(
  mogene10sttranscriptcluster.db,
  keys = probes,
  column = "SYMBOL",
  keytype = "PROBEID")
10367033 10519951 10396485 10431424 10398173 10550770 10403558
"Zbtb39" "Gsap" "Syne2" "Plxnb2" "Vrk1" "Vmn1r114" "Ero1lb"
10363512 10596053 10608237
"Sar1a" "Pccb" "Gm20736"
Kevin
Read more here: Source link