Agilent-016436 Human miRNA Microarray 1.0

Upon request, a quick tutorial for processing the Agilent micro-RNA (miRNA) microarray data of GSE28955.

The raw TXT files are contained in: ftp.ncbi.nlm.nih.gov/geo/series/GSE28nnn/GSE28955/suppl/GSE28955_RAW.tar

  1. Download this TAR file
  2. Unpack it [the TAR file]
  3. Unzip the txt.gz files
  4. Store these [txt files] in a directory raw/

Then, create a tab-delimited file, targets.txt, that looks like:

FileName            desc    group
raw/GSM717459.txt   AsPC-1  cancer
raw/GSM717460.txt   BxPC-3  cancer
raw/GSM717461.txt   Capan-1 cancer
raw/GSM717462.txt   Capan-2 cancer
raw/GSM717463.txt   CFPAC-1 cancer
raw/GSM717464.txt   DanG    cancer
raw/GSM717465.txt   HPAC    cancer
raw/GSM717466.txt   HPAF-II cancer
raw/GSM717467.txt   Hs700T  cancer
raw/GSM717468.txt   Hs766T  cancer
raw/GSM717469.txt   HupT3   cancer
raw/GSM717470.txt   HupT4   cancer
raw/GSM717471.txt   MIAPaCa-2   cancer
raw/GSM717472.txt   Panc-1  cancer
raw/GSM717473.txt   SU.86.86    cancer
raw/GSM717474.txt   SW1990  cancer
raw/GSM717475.txt   Ambion  normal
raw/GSM717476.txt   Biochain-1  normal
raw/GSM717477.txt   Biochain-5  normal
raw/GSM717478.txt   Clontech    normal
raw/GSM717479.txt   Pool_slide1 normal
raw/GSM717480.txt   Pool_slide2 normal
raw/GSM717481.txt   Pool_slide3 normal
targetsfile <- 'targets.txt'
require(limma)
# read in the data
# readTargets will by default look for the 'FileName' column in the specified file
targetinfo <- readTargets(targetsfile, sep = 't')

# convert the data to an EListRaw object, which is a data object for single channel data
# specify green.only = TRUE for Agilent
# retain information about background via gIsWellAboveBG
project <- read.maimages(
  targetinfo,
  source="agilent.median",
  green.only = TRUE,
  other.columns="gIsWellAboveBG")
colnames(project) <- sub('raw\/', '', colnames(project))
# perform background correction on the fluorescent intensities
project.bgcorrect <- backgroundCorrect(project, method = 'normexp')

# normalize the data with the 'quantile' method
project.bgcorrect.norm <- normalizeBetweenArrays(project.bgcorrect,
  method = 'quantile')
# filter out:
#   - control probes
#   - probes with no name
#   - probes whose signal falls below background in 3 or more samples
Control <- project.bgcorrect.norm$genes$ControlType==1L
NoSymbol <- is.na(project.bgcorrect.norm$genes$GeneName)
IsExpr <- rowSums(project.bgcorrect.norm$other$gIsWellAboveBG > 0) >= 3
project.bgcorrect.norm.filt <- project.bgcorrect.norm[!Control & !NoSymbol & IsExpr, ]
dim(project.bgcorrect.norm)
dim(project.bgcorrect.norm.filt)

# for replicate probes, replace values with the mean
# ID is used to identify the replicates
project.bgcorrect.norm.filt.mean <- avereps(project.bgcorrect.norm.filt,
  ID = project.bgcorrect.norm.filt$genes$ProbeName)
dim(project.bgcorrect.norm.filt.mean)

micro RNA annotation

project.bgcorrect.norm.filt.mean$genes)
   Row Col ProbeUID ControlType      ProbeName      GeneName SystematicName
4    1   4        5           0 A_25_P00010390   hsa-miR-30b    hsa-miR-30b
5    1   5        7           0 A_25_P00010956   hsa-miR-379    hsa-miR-379
7    1   7       10           0 A_25_P00010912   hsa-miR-634    hsa-miR-634
10   1  11       16           0 A_25_P00010666   hsa-miR-662    hsa-miR-662
16   1  22       31           0 A_25_P00010697 hsa-miR-519e*  hsa-miR-519e*
17   1  23       33           0 A_25_P00010752    hsa-miR-32     hsa-miR-32

expression data

project.bgcorrect.norm.filt.mean$E[1:5,1:5]
               GSM717459 GSM717460 GSM717461 GSM717462 GSM717463
A_25_P00010390  7.892000  6.715211  6.396424  7.001778  6.975865
A_25_P00010956  4.823109  4.737696  4.836087  4.814941  4.829373
A_25_P00010912  5.078833  5.173138  5.252949  5.195864  5.093929
A_25_P00010666  5.994890  6.508853  6.375115  6.377417  6.506460
A_25_P00010697  5.279065  5.336263  5.215905  5.150749  5.140071

See some simple code here: build the expression matrix step by step from GEO raw data

For example:

hist(project.bgcorrect.norm.filt.mean$E)
boxplot(project.bgcorrect.norm.filt.mean$E)

require(PCAtools)
p <- pca(
  project.bgcorrect.norm.filt.mean$E,
  metadata = project.bgcorrect.norm.filt.mean$targets)
biplot(p, colby = 'group', legendPosition = 'bottom')

Captura-de-tela-de-2021-08-18-01-08-47

targets <- project.bgcorrect.norm.filt.mean$targets
targets$group <- factor(targets$group, levels = c('normal','cancer'))
expr <- project.bgcorrect.norm.filt.mean$E

# Create the study design
design <- model.matrix(~ 0 + targets$group)
colnames(design) <- c('normal', 'cancer')

# Fit the linear model on the study's data
project.fitmodel <- lmFit(
  expr,
  design)

# Applying the empirical Bayes method to the fitted values
# Acts as an extra normalisation step and aims to bring the different
#   probe-wise variances to common values
project.fitmodel.eBayes <- eBayes(project.fitmodel)
names(project.fitmodel.eBayes)

# Make individual contrasts: cancer vs normal
res <- makeContrasts(res="cancer-normal", levels = design)
res.fitmodel <- contrasts.fit(project.fitmodel.eBayes, res)
res.fitmodel.eBayes <- eBayes(res.fitmodel)

toptable <- topTable(
  res.fitmodel.eBayes,
  adjust="BH",
  coef="res",
  number = Inf,
  p.value = 1)
head(toptable)

                   logFC  AveExpr         t      P.Value    adj.P.Val        B
A_25_P00010700 -3.827306 5.923667 -46.66596 2.575366e-24 1.506589e-21 45.60756
A_25_P00010424 -4.392465 6.047750 -37.79800 3.112054e-22 9.102757e-20 41.00203
A_25_P00010014 -4.394076 6.138899 -33.61272 4.435544e-21 8.149164e-19 38.39282
A_25_P00010110 -5.514454 6.420097 -33.27500 5.572078e-21 8.149164e-19 38.16726
A_25_P00010425 -3.217659 5.653281 -31.37645 2.096846e-20 2.453310e-18 36.85255
A_25_P00010109 -6.444659 6.695490 -29.48774 8.475559e-20 8.263670e-18 35.45955

Kevin

Read more here: Source link