Nextflow Singularity/Apptainer error: command not found

I have a nextflow workflow for which I am running with sge + apptainer profile using the command nextflow run main.nf -profile sge,apptainer
but I am receiving the error line #: bwa: command not found

I have tried running apptainer run workflow.sif to check, and bwa as well as other tools seem to be properly installed. I am not sure why nextflow is not finding the tools…

The workflow.sif file is built by running apptainer build workflow.sif Apptainer

The Apptainer file to build the .sif is:

Bootstrap: docker
From: rocker/r-ubuntu:22.04

%post
# automake
apt-get update \
    && apt-get install -y --no-install-recommends build-essential automake bzip2 wget unzip \
    python3 python3-dev python3-pip python3-venv git git-lfs default-jdk ant \
    libbz2-dev libsdl1.2-dev liblzma-dev libcurl4-openssl-dev zlib1g-dev libxml2-dev \
    r-cran-tidyverse bwa samtools multiqc datamash && rm -rf /var/lib/apt/lists/*

# CONDA 
%environment
    export LC_ALL=C
    export LC_NUMERIC=en_GB.UTF-8
    export PATH="/opt/miniconda/bin:$PATH"
%post
    #essential stuff but minimal
    apt update
    #for security fixe:
    #apt upgrade -y
    apt install -y wget bzip2
    #install conda
    cd /opt
    rm -fr miniconda
    #miniconda3: get miniconda3 version 4.7.12
    wget https://repo.continuum.io/miniconda/Miniconda3-4.7.12-Linux-x86_64.sh -O miniconda.sh
    #install conda
    bash miniconda.sh -b -p /opt/miniconda
    export PATH="/opt/miniconda/bin:$PATH"
    #add channels
    conda config --add channels defaults
    conda config --add channels bioconda
    conda config --add channels conda-forge
    #install trimmomatic
    conda install -y -c conda-forge -c bioconda nextflow
    conda install -y -c conda-forge -c bioconda trimmomatic
    conda install -y -c conda-forge -c bioconda gatk4
    conda install -y -c conda-forge -c bioconda fastqc
    #cleanup
    conda clean -y --all
    rm -f /opt/miniconda.sh
    apt autoremove --purge
    apt clean

# RSTUDIO
mkdir -p /usr/local/lib/R/etc/ /usr/lib/R/etc/
echo "options(repos = c(CRAN = 'https://cran.rstudio.com/'), download.file.method = 'libcurl', Ncpus = 4)" | tee /usr/local/lib/R/etc/Rprofile.site | tee /usr/lib/R/etc/Rprofile.site
R -e 'install.packages("remotes")'
# Update apt-get
Rscript -e 'install.packages("remotes", version = "2.4.2")'
Rscript -e 'remotes::install_cran("rmarkdown",upgrade="never", version = "2.19")'
Rscript -e 'remotes::install_cran("knitr",upgrade="never", version = "1.41")'
Rscript -e 'remotes::install_cran("tidyverse",upgrade="never", version = "1.3.2")'
Rscript -e 'remotes::install_cran("plotly",upgrade="never", version = "4.10.1")'
Rscript -e 'remotes::install_cran("RColorBrewer",upgrade="never", version = "1.1-3")'
Rscript -e 'remotes::install_cran("data.table",upgrade="never", version = "1.14.6")'
Rscript -e 'remotes::install_cran("viridis",upgrade="never", version = "0.6.2")'
Rscript -e 'remotes::install_cran("DT",upgrade="never", version = "0.26")'

%runscript
exec /bin/bash "$@"
%startscript
exec /bin/bash "$@"

The nextflow.config is:

params {
    ...

    max_memory      = 10.GB       
    max_cpus        = 4    
    max_time="48.h"         
}

process {
    withLabel: big_mem {
        cpus = "${params.max_cpus}"
        memory = "${params.max_memory}"
        time = "${params.max_time}"
        penv = 'smp' 
    }
}

profiles {
    conda {
        conda.enabled = true
        docker.enabled = false
        apptainer.enabled = false
        process.conda = "./envs/env.yml"
    }
    mamba {
        conda.enabled       = true
        conda.useMamba      = true
        docker.enabled      = false
        apptainer.enabled   = false
    }
    docker {
        conda.enabled           = false
        docker.enabled          = true
        docker.userEmulation    = true
        apptainer.enabled       = false
        process.container       = "directory/myworkflow:latest"
    }
    apptainer {
        conda.enabled           = false
        apptainer.enabled       = true
        apptainer.autoMounts    = true
        docker.enabled          = false
        process.container="file://myworkflow.sif"
    }
    sge {
        process {
            executor        = "sge"
            scratch         = true
            stageInMode     = "copy"
            stageOutMode    = "move"
            errorStrategy   = "retry"
            clusterOptions="-S /bin/bash -o job.log -e job.err"
        }
        executor {
            queueSize = 1000
        }
    } 

}

manifest {
    name="directory/myworkflow"
    homePage="https://github.com/directory/myworkflow"
    description     = 'analysis pipeline'
    mainScript="main.nf"
    nextflowVersion = '!>=22.10.0'
    version         = '1.1.0'
}

env {
    PYTHONNOUSERSITE = 1
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
}

// keep trace
trace {
    enabled = true
    file = "${params.outdir}/trace.txt"
    overwrite = true
}

// keep report
report {
    enabled = true
    file = "${params.outdir}/report.html"
    overwrite = true
}

// Function to ensure that resource requirements don't go beyond
// a maximum limit
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
                return params.max_memory as nextflow.util.MemoryUnit
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
                return params.max_time as nextflow.util.Duration
            else
                return obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
}

Read more here: Source link