Bioinformatics script using Python/Biopython/Clustalw using stdout to iterate over a directory of proteins

What exactly is the error you are seeing? You shouldn’t set sys.stderr and sys.stdout to string values (calling clustalw_cline() returns ClustalW's stdout and stderr as strings), because afterwards you won’t be able to write anything to stdout from Python.

I tried to clean up and correct your code below.

#!/usr/bin/env python

import Bio
import os
from glob import glob
from Bio.Align.Applications import ClustalwCommandline
from Bio import Seq
from Bio import SeqIO
from Bio import AlignIO
from Bio.SeqRecord import SeqRecord
import subprocess
from subprocess import Popen
clustal_loc=r"/Users/Wes/Desktop/eggNOG_files/clustalw-2.1-macosx/clustalw2"

# Align every .fasta file in a user-supplied folder with ClustalW and write
# the result next to each input as <name>.pir.
#
# Any failure (missing ClustalW binary, unreadable FASTA, ClustalW returning
# an error) is reported with a short hint plus the underlying error text.
try:
    folder = raw_input("Enter the folder of .fasta files to iterate over and align: ")
    listing = glob(os.path.join(folder, '*.fasta'))

    # The binary location does not change between files, so check it once.
    # An explicit raise is used because `assert` is stripped under `python -O`.
    if not os.path.isfile(clustal_loc):
        raise IOError("Clustal W not found")

    for infile in listing:
        print(infile)

        # splitext() returns a (root, ext) tuple; only the root is wanted.
        outfile = os.path.splitext(infile)[0] + '.pir'

        # Summarise the sequences that are about to be aligned.
        for record in SeqIO.parse(infile, "fasta"):
            print("Name: %s, size: %s" % (record.id, len(record.seq)))

        # The wrapper builds a command line, so it needs file *paths*, not
        # open file objects; ClustalW itself creates the output file.
        clustalw_cline = ClustalwCommandline(clustal_loc, infile=infile,
                                             outfile=outfile, align=True,
                                             outorder="ALIGNED", convert=True,
                                             output="pir")
        # Calling the command object runs ClustalW and returns its captured
        # stdout and stderr as strings.
        stdout, stderr = clustalw_cline()
except Exception as err:
    print("There was a problem aligning. Check ClustalW path and .fasta folder format/location")
    # Show the underlying cause so the failure can actually be diagnosed.
    print("Details: %s" % err)

Read more here: Source link