Fix KeyError when parsing a DSSP file

Hello, I am getting a KeyError that says:

/opt/anaconda3/lib/python3.8/site-packages/Bio/PDB/PDBParser.py:395: PDBConstructionWarning: Ignoring unrecognized record ' 1242 ' at line 1270
warnings.warn(
Traceback (most recent call last):
File "dsspparse.py", line 6, in <module>
model = structure[1]
File “/opt/anaconda3/lib/python3.8/site-packages/Bio/PDB/Entity.py”, line 42, in __getitem__
return self.child_dict[id]
KeyError: 1

whilst using this code:
`from Bio.PDB import PDBParser
from Bio.PDB.DSSP import DSSP

p = PDBParser()
# NOTE(review): the path given to PDBParser ends in .dssp, i.e. DSSP output
# rather than a PDB coordinate file -- presumably why the parser warns about
# an "unrecognized record". Confirm, and point this at the PDB file instead.
structure = p.get_structure(“4XGV”, “/Users/username/Desktop/DSSP/4XGV.dssp”)
# This lookup is the line that raises "KeyError: 1" in the traceback: the
# structure has no child with id 1.
# NOTE(review): Biopython model ids normally start at 0 -- verify whether
# structure[0] is what is wanted here.
model = structure[1]

# NOTE(review): DSSP() receives the same .dssp path together with an mkdssp
# executable name. Presumably it expects a structure file to run mkdssp on
# unless told the input is already DSSP output -- confirm against the
# Bio.PDB.DSSP documentation.
dssp = DSSP(model, ‘/Users/username/Desktop/DSSP/4XGV.dssp’, dssp=’mkdssp’)

def make_dssp_dict(filename):
    """DSSP dictionary mapping identifiers to properties.

    Open a DSSP output file and return the result of parsing it with
    ``_make_dssp_dict``.

    Parameters
    ----------
    filename : string
        the DSSP output file

    Returns
    -------
    tuple
        ``(dssp, keys)``: a dictionary keyed by ``(chainid, res_id)`` holding
        the amino acid, secondary structure, accessibility and the other
        per-residue values, and the list of those keys in file order.

    """
    # The context manager guarantees the file is closed even if parsing fails.
    with open(filename) as handle:
        return _make_dssp_dict(handle)

def _parse_dssp_columns(line, shift=0):
    """Parse the fixed-width numeric columns of one DSSP residue line (PRIVATE).

    Parameters
    ----------
    line : str
        a residue line from the DSSP residue table
    shift : int
        number of characters by which every column is displaced to the right

    Returns
    -------
    tuple
        ``(acc, phi, psi, NH_O_1_relidx, NH_O_1_energy, O_NH_1_relidx,
        O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy, O_NH_2_relidx,
        O_NH_2_energy)``

    Raises
    ------
    ValueError
        if any column does not hold a valid number.

    """
    # Hydrogen-bond columns are read first, then acc/phi/psi, so that the
    # first failing column (and hence the error message) is deterministic.
    nh_o_1_relidx = int(line[38 + shift : 45 + shift])
    nh_o_1_energy = float(line[46 + shift : 50 + shift])
    o_nh_1_relidx = int(line[50 + shift : 56 + shift])
    o_nh_1_energy = float(line[57 + shift : 61 + shift])
    nh_o_2_relidx = int(line[61 + shift : 67 + shift])
    nh_o_2_energy = float(line[68 + shift : 72 + shift])
    o_nh_2_relidx = int(line[72 + shift : 78 + shift])
    o_nh_2_energy = float(line[79 + shift : 83 + shift])

    acc = int(line[34 + shift : 38 + shift])
    phi = float(line[103 + shift : 109 + shift])
    psi = float(line[109 + shift : 115 + shift])
    return (
        acc,
        phi,
        psi,
        nh_o_1_relidx,
        nh_o_1_energy,
        o_nh_1_relidx,
        o_nh_1_energy,
        nh_o_2_relidx,
        nh_o_2_energy,
        o_nh_2_relidx,
        o_nh_2_energy,
    )


def _make_dssp_dict(handle):
    """Return a DSSP dictionary, used by make_dssp_dict (PRIVATE).

    DSSP dictionary maps (chainid, resid) to an amino acid,
    secondary structure symbol, solvent accessibility value, and hydrogen bond
    information (relative dssp indices and hydrogen bond energies) from an open
    DSSP file object.

    Parameters
    ----------
    handle : file
        the open DSSP output file handle (any iterable of lines works)

    Returns
    -------
    tuple
        ``(dssp, keys)`` where ``dssp`` maps ``(chainid, (" ", resseq, icode))``
        to ``(aa, ss, acc, phi, psi, dssp_index, NH_O_1_relidx, NH_O_1_energy,
        O_NH_1_relidx, O_NH_1_energy, NH_O_2_relidx, NH_O_2_energy,
        O_NH_2_relidx, O_NH_2_energy)`` and ``keys`` lists the dictionary keys
        in the order the residues appear in the file.

    Raises
    ------
    ValueError
        if a residue line has non-numeric data in a numeric column and the
        line does not look column-shifted.

    """
    dssp = {}
    keys = []
    in_residue_table = False
    for line in handle:
        tokens = line.split()
        if len(tokens) < 2:
            continue
        if tokens[1] == "RESIDUE":
            # Column header reached: residue records start on the next line.
            in_residue_table = True
            continue
        if not in_residue_table:
            continue
        if line[9] == " ":
            # Skip -- missing residue
            continue

        dssp_index = int(line[:5])
        resseq = int(line[5:10])
        icode = line[10]
        chainid = line[11]
        aa = line[13]
        ss = line[16]
        if ss == " ":
            ss = "-"
        try:
            numeric = _parse_dssp_columns(line)
        except ValueError as exc:
            # DSSP output breaks its own format when there are >9999
            # residues, since only 4 digits are allocated to the seq num
            # field.  See 3kic chain T res 321, 1vsy chain T res 6077.
            # Here, look for whitespace to figure out the number of extra
            # digits, and shift parsing the rest of the line by that amount.
            if line[34] == " ":
                raise ValueError(exc) from None
            shift = line[34:].find(" ")
            numeric = _parse_dssp_columns(line, shift)

        acc, phi, psi = numeric[:3]
        res_id = (" ", resseq, icode)
        key = (chainid, res_id)
        dssp[key] = (aa, ss, acc, phi, psi, dssp_index) + numeric[3:]
        keys.append(key)
    return dssp, keys

# NOTE(review): the (dssp, keys) tuple returned by this call is discarded;
# assign it to a name if the parsed data is meant to be used.
make_dssp_dict(‘/Users/username/Desktop/DSSP/4XGV.dssp’)
# NOTE(review): no module-level `handle` is defined in the code shown, so this
# call would fail with NameError; make_dssp_dict() already opens the file and
# calls _make_dssp_dict() itself.
_make_dssp_dict(handle)`

Read more here: Source link