#! /usr/bin/nawk -f
#
# Converts a PROTIN-readable PDB file into an O-readable PDB file.
#
# Input records are handled by their leading keyword:
#   TER   - re-emitted as `terminal "TER"`; residue tracking is reset
#   ATOM  - cut into fixed-width columns, patched (see below), re-emitted
#   other - passed through unchanged
# A final "END" record is appended after the last input line.
#
# NOTE: ATOM records are fixed-column. Every literal compared with or assigned
# to ATOM (4 chars), RES (3 chars) or RESnum (4 chars) must keep its exact
# width, including embedded spaces, or the output columns will shift.
#

BEGIN {
}

/^TER/ {
    # `terminal` is never given a non-empty value in this script, so this
    # prints a bare "TER".
    print terminal "TER"
    terminal = ""
    lastRESnum = 0
}

/^ATOM/ {
    # extract interesting columns
    entry = $0

    # substr() positions are 1-based; a start of 0 yields only 11 characters
    # in POSIX awks (gawk, mawk), which would drop column 12.
    front  = substr(entry,  1, 12)
    ATOM   = substr(entry, 13,  4)
    Conf   = substr(entry, 17,  1)
    RES    = substr(entry, 18,  3)
    chain  = substr(entry, 21,  2)
    RESnum = substr(entry, 23,  4)
    XYZ    = substr(entry, 28, 27)
    occ    = substr(entry, 55,  6)
    B      = substr(entry, 61,  6)
    back   = substr(entry, 67)

    # modify acetyl terminus: the CT2/CT1/OT atoms of residue 1 are relabelled
    # as CA/C/O of a GLY numbered 0
    if (RESnum == "   1") {
        if (ATOM == " CT2") { RES = "GLY"; ATOM = " CA "; RESnum = "   0" }
        if (ATOM == " CT1") { RES = "GLY"; ATOM = " C  "; RESnum = "   0" }
        if (ATOM == " OT ") { RES = "GLY"; ATOM = " O  "; RESnum = "   0" }
    }

    # carboxy terminus: any remaining OT atom
    if (ATOM == " OT ") {
        # turn it into something we can recognize later:
        # an N of a GLY in the next residue, with occupancy raised by 100
        ATOM   = " N  "
        RES    = "GLY"
        occ    = sprintf("%6.2f", occ + 100)
        RESnum = sprintf("%4d", RESnum + 1)
    }

    # add terminators (if not already there) when the residue number drops
    # by more than 2 relative to the previous ATOM record
    if (RESnum + 0 < lastRESnum - 2) {
        print terminal "TER"
        terminal = ""
    }

    # fix selenomethionine: SED becomes SD, occupancy raised by 100.
    # Re-format to the 6-wide field so the B column stays aligned
    # (a bare `occ += 100` would be concatenated as e.g. "101").
    if ((RES == "MET") && (ATOM == "SED ")) {
        ATOM = " SD "
        occ  = sprintf("%6.2f", occ + 100)
    }

    # multiple conformers
    # shorthand key identifying the current residue
    ID = RES RESnum chain
    # if(RESnum != lastRESnum) resbuffer = ""

    if (Conf != " ") {
        if (Conf != "A") {
            # O needs ENTIRE residue in order to build it properly:
            # flush the atoms buffered for this conformer of this residue
            # first. "%s" keeps any '%' in the record from being treated as
            # a printf conversion.
            printf "%s", resbuffer[ID Conf]
            resbuffer[ID Conf] = ""
        } else {
            # conformer "A" becomes normal residue number in O
            Conf = " "
        }
    } else {
        # collect a buffer of all atoms NOT in multiple conformers,
        # pre-labelled as conformers B, C and D of this residue
        resbuffer[ID "B"] = resbuffer[ID "B"] front ATOM " " RES chain RESnum "B" XYZ occ B back "\n"
        resbuffer[ID "C"] = resbuffer[ID "C"] front ATOM " " RES chain RESnum "C" XYZ occ B back "\n"
        resbuffer[ID "D"] = resbuffer[ID "D"] front ATOM " " RES chain RESnum "D" XYZ occ B back "\n"
    }

    # do something else?

    # column 17 is blanked; the conformer letter is written after the
    # residue number (column 27)
    print front ATOM " " RES chain RESnum Conf XYZ occ B back

    lastRESnum = RESnum
}

# everything that is neither ATOM nor TER is copied verbatim
! /^ATOM/ && ! /^TER/ {
    print
}

END {
    # print "TER"
    print "END"
}