#!/usr/bin/perl -w
#
# get_pdb_pep.pl - extract peptide sequences from the ATOM records of a PDB file.
#
# Usage:
#     ./get_pdb_pep.pl <PDB_ID>
#
# Reads "<PDB_ID>.pdb" from the current directory and writes "<PDB_ID>.fasta".
# Every run of ATOM records that belongs to one chain and whose residue numbers
# increase by exactly one becomes one FASTA entry:
#
#     ><PDB_ID>;;<chain>;;<first residue number>;;<last residue number>
#     <one-letter sequence>
#
# A run is closed by a chain change, a jump in the residue numbering, any
# non-ATOM record (other than ANISOU/SIGATM/SIGUIJ, which are skipped), or the
# end of the input.  Parsing stops at the first MODEL record whose number is
# greater than 1, so only the first model is used.
#
# Dies if the argument count is wrong, if the input file is missing, if either
# file cannot be opened, or if an ATOM record with an unknown residue name
# appears while a run is open.

use strict;
use warnings;

@ARGV == 1 or die ""
    . "Error in syntax!\n"
    . " ./get_pdb_pep.pl <PDB_ID>\n"
    . "For example:\n"
    . " ./get_pdb_pep.pl 1Z0Q\n";

my $pdb_id   = $ARGV[0];
my $in_file  = $pdb_id . ".pdb";
my $out_file = $pdb_id . ".fasta";

if ( !-e $in_file ) {
    die "Error! The PDB data file should \"$pdb_id.pdb\" in the current directory!\n";
}

# Three-letter residue name => one-letter amino-acid code.
my %one_letter_for = (
    "ALA" => "A", "CYS" => "C", "ASP" => "D", "GLU" => "E",
    "PHE" => "F", "GLY" => "G", "HIS" => "H", "ILE" => "I",
    "LYS" => "K", "LEU" => "L", "MET" => "M", "ASN" => "N",
    "PRO" => "P", "GLN" => "Q", "ARG" => "R", "SER" => "S",
    "THR" => "T", "VAL" => "V", "TRP" => "W", "TYR" => "Y",
);

print "Loading PDB data from $in_file ... ";

open( my $in_fh, '<', $in_file )
    or die "Error when loading PDB data!\n";
open( my $out_fh, '>', $out_file )
    or die "Error when saving peptide sequences from the PDB file $in_file\n";

my $residue_count   = 0;        # recognised residues added to any run
my $segment_count   = 0;        # FASTA entries actually written
my $later_model_hit = 0;        # set once a MODEL record numbered > 1 is seen
my $segment;                    # currently open run (hash ref), undef if none

LINE:
while ( my $line = <$in_fh> ) {
    $line =~ s/[\r\n]//g;

    if ( $line !~ /^ATOM/ ) {

        # These records sit between the ATOM records of a residue; letting
        # them close the run would emit one entry per atom.
        next LINE if $line =~ /^(?:ANISOU|SIGATM|SIGUIJ)/;

        # Any other record ends the open run.
        $segment_count += write_segment( $out_fh, $pdb_id, $segment );
        $segment = undef;

        if ( $line =~ /^MODEL\s+(\d+)/ ) {
            my $model_number = $1;
            if ( $model_number > 1 ) {
                $later_model_hit = 1;
                last LINE;
            }
        }
        next LINE;
    }

    # Pad short records so the fixed-column reads below never run off the end.
    my $record = sprintf( '%-26s', $line );

    my $res_name = substr( $record, 17, 3 );
    my $aa       = $one_letter_for{$res_name};
    if ( !defined $aa ) {

        # Unknown residue names are ignored between runs but are fatal
        # inside one.
        die "Error! Non Amino Acid appeared: $res_name\n" if defined $segment;
        next LINE;
    }

    # The chain identifier is taken from two columns and trimmed.
    my $chain = substr( $record, 20, 2 );
    $chain =~ s/^\s+//;
    $chain =~ s/\s+$//;

    my $res_num = int( substr( $record, 22, 4 ) );

    if ( defined $segment ) {
        if ( $chain eq $segment->{chain} ) {

            # Another atom of the residue that was already recorded.
            next LINE if $res_num == $segment->{end};

            if ( $res_num == $segment->{end} + 1 ) {
                $segment->{seq} .= $aa;
                $segment->{end} = $res_num;
                $residue_count++;
                next LINE;
            }
        }

        # Chain change or numbering jump: close the run.  This same record
        # then opens the next run below instead of being thrown away.
        $segment_count += write_segment( $out_fh, $pdb_id, $segment );
        $segment = undef;
    }

    $segment = {
        chain => $chain,
        seq   => $aa,
        start => $res_num,
        end   => $res_num,
    };
    $residue_count++;
}

# The input may end on an ATOM record; do not lose the run that is still open.
$segment_count += write_segment( $out_fh, $pdb_id, $segment );

close($in_fh);
close($out_fh)
    or die "Error when saving peptide sequences from the PDB file $in_file\n";

print "[done with $residue_count amino acids in $segment_count chains]\n";
if ( $later_model_hit == 1 ) {
    print "We only retrieve the first MODEL from the PDB file!\n";
}

# Write one run as a FASTA entry to $fh.
#
# Parameters:
#   $fh   - output filehandle
#   $tag  - identifier placed first in the header line
#   $seg  - hash ref with keys chain, seq, start, end; may be undef
#
# Returns 1 if an entry was written, 0 otherwise.  Nothing is written when
# $seg is undef, when the run starts at a residue number below 1, or when its
# end lies before its start.  An empty chain name is written as "DefaultChain".
sub write_segment {
    my ( $fh, $tag, $seg ) = @_;

    return 0 if !defined $seg;

    my $chain = length( $seg->{chain} ) == 0 ? "DefaultChain" : $seg->{chain};
    my $start = $seg->{start};
    my $end   = $seg->{end};
    my $seq   = $seg->{seq};

    return 0 if !( $start >= 1 and $end >= $start );

    print {$fh} ">$tag;;$chain;;$start;;$end\n";
    print {$fh} "$seq\n";
    return 1;
}