Raw content of Bio::Structure::SecStr::STRIDE::Res # $id $ # # bioperl module for Bio::Structure::SecStr::STRIDE::Res.pm # # Cared for by Ed Green <ed@compbio.berkeley.edu> # # Copyright Univ. of California # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::Structure::SecStr::STRIDE::Res - Module for parsing/accessing stride output =head1 SYNOPSIS my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-file' => 'filename.stride' ); # or my $stride_obj = new Bio::Structure::SecStr::STRIDE::Res( '-fh' => \*STDOUT ); # Get secondary structure assignment for PDB residue 20 of chain A $sec_str = $stride_obj->resSecStr( '20:A' ); # same $sec_str = $stride_obj->resSecStr( 20, 'A' ) =head1 DESCRIPTION STRIDE::Res is a module for objectifying STRIDE output. STRIDE is a program (similar to DSSP) for assigning secondary structure to individual residues of a pdb structure file. ( Knowledge-Based Protein Secondary Structure Assignment, PROTEINS: Structure, Function, and Genetics 23:566-579 (1995) ) STRIDE is available here: http://www.embl-heidelberg.de/argos/stride/down_stride.html Methods are then available for extracting all of the infomation present within the output or convenient subsets of it. Although they are very similar in function, DSSP and STRIDE differ somewhat in output format. Thes differences are reflected in the return value of some methods of these modules. For example, both the STRIDE and DSSP parsers have resSecStr() methods for returning the secondary structure of a given residue. However, the range of return values for DSSP is ( H, B, E, G, I, T, and S ) whereas the range of values for STRIDE is ( H, G, I, E, B, b, T, and C ). See individual methods for details. The methods are roughly divided into 3 sections: 1. Global features of this structure (PDB ID, total surface area, etc.). These methods do not require an argument. 2. Residue specific features ( amino acid, secondary structure, solvent exposed surface area, etc. ). These methods do require an arguement. The argument is supposed to uniquely identify a residue described within the structure. It can be of any of the following forms: ('#A:B') or ( #, 'A', 'B' ) || | || - Chain ID (blank for single chain) |--- Insertion code for this residue. Blank for most residues. |--- Numeric portion of residue ID. (#) | --- Numeric portion of residue ID. If there is only one chain and it has no ID AND there is no residue with an insertion code at this number, then this can uniquely specify a residue. ('#:C') or ( #, 'C' ) | | | -Chain ID ---Numeric portion of residue ID. If a residue is incompletely specified then the first residue that fits the arguments is returned. For example, if 19 is the argument and there are three chains, A, B, and C with a residue whose number is 19, then 19:A will be returned (assuming its listed first). Since neither DSSP nor STRIDE correctly handle alt-loc codes, they are not supported by these modules. 3. Value-added methods. Return values are not verbatem strings parsed from DSSP or STRIDE output. =head1 FEEDBACK =head2 MailingLists UsUser feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to one of the Bioperl mailing lists. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bio.perl.org/MailList.html - About the mailing lists =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via email or the web: bioperl-bugs@bio.perl.org http://bugzilla.bioperl.org/ =head1 AUTHOR - Ed Green Email ed@compbio.berkeley.edu =head1 APPENDIX The Rest of the documentation details each method. Internal methods are preceded with a _. =cut package Bio::Structure::SecStr::STRIDE::Res; use strict; use vars qw(@ISA); use Bio::Root::Root; use Bio::Root::IO; use Bio::PrimarySeq; @ISA = qw(Bio::Root::Root); our %ASGTable = ( 'aa' => 0, 'resNum' => 1, 'ssAbbr' => 2, 'ssName' => 3, 'phi' => 4, 'psi' => 5, 'surfArea' => 6 ); our %AATable = ( 'ALA' => 'A', 'ARG' => 'R', 'ASN' => 'N', 'ASP' => 'D', 'CYS' => 'C', 'GLN' => 'Q', 'GLU' => 'E', 'GLY' => 'G', 'HIS' => 'H', 'ILE' => 'I', 'LEU' => 'L', 'LYS' => 'K', 'MET' => 'M', 'PHE' => 'F', 'PRO' => 'P', 'SER' => 'S', 'THR' => 'T', 'TRP' => 'W', 'TYR' => 'Y', 'VAL' => 'V' ); =head2 new Title : new Usage : makes new object of this class Function : Constructor Example : $stride_obj = Bio::Structure::SecStr::STRIDE:Res->new( '-file' => filename # or '-fh' => FILEHANDLE ) Returns : object (ref) Args : filename or filehandle( must be proper STRIDE output ) =cut sub new { my ( $class, @args ) = @_; my $self = $class->SUPER::new( @args ); my $io = Bio::Root::IO->new( @args ); $self->_parse( $io ); # not passing filehandle ! $io->close(); return $self; } # GLOBAL FEATURES / INFO / STATS =head2 totSurfArea Title : totSurfArea Usage : returns sum of surface areas of all residues of all chains considered. Result is memoized. Function : Example : $tot_SA = $stride_obj->totSurfArea(); Returns : scalar Args : none =cut sub totSurfArea { my $self = shift; my $total = 0; my ( $chain, $res ); if ( $self->{ 'SurfArea' } ) { return $self->{ 'SurfArea' }; } else { foreach $chain ( keys %{$self->{ 'ASG' }} ) { for ( my $i = 1; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { $total += $self->{'ASG'}->{$chain}->[$i]->[$ASGTable{'surfArea'}]; } } } $self->{ 'SurfArea' } = $total; return $self->{ 'SurfArea' }; } =head2 numResidues Title : numResidues Usage : returns total number of residues in all chains or just the specified chain Function : Example : $tot_res = $stride_obj->numResidues(); Returns : scalar int Args : none or chain id =cut sub numResidues { my $self = shift; my $chain = shift; my $total = 0; my $key; foreach $key ( keys %{$self->{ 'ASG' }} ) { if ( $chain ) { if ( $key eq $chain ) { $total += $#{$self->{ 'ASG' }{ $key }}; } } else { $total += $#{$self->{ 'ASG' }{ $key }}; } } return $total; } # STRAIGHT FROM THE PDB ENTRY =head2 pdbID Title : pdbID Usage : returns pdb identifier ( 1FJM, e.g. ) Function : Example : $pdb_id = $stride_obj->pdbID(); Returns : scalar string Args : none =cut sub pdbID { my $self = shift; return $self->{ 'PDB' }; } =head2 pdbAuthor Title : pdbAuthor Usage : returns author of this PDB entry Function : Example : $auth = $stride_obj->pdbAuthor() Returns : scalar string Args : none =cut sub pdbAuthor { my $self = shift; return join( ' ', @{ $self->{ 'HEAD' }->{ 'AUT' } } ); } =head2 pdbCompound Title : pdbCompound Usage : returns string of what was found on the CMP lines Function : Example : $cmp = $stride_obj->pdbCompound(); Returns : string Args : none =cut sub pdbCompound { my $self = shift; return join( ' ', @{ $self->{ 'HEAD' }->{ 'CMP' } } ); } =head2 pdbDate Title : pdbDate Usage : returns date given in PDB file Function : Example : $pdb_date = $stride_obj->pdbDate(); Returns : scalar Args : none =cut sub pdbDate { my $self = shift; return $self->{ 'DATE' }; } =head2 pdbHeader Title : pdbHeader Usage : returns string of characters found on the PDB header line Function : Example : $head = $stride_obj->pdbHeader(); Returns : scalar Args : none =cut sub pdbHeader { my $self = shift; return $self->{ 'HEAD' }->{ 'HEADER' }; } =head2 pdbSource Title : pdbSource Usage : returns string of what was found on SRC lines Function : Example : $src = $stride_obj->pdbSource(); Returns : scalar Args : none =cut sub pdbSource { my $self = shift; return join( ' ', @{ $self->{ 'HEAD' }->{ 'SRC' } } ); } # RESIDUE SPECIFIC ACCESSORS =head2 resAA Title : resAA Usage : returns 1 letter abbr. of the amino acid specified by the arguments Function : Examples : $aa = $stride_obj->resAA( RESIDUE_ID ); Returns : scalar character Args : RESIDUE_ID =cut sub resAA { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); } =head2 resPhi Title : resPhi Usage : returns phi angle of specified residue Function : Example : $phi = $stride_obj->resPhi( RESIDUE_ID ); Returns : scaler Args : RESIDUE_ID =cut sub resPhi { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'phi' } ]; } =head2 resPsi Title : resPsi Usage : returns psi angle of specified residue Function : Example : $psi = $stride_obj->resPsi( RESIDUE_ID ); Returns : scalar Args : RESIDUE_ID =cut sub resPsi { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'psi' } ]; } =head2 resSolvAcc Title : resSolvAcc Usage : returns stride calculated surface area of specified residue Function : Example : $sa = $stride_obj->resSolvAcc( RESIDUE_ID ); Returns : scalar Args : RESIDUE_ID =cut sub resSolvAcc { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; } =head2 resSurfArea Title : resSurfArea Usage : returns stride calculated surface area of specified residue Function : Example : $sa = $stride_obj->resSurfArea( RESIDUE_ID ); Returns : scalar Args : RESIDUE_ID =cut sub resSurfArea { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'surfArea' } ]; } =head2 resSecStr Title : resSecStr Usage : gives one letter abbr. of stride determined secondary structure of specified residue Function : Example : $ss = $stride_obj->resSecStr( RESIDUE_ID ); Returns : one of: 'H' => Alpha Helix 'G' => 3-10 helix 'I' => PI-helix 'E' => Extended conformation 'B' or 'b' => Isolated bridge 'T' => Turn 'C' => Coil ' ' => None # NOTE: This range is slightly DIFFERENT from the # DSSP method of the same name Args : RESIDUE_ID =cut sub resSecStr { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssAbbr' } ]; } =head2 resSecStrSum Title : resSecStrSum Usage : gives one letter summary of secondary structure of specified residue. More general than secStruc() Function : Example : $ss_sum = $stride_obj->resSecStrSum( RESIDUE_ID ); Returns : one of: 'H' (helix), 'B' (beta), 'T' (turn), or 'C' (coil) Args : residue identifier(s) ( SEE INTRO NOTE ) =cut sub resSecStrSum { my $self = shift; my @args = @_; my $ss_char = $self->resSecStr( @args ); if ( $ss_char eq 'H' || $ss_char eq 'G' || $ss_char eq 'I' ) { return 'H'; } if ( $ss_char eq 'E' || $ss_char eq 'B' || $ss_char eq 'b' ) { return 'B'; } if ( $ss_char eq 'T' ) { return 'T'; } else { return 'C'; } } # STRIDE SPECIFIC =head2 resSecStrName Title : resSecStrName Usage : gives full name of the secondary structural element classification of the specified residue Function : Example : $ss_name = $stride_obj->resSecStrName( RESIDUE_ID ); Returns : scalar string Args : RESIDUE_ID =cut sub resSecStrName { my $self = shift; my @args = @_; my ( $ord, $chain ) = $self->_toOrdChain( @args ); return $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'ssName' } ]; } =head2 strideLocs Title : strideLocs Usage : returns stride determined contiguous secondary structural elements as specified on the LOC lines Function : Example : $loc_pnt = $stride_obj->strideLocs(); Returns : pointer to array of 5 element arrays. 0 => stride name of structural element 1 => first residue pdb key (including insertion code, if app.) 2 => first residue chain id 3 => last residue pdb key (including insertion code, if app.) 4 => last residue chain id NOTE the differences between this range and the range of SecBounds() Args : none =cut sub strideLocs { my $self = shift; return $self->{ 'LOC' }; } # VALUE ADDED METHODS (NOT JUST PARSE/REPORT) =head2 secBounds Title : secBounds Usage : gets residue ids of boundary residues in each contiguous secondary structural element of specified chain Function : Example : $ss_bound_pnt = $stride_obj->secBounds( 'A' ); Returns : pointer to array of 3 element arrays. First two elements are the PDB IDs of the start and end points, respectively and inclusively. The last element is the STRIDE secondary structural element code (same range as resSecStr). Args : chain identifier ( one character ). If none, '-' is assumed =cut sub secBounds { # Requires a chain name. If left blank, we assume ' ' which equals '-' my $self = shift; my $chain = shift; my @SecBounds; $chain = '-' if ( !( $chain ) || $chain eq ' ' || $chain eq '-' ); # if we've memoized this one, use that if ( $self->{ 'SecBounds' }->{ $chain } ) { return $self->{ 'SecBounds' }->{ $chain }; } #check to make sure chain is valid if ( !( $self->{ 'ASG' }->{ $chain } ) ) { $self->throw( "No such chain: $chain\n" ); } my $cur_element = $self->{ 'ASG' }->{ $chain }->[ 1 ]-> [ $ASGTable{ 'ssAbbr' } ]; my $beg = 1; my $i; for ( $i = 2; $i <= $#{$self->{'ASG'}->{$chain}}; $i++ ) { if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] ne $cur_element ) { push( @SecBounds, [ $beg, $i -1 , $cur_element ] ); $beg = $i; $cur_element = $self->{ 'ASG' }->{ $chain }->[ $i ]-> [ $ASGTable{ 'ssAbbr' } ]; } } if ( $self->{ 'ASG' }->{ $chain }->[ $i ]->[ $ASGTable{ 'ssAbbr' } ] eq $cur_element ) { push( @SecBounds, [ $beg, $i, $cur_element ] ); } else { push( @SecBounds, [ $beg, $i - 1, $cur_element ], [ $i, $i, $self->{ 'ASG' }->{ $chain }->[ $i ]-> [ $ASGTable{ 'ssAbbr' } ] ] ); } $self->{ 'SecBounds' }->{ $chain } = \@SecBounds; return $self->{ 'SecBounds' }->{ $chain }; } =head2 chains Title : chains Usage : gives array chain I.D.s (characters) Function : Example : @chains = $stride_obj->chains(); Returns : array of characters Args : none =cut sub chains { my $self = shift; my @chains = keys ( %{ $self->{ 'ASG' } } ); return \@chains; } =head2 getSeq Title : getSeq Usage : returns a Bio::PrimarySeq object which represents an approximation at the sequence of the specified chain. Function : For most chain of most entries, the sequence returned by this method will be very good. However, it it inherently unsafe to rely on STRIDE to extract sequence information about a PDB entry. More reliable information can be obtained from the PDB entry itself. If a second option is given (and evaluates to true), the sequence generated will have 'X' in spaces where the pdb residue numbers are discontinuous. In some cases this results in a better sequence object (when the discontinuity is due to regions which were present, but could not be resolved). In other cases, it will result in a WORSE sequence object (when the discontinuity is due to historical sequence numbering and all sequence is actually resolved). Example : $pso = $dssp_obj->getSeq( 'A' ); Returns : (pointer to) a PrimarySeq object Args : Chain identifier. If none given, '-' is assumed. =cut sub getSeq { my $self = shift; my $chain = shift; my $fill_in = shift; if ( !( $chain ) ) { $chain = '-'; } if ( $self->{ 'Seq' }->{ $chain } ) { return $self->{ 'Seq' }->{ $chain }; } my ( $seq, $num_res, $last_res_num, $cur_res_num, $i, $step, $id ); $seq = ""; $num_res = $self->numResidues( $chain ); $last_res_num = $self->_pdbNum( 1, $chain ); for ( $i = 1; $i <= $num_res; $i++ ) { if ( $fill_in ) { $cur_res_num = $self->_pdbNum( $i, $chain ); $step = $cur_res_num - $last_res_num; if ( $step > 1 ) { $seq .= 'X' x ( $step - 1 ); } } $seq .= $self->_resAA( $i, $chain ); $last_res_num = $cur_res_num; } $id = $self->pdbID(); $id .= "$chain"; $self->{ 'Seq' }->{ $chain } = Bio::PrimarySeq->new( -seq => $seq, -id => $id, -moltype => 'protein' ); return $self->{ 'Seq' }->{ $chain }; } =head1 INTERNAL METHODS =head2 _pdbNum Title : _pdbNum Usage : fetches the numeric portion of the identifier for a given residue as reported by the pdb entry. Note, this DOES NOT uniquely specify a residue. There may be an insertion code and/or chain identifier differences. Function : Example : $pdbNum = $self->pdbNum( 3, 'A' ); Returns : a scalar Args : valid ordinal num / chain combination =cut sub _pdbNum { my $self = shift; my $ord = shift; my $chain = shift; if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { $self->throw( "No such ordinal $ord in chain $chain.\n" ); } my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; my $num_part; ( $num_part ) = ( $pdb_junk =~ /(-*\d+).*/ ); return $num_part; } =head2 _resAA Title : _resAA Usage : returns 1 letter abbr. of the amino acid specified by the arguments Function : Examples : $aa = $stride_obj->_resAA( 3, '-' ); Returns : scalar character Args : ( ord. num, chain ) =cut sub _resAA { my $self = shift; my $ord = shift; my $chain = shift; if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { $self->throw( "No such ordinal $ord in chain $chain.\n" ); } return ( $AATable{$self->{'ASG'}->{$chain}->[$ord]->[$ASGTable{'aa'}]} ); } =head2 _pdbInsCo Title : _pdbInsCo Usage : fetches the Insertion code for this residue. Function : Example : $pdb_ins_co = $self->_pdb_ins_co( 15, 'B' ); Returns : a scalar Args : ordinal number and chain =cut sub _pdbInsCo { my $self = shift; my $ord = shift; my $chain = shift; if ( !( $self->{ 'ASG' }->{ $chain }->[ $ord ] ) ) { $self->throw( "No such ordinal $ord in chain $chain.\n" ); } my $pdb_junk = $self->{ 'ASG' }->{ $chain }->[ $ord ]->[ $ASGTable{ 'resNum' } ]; my $letter_part; ( $letter_part ) = ( $pdb_junk =~ /\d+(\D+)/ ); # insertion code can be any # non-word character(s) return $letter_part; } =head2 _toOrdChain Title : _toOrdChain Usage : takes any set of residue identifying parameters and wrestles them into a two element array: the chain and the ordinal number of this residue. This two element array can then be efficiently used as keys in many of the above accessor methods ('#A:B') or ( #, 'A', 'B' ) || | || - Chain ID (blank for single chain) |--- Insertion code for this residue. Blank for most residues. |--- Numeric portion of residue ID. (#) | --- Numeric portion of residue ID. If there is only one chain and it has no ID AND there is no residue with an insertion code at this number, then this can uniquely specify a residue. # ('#:C) or ( #, 'C' ) | | | -Chain ID ---Numeric portion of residue ID. If a residue is incompletely specified then the first residue that fits the arguments is returned. For example, if 19 is the argument and there are three chains, A, B, and C with a residue whose number is 19, then 19:A will be returned (assuming its listed first). Function : Example : my ( $ord, $chain ) = $self->_toOrdChain( @args ); Returns : two element array Args : valid set of residue identifier(s) ( SEE NOTE ABOVE ) =cut sub _toOrdChain { my $self = shift; my $arg_str; my ( $key_num, $chain_id, $ins_code, $key, $i ); # check to see how many args are given if ( $#_ >= 1 ) { # multiple args $key_num = shift; if ( $#_ >= 1 ) { # still multiple args => ins. code, too $ins_code = shift; $chain_id = shift; } else { # just one more arg. => chain_id $chain_id = shift; } } else { # only single arg. Might be number or string $arg_str = shift; if ( $arg_str =~ /:/ ) { # a chain is specified ( $chain_id ) = ( $arg_str =~ /:(.)/); $arg_str =~ s/:.//; } if ( $arg_str =~ /[A-Z]|[a-z]/ ) { # an insertion code is specified ( $ins_code ) = ( $arg_str =~ /([A-Z]|[a-z])/ ); $arg_str =~ s/[A-Z]|[a-z]//g; } #now, get the number bit-> everything still around $key_num = $arg_str; } $key = "$key_num$ins_code"; if ( !( $chain_id ) || $chain_id eq ' ' ) { $chain_id = '-'; } if ( !( $self->{ 'ASG' }->{ $chain_id } ) ) { $self->throw( "No such chain: $chain_id" ); } for ( $i = 1; $i <= $#{$self->{ 'ASG' }->{ $chain_id }}; $i++ ) { if ( $self->{ 'ASG' }->{ $chain_id }->[ $i ]->[ $ASGTable{ 'resNum' } ] eq $key ) { return ( $i, $chain_id ); } } $self->throw( "No such key: $key" ); } =head2 _parse Title : _parse Usage : as name suggests, parses stride output, creating object Function : Example : $self->_parse( $io ); Returns : Args : valid Bio::Root::IO object =cut sub _parse { my $self = shift; my $io = shift; my $file = $io->_fh(); # Parse top lines if ( $self->_parseTop( $io ) ) { $self->throw( "Not stride output" ); } # Parse the HDR, CMP, SCR, and AUT lines $self->_parseHead( $io ); # Parse the CHN, SEQ, STR, and LOC lines $self->_parseSummary( $io ); # we're ignoring this # Parse the ASG lines $self->_parseASG( $io ); } =head2 _parseTop Title : _parseTop Usage : makes sure this looks like stride output Function : Example : Returns : Args : =cut sub _parseTop { my $self = shift; my $io = shift; my $file = $io->_fh(); my $cur = <$file>; if ( $cur =~ /^REM ---/ ) { return 0; } return 1; } =head2 _parseHead Title : _parseHead Usage : parses Function : HDR, CMP, SRC, and AUT lines Example : Returns : Args : =cut sub _parseHead { my $self = shift; my $io = shift; my $file = $io->_fh(); my $cur; my $element; my ( @elements, @cmp, @src, @aut ); my %head = {}; my $still_head = 1; $cur = <$file>; while ( $cur =~ /^REM / ) { $cur = <$file>; } if ( $cur =~ /^HDR / ) { @elements = split( /\s+/, $cur ); shift( @elements ); pop( @elements ); $self->{ 'PDB' } = pop( @elements ); $self->{ 'DATE' } = pop( @elements ); # now, everything else is "header" except for the word # HDR $element = join( ' ', @elements ); $head{ 'HEADER' } = $element; } $cur = <$file>; while ( $cur =~ /^CMP / ) { ( $cur ) = ( $cur =~ /^CMP\s+(.+?)\s*\w{4}$/ ); push( @cmp, $cur ); $cur = <$file>; } while ( $cur =~ /^SRC / ) { ( $cur ) = ( $cur =~ /^SRC\s+(.+?)\s*\w{4}$/ ); push( @src, $cur ); $cur = <$file>; } while ( $cur =~ /^AUT / ) { ( $cur ) = ( $cur =~ /^AUT\s+(.+?)\s*\w{4}$/ ); push( @aut, $cur ); $cur = <$file>; } $head{ 'CMP' } = \@cmp; $head{ 'SRC' } = \@src; $head{ 'AUT' } = \@aut; $self->{ 'HEAD' } = \%head; } =head2 _parseSummary Title : _parseSummary Usage : parses LOC lines Function : Example : Returns : Args : =cut sub _parseSummary { my $self = shift; my $io = shift; my $file = $io->_fh(); my $cur = <$file>; my $bound_set; my $element; my ( @elements, @cur ); my @LOC_lookup = ( [ 5, 12 ], # Element name # reduntdant [ 18, 3 ], # First residue name [ 22, 5 ], # First residue PDB number [ 28, 1 ], # First residue Chain ID # redundant [ 35, 3 ], # Last residue name [ 40, 5 ], # Last residue PDB number [ 46, 1 ] ); # Last residue Chain ID #ignore these lines while ( $cur =~ /^REM |^STR |^SEQ |^CHN / ) { $cur = <$file>; } while ( $cur =~ /^LOC / ) { foreach $bound_set ( @LOC_lookup ) { $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] ); $element =~ s/\s//g; push( @cur, $element ); } push( @elements, [ @cur ] ); $cur = <$file>; @cur = (); } $self->{ 'LOC' } = \@elements; } =head2 _parseASG Title : _parseASG Usage : parses ASG lines Function : Example : Returns : Args : =cut sub _parseASG { my $self = shift; my $io = shift; my $file = $io->_fh(); my $cur = <$file>; my $bound_set; my $ord_num; my ( $chain, $last_chain ); my $element; my %ASG; my ( @cur, @elements ); my @ASG_lookup = ( [ 5, 3 ], # Residue name # [ 9, 1 ], # Chain ID [ 10, 5 ], # PDB residue number (w/ins.code) # [ 16, 4 ], # ordinal stride number [ 24, 1 ], # one letter sec. stru. abbr. [ 26, 13], # full sec. stru. name [ 42, 7 ], # phi angle [ 52, 7 ], # psi angle [ 64, 5 ] );# residue solv. acc. while ( $cur =~ /^REM / ) { $cur = <$file>; } while ( $cur =~ /^ASG / ) { # get ordinal number for array key $ord_num = substr( $cur, 16, 4 ); $ord_num =~ s/\s//g; # get the chain id $chain = substr( $cur, 9, 1 ); if ( $last_chain && ( $chain ne $last_chain ) ) { $ASG{ $last_chain } = [ @elements ]; @elements = (); } # now get the rest of the info on this line foreach $bound_set ( @ASG_lookup ) { $element = substr( $cur, $bound_set->[ 0 ], $bound_set->[ 1 ] ); $element =~ s/\s//g; push( @cur, $element ); } $elements[ $ord_num ] = [ @cur ]; $cur = <$file>; @cur = (); $last_chain = $chain; } $ASG{ $chain } = [ @elements ]; $self->{ 'ASG' } = \%ASG; } 1;