# Raw content of Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg
#
# Author: Marc Sohrmann (ms2@sanger.ac.uk)
# Copyright (c) Marc Sohrmann, 2001
# You may distribute this code under the same terms as perl itself
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code

=pod

=head1 NAME

Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg

=head1 SYNOPSIS

  my $seqstream = Bio::SeqIO->new(
    -file   => $queryfile,
    -format => 'Fasta',
  );
  $seq = $seqstream->next_seq;

  my $seg = Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg->new(
    -QUERY => $seq,
  );
  $seg->workdir($workdir);
  $seg->run;

  # output holds an array reference of features
  my $results = $seg->output;

=head1 DESCRIPTION

Seg takes a Bio::Seq (or Bio::PrimarySeq) object and runs seg on it
(detecting low complexity sequences). The resulting output file is
parsed to produce a set of features.

=head1 CONTACT

Marc Sohrmann: ms2@sanger.ac.uk

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _.

=cut

package Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation::Seg;

use vars qw(@ISA);
use strict;
use warnings;

use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation;

@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation);


=head2 multiprotein

  Function : Always returns 1.
  Returns  : 1
  Notes    : Presumably tells the parent class that this runnable accepts
             a query file holding several proteins - confirm against
             Bio::EnsEMBL::Analysis::Runnable::ProteinAnnotation.

=cut

sub multiprotein {
  my ($self) = @_;
  return 1;
}


=head2 run_analysis

  Function : Runs the configured program on the query file with the -l
             option and redirects its standard output to the results file.
  Returns  : nothing meaningful
  Throws   : if the command exits with a non-zero status
  Notes    : The command line is a single string, so it is executed through
             the shell (needed for the '>' redirection). The program and
             file names are therefore expected to be trusted values.

=cut

sub run_analysis {
  my ($self) = @_;

  my $command =
    $self->program . " " . $self->queryfile . " -l > " . $self->resultsfile;

  throw("Error running " . $self->program . " on " . $self->queryfile)
    unless system($command) == 0;
}


=head2 parse_results

  Function : Reads the results and stores one protein feature per parsed
             header line of the form
               >ID(START-END) complexity=SCORE
             via $self->output.
  Returns  : nothing meaningful; returns early, without touching output,
             when the results file exists but is empty
  Throws   : if the results file cannot be opened
  Notes    : resultsfile may hold either a file name or an already opened
             filehandle. Header lines that do not match the expected
             format are reported with a warning and skipped.

=cut

sub parse_results {
  my ($self) = @_;

  my $fh;
  my $resfile = $self->resultsfile;

  if (-e $resfile) {
    # it's a filename
    return if -z $resfile;
    # three-argument open: the file name can never be read as a mode
    open($fh, '<', $resfile) or throw("Error opening $resfile");
  }
  else {
    # it's a filehandle
    $fh = $resfile;
  }

  # parse
  my @pfs;
  # a lexical line variable keeps the caller's $_ untouched
  while (my $line = <$fh>) {
    chomp $line;
    next unless $line =~ /^\>/;

    # Capture in list context so a failed match yields an empty list.
    # Reading $1..$4 after a failed match would silently reuse the values
    # of the previous successful match and create a bogus feature.
    my @fields =
      $line =~ /^\>(\S+)?\((\d+)\-(\d+)\)\s*complexity=(\S+)/;
    unless (@fields) {
      warning("Could not parse seg header line: $line");
      next;
    }

    my ($tid, $start, $end, $score) = @fields;
    my $fp = $self->create_protein_feature($start, $end, $score, $tid,
                                           0, 0, 'Seg',
                                           $self->analysis, 0, 0);
    push @pfs, $fp;
  }
  close($fh);

  $self->output(\@pfs);
}


=head2 get_low_complexity_length

  Title    : get_low_complexity_length
  Usage    : $len = $self->get_low_complexity_length;
  Function : returns *percentage* low complexity of protein
  Example  :
  Returns  : the summed length of all output features as a percentage of
             the query length; 0 when the query length is not positive
  Args     :
  Throws   :
  Notes    : It only makes sense to call this method when the Runnable
             was created with a single Bio::Seq. Overlapping features,
             if any, are counted once each.

=cut

sub get_low_complexity_length {
  my ($self) = @_;

  my $query_length = $self->query->length;
  return 0 unless $query_length > 0;

  my $lc_length = 0;
  # parse_results may return before setting output (empty results file),
  # so fall back to an empty list rather than dereferencing undef
  foreach my $feat (@{ $self->output || [] }) {
    $lc_length += abs($feat->end - $feat->start) + 1;
  }

  return ($lc_length / $query_length) * 100;
}

1;