Raw content of Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
=head1 SYNOPSIS
my $obj = Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise->new(-genomic => $genseq,
-features => $features)
$obj->run
my @newfeatures = $obj->output;
=head1 DESCRIPTION
=head1 CONTACT
Describe contact details here
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Analysis::Runnable::MiniGenewise;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Tools::Logger qw(logger_info);
use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils
qw(attach_Slice_to_Transcript);
use Bio::EnsEMBL::Utils::Exception qw(throw warning stack_trace_dump);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my( $features,$seqfetcher, $cluster_start, $cluster_end, $full_seq,
$genewise_options, $minigenewise_options, $min_length) =
rearrange([qw( FEATURES SEQFETCHER CLUSTER_START CLUSTER_END
FULLSEQ GENEWISE_OPTIONS MINIGENEWISE_OPTIONS
MINIMUM_FEATURE_LENGTH)], @args);
#SETTING DEFAULTS
$self->minimum_feature_length(50);
#################
$self->seqfetcher($seqfetcher);
$self->features($features);
$self->cluster_start($cluster_start);
$self->cluster_end($cluster_end);
$self->full_seq($full_seq);
$self->genewise_options($genewise_options);
$self->minigenewise_options($minigenewise_options);
$self->minimum_feature_length($min_length);
throw("MultiMiniGenewise needs a query sequence ") unless($self->query);
throw("MultiMiniGenewise needs features to run across")
unless($self->features && scalar(@{$self->features}));
throw("MultiMiniGenewise needs a sequence fetcher ") unless($self->seqfetcher);
return $self;
}
#ACCESSOR METHODS
=head2 genewise_options
Title : genewise_options
Usage : $self->genewise_options($genewise_options)
Function: Get/set method for genewise genewise_options
Returns :
Args :
=cut
sub genewise_options {
my ($self,$arg) = @_;
if (defined($arg)) {
throw("Must pass genewise options a hash ref not a $arg")
unless(ref($arg) eq "HASH");
$self->{'_genewise_options'} = $arg;
}
return $self->{'_genewise_options'};
}
=head2 minigenewise_options
Title : minigenewise_options
Usage : $self->minigenewise_options($minigenewise_options)
Function: Get/set method for minigenewise minigenewise_options
Returns :
Args :
=cut
sub minigenewise_options {
my ($self,$arg) = @_;
if (defined($arg)) {
throw("Must pass minigenewise options a hash ref not a $arg")
unless(ref($arg) eq "HASH");
$self->{'_minigenewise_options'} = $arg;
}
return $self->{'_minigenewise_options'};
}
=head2 features
Arg [1] : arrayref
Function : sets varible to arrayref
Returntype: arrayref
Exceptions: throws if not given an arrayref or if elements of array aren't featurepairs'
Caller : $self
Example : my $features = $self->features();
=cut
sub features {
my ($self,$features) = @_;
if (!defined($self->{_features})) {
$self->{_features} = [];
}
if (defined($features)) {
if (ref($features) eq "ARRAY") {
foreach my $f (@$features) {
if ($f->isa("Bio::EnsEMBL::FeaturePair")) {
push(@{$self->{_features}},$f);
} else {
throw("Object [$f] is not a Bio::EnsEMBL::FeaturePair");
}
}
} else {
throw("[$features] is not an array ref.");
}
}
return $self->{_features};
}
=head2 object accessors
Arg [1] : Object of correct type
Function : get/set object
Returntype: object
Exceptions: throws if object not of correct type
Caller : $self
Example : my $genomic = $self->query;
=cut
sub genomic_sequence {
my( $self, $value ) = @_;
if ($value) {
#need to check if passed sequence is Bio::Seq object
$value->isa("Bio::PrimarySeqI") || throw("Input isn't a Bio::PrimarySeqI");
$self->{'_genomic_sequence'} = $value;
}
return $self->{'_genomic_sequence'};
}
sub seqfetcher {
my( $self, $value ) = @_;
if (defined($value)) {
#need to check if we are being passed a Bio::DB::RandomAccessI object
throw("[$value] is not a Bio::DB::RandomAccessI") unless $value->isa("Bio::DB::RandomAccessI");
$self->{'_seqfetcher'} = $value;
}
return $self->{'_seqfetcher'};
}
sub minimum_intron {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_minimum_intron'} = $arg;
}
return $self->{'_minimum_intron'};
}
sub exon_padding {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_padding'} = $arg;
}
return $self->{'_padding'};
}
sub terminal_padding {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_terminal_padding'} = $arg;
}
return $self->{'_terminal_padding'};
}
sub cluster_start {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_cluster_start} = $arg;
}
return $self->{_cluster_start};
}
sub cluster_end {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_cluster_end} = $arg;
}
return $self->{_cluster_end};
}
sub full_seq {
my( $self, $value ) = @_;
if ($value) {
$self->{'_full_seq'} = $value;
}
return $self->{'_full_seq'};
}
sub minimum_feature_length {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_minimum_feature_length} = $arg;
}
return $self->{_minimum_feature_length};
}
#Functionality
=head2 run
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
Function : sort through the features passed in and create MiniGenewise
objects for them and run them
Returntype: 1
Exceptions: warns if a single id failed to find a sequence, throws if
all the ids fail to find a sequence
Example :
=cut
sub run{
my ($self) = @_;
my ($fhash,$ids) = $self->get_all_features_by_id;
my $failed_count = 0;
my @output;
ID:foreach my $id(@$ids){
my $features = $fhash->{$id};
#printf STDERR "MMG doing $id (%d feats)\n", scalar(@$features);
my @forward;
my @reverse;
my $peptide_sequence = $self->get_Sequence($features->[0]->hseqname);
warning($id." has produced no peptide sequence from ".$self->seqfetcher)
if(!$peptide_sequence);
$failed_count++ if(!$peptide_sequence);
foreach my $feature(@$features){
if($feature->strand == -1){
push(@reverse, $feature);
}elsif($feature->strand == 1){
push(@forward, $feature);
}else{
throw("MultiMiniGenewise ".$feature." from id ".$id.
" seems to have no strand defined");
}
}
logger_info("MMG have ".@forward." forward strand features and ".@reverse.
" reverse strand features\n");
my $slice = $self->query;
if($self->cluster_end){
#print "MMG making subseq based on ".$self->cluster_start." ".$self->cluster_end." from ".$self->query->name."\n";
my $string_seq = ('N' x ($self->cluster_start - 1)) .
$self->query->subseq($self->cluster_start, $self->cluster_end) .
('N' x ($self->query->length - ($self->cluster_end + 1)));
$slice = Bio::EnsEMBL::Slice->new
(
-seq => $string_seq,
-seq_region_name => $self->query->seq_region_name,
-start => 1,
-end => length($string_seq),
-coord_system => $self->query->coord_system,
);
}else{
#print "MMG using whole query ".$self->query->name."\n";
}
my $forward_output = $self->run_MiniGenewise(\@forward, $slice,
$peptide_sequence)
if(@forward >= 1);
$self->output($forward_output);
my $reverse_output = $self->run_MiniGenewise(\@reverse, $slice,
$peptide_sequence)
if(@reverse >= 1);
$self->output($reverse_output);
push(@output, @$forward_output) if($forward_output);
push(@output, @$reverse_output) if($reverse_output);
}
if($self->cluster_end){
foreach my $output(@{$self->output}){
attach_Slice_to_Transcript($output, $self->query);
}
}
my $count = 1;
foreach my $output(@{$self->output}){
#print "MMG output $count ".$output->start." ".$output->end." ".$output->strand." ".@{$output->get_all_Exons}."\n";
$count++;
}
#$self->output(\@output);
if($failed_count == @$ids){
throw("Can't find any sequences for the ".@$ids." ids which match ".
$self->query->name);
}
return 1;
}
=head2 run_MiniGenewise
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::Tools::MultiMiniGenewise
Arg [2] : arrayref of Features
Arg [3] : Dna sequence (Should be a primaryseq)
Arg [4] : Protein sequence (should be a primary seq)
Function : Create and run a MiniGenewise object on the given features
Returntype: arrayref of transcripts from MiniGenewise
Exceptions:
Example :
=cut
sub run_MiniGenewise{
my ($self, $features, $slice, $peptide) = @_;
my @to_use = @{$self->find_extras($features)};
return () if(@to_use == 0);
if($self->full_seq){
my $rangefeat = new Bio::EnsEMBL::FeaturePair
(
-start => $features->[0]->start,
-end => $features->[-1]->end,
-strand=> 1,
-slice => $slice
);
$rangefeat->strand(-1) if($features->[0]->strand == -1);
$features = [$rangefeat];
}
my %params = %{$self->minigenewise_options} if($self->minigenewise_options);
my $runnable = Bio::EnsEMBL::Analysis::Runnable::MiniGenewise->new
(
-query => $slice,
-protein => $peptide,
-features => $features,
-genewise_options => $self->genewise_options,
-analysis => $self->analysis,
%params,
);
%params = ();
$runnable->run;
my @output = @{$runnable->output};
return(\@output);
}
=head2 get_all_feature_by_id
Arg [1] : none
Function : arranges all feature into hash keyed by hseqname, each element
containing an anonymous array of features with that name also produces an hash of
key hseqname and value of score which is used to sort an array of hseqname
Returntype: hasfref and array ref
Exceptions: warns and skips if a feature doesn't have a hseqname'
Caller : $self
Example : my ($idhash, $idarray) = $self->get_all_features_by_id;
=cut
sub get_all_features_by_id {
my ($self) = @_;
my %idhash;
my %scorehash;
FEAT: foreach my $f (@{$self->features}) {
if (!$f->hseqname) {
warning("No hit name for " . $f->seqname . "\n");
next FEAT;
}
$idhash{$f->hseqname} = [] if(!$idhash{$f->hseqname});
push(@{$idhash{$f->hseqname}},$f);
$scorehash{$f->hseqname} = 0 if(!$scorehash{$f->hseqname});
$scorehash{$f->hseqname} = $f->score if($f->score >
$scorehash{$f->hseqname});;
}
my @ids = keys %idhash;
@ids = sort {$scorehash{$b} <=> $scorehash{$a}} @ids;
return (\%idhash,\@ids);
}
=head2 get_Sequence
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::MultiMiniGenewise
Arg [2] : string, peptide id
Function : fetch given peptide from sequence index
Returntype: Bio::PrimarySeq
Exceptions: warns if not passed an id or if the sequence cannot be found
Example :
=cut
sub get_Sequence {
my ($self,$id) = @_;
my $seqfetcher = $self->seqfetcher;
my $seq;
my $name;
if($seqfetcher->can('db')){
my @dbs = $seqfetcher->db;
$name = $dbs[0];
}
if (!$id) {
warning("No id input to get_Sequence");
return undef;
}
eval {
$seq = $seqfetcher->get_Seq_by_acc($id);
};
if($@) {
warning("Problem fetching sequence for id [$id] with $seqfetcher $name $@\n");
return undef;
}
if(!$seq){
warning("Could not find sequence for [$id] with $seqfetcher $name");
}
return $seq;
}
sub find_extras{
my ($self, $features) = @_;
my $output = $self->output;
my @out;
foreach my $f(@$features){
my $found = 0;
$f->slice($self->query);
$f->seqname($f->slice->name);
if($f->length < $self->minimum_feature_length){
next;
}
foreach my $transcript(@$output){
foreach my $exon(@{$transcript->get_all_Exons}){
$exon->slice($self->query);
if($f->overlaps($exon)){
$found = 1;
}
}
}
if($found == 0){
push(@out, $f);
}
}
return \@out;
}
1;