Bio::EnsEMBL::Analysis::Runnable::Finished MiniEst2Genome
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome
Package variables
No package variables defined.
Included modules
Bio::DB::RandomAccessI
Bio::EnsEMBL::Analysis
Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome
Bio::EnsEMBL::Analysis::Tools::MiniSeq
Bio::EnsEMBL::DnaDnaAlignFeature
Bio::EnsEMBL::FeaturePair
Bio::EnsEMBL::SeqFeature
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::PrimarySeqI
Bio::SeqIO
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
    my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome->new('-genomic'    => $genseq,
'-features' => $features,
'-seqfetcher' => $seqfetcher,
'-analysis' => $analysis,
)
$obj->run my @newfeatures = $obj->output;
Description
Methods
addFeatureDescriptionCode
add_output
No description
Code
analysisDescriptionCode
exon_paddingDescriptionCode
find_extras
No description
Code
genomic_sequenceDescriptionCode
get_SequenceDescriptionCode
get_all_FeatureIdsDescriptionCode
get_all_FeaturesDescriptionCode
get_all_FeaturesById
No description
Code
get_all_SequencesDescriptionCode
make_miniseqDescriptionCode
minimum_intron
No description
Code
new
No description
Code
outputDescriptionCode
print_FeaturePairDescriptionCode
runDescriptionCode
run_blaste2gDescriptionCode
seqfetcherDescriptionCode
Methods description
addFeaturecode    nextTop
    Title   :   addFeature
Usage : $self->addFeature($f)
Function: Adds a feature to the object for realigning
Returns : Bio::EnsEMBL::FeaturePair
Args : Bio::EnsEMBL::FeaturePair
analysiscodeprevnextTop
    Title   :   analysis
Usage : $self->analysis($analysis)
Function: Get/set method for analysis
Returns : Bio::EnsEMBL::Analysis object
Args : Bio::EnsEMBL::Analysis object
exon_paddingcodeprevnextTop
  Title   : exon_padding
Usage :
Function: Defines exon padding extent for miniseq
Returns :
Args :
genomic_sequencecodeprevnextTop
    Title   :   genomic_sequence
Usage : $self->genomic_sequence($seq)
Function: Get/set method for genomic sequence
Returns : Bio::Seq object
Args : Bio::Seq object
get_SequencecodeprevnextTop
  Title   : get_Sequence
Usage : my $seq = get_Sequence($id)
Function: Fetches sequences with id $id
Returns : Bio::PrimarySeq
Args : none
get_all_FeatureIdscodeprevnextTop
  Title   : get_all_FeatureIds
Usage : my @ids = get_all_FeatureIds
Function: Returns an array of all distinct feature hids
Returns : @string
Args : none
get_all_FeaturescodeprevnextTop
    Title   :   get_all_Features
Usage : @f = $self->get_all_Features;
Function: Returns the array of features
Returns : @Bio::EnsEMBL::FeaturePair
Args : none
get_all_SequencescodeprevnextTop
  Title   : get_all_Sequences
Usage : my $seq = get_all_Sequences(@id)
Function: Fetches sequences with ids in @id
Returns : nothing, but $self->{'_seq_cache'}{$id} has a Bio::PrimarySeq for each $id in @id
Args : array of ids
make_miniseqcodeprevnextTop
  Title   : make_miniseq
Usage :
Function: makes a mini genomic from the genomic sequence and features list
Returns :
Args :
outputcodeprevnextTop
  Title   : output
Usage : $self->output
Function: Returns results of est2genome as array of FeaturePair
Returns : An array of Bio::EnsEMBL::FeaturePair
Args : none
print_FeaturePaircodeprevnextTop
  Title   : print_FeaturePair
Usage :
Function: for debugging
Returns :
Args :
runcodeprevnextTop
  Title   : run
Usage : $self->run()
Function: Runs est2genome on MiniSeq representation of genomic sequence for each EST
Returns : none
Args :
run_blaste2gcodeprevnextTop
  Title   : run_blaste2g
Usage : $self->run_blaste2g()
Function: Runs est2genome on a MiniSeq
Returns : none
Args :
seqfetchercodeprevnextTop
    Title   :   seqfetcher
Usage : $self->seqfetcher($seqfetcher)
Function: Get/set method for SeqFetcher
Returns : Bio::DB::RandomAccessI object
Args : Bio::DB::RandomAccessI object
Methods code
addFeaturedescriptionprevnextTop
sub addFeature {
    my( $self, $value ) = @_;

    if(!defined($self->{'_features'})) {
	$self->{'_features'} = [];
    }

    if ($value) {
        $value->isa("Bio::EnsEMBL::FeaturePair") || $self->throw("Input isn't a Bio::EnsEMBL::FeaturePair");
	push(@{$self->{'_features'}},$value);
    }
}
add_outputdescriptionprevnextTop
sub add_output {
    my($self, @feat_pairs) = @_;
    push( @{ $self->{'_output'} } , @feat_pairs);

}

1;

__END__
}
analysisdescriptionprevnextTop
sub analysis {
    my( $self, $value ) = @_;
    if ($value) {
        $value->isa("Bio::EnsEMBL::Analysis") || throw("[$value] isn't a Bio::EnsEMBL::Analysis");
        $self->{'analysis'} = $value;
    }
    return $self->{'_analysis'};
}
exon_paddingdescriptionprevnextTop
sub exon_padding {
    my ($self,$arg) = @_;

    if (defined($arg)) {
	$self->{'_padding'} = $arg;
    }

#    return $self->{'_padding'} || 100;
return $self->{'_padding'} || 1000;
}
find_extrasdescriptionprevnextTop
sub find_extras {
  my ($self,@features) = @_;
  my @output = $self->output;
  my @new;
 FEAT: foreach my $f (@features) {
    my $found = 0;
    if (($f->end - $f->start) < 50) {
      next FEAT;
    }
    #	print ("New feature\n");
#$self->print_FeaturePair($f);
foreach my $out (@output) { foreach my $sf ($out->sub_SeqFeature) { if (!($f->end < $out->start || $f->start >$out->end)) { $found = 1; } } } if ($found == 0) { push(@new,$f); } } return\@ new;
}
genomic_sequencedescriptionprevnextTop
sub genomic_sequence {
    my( $self, $value ) = @_;
    if ($value) {
        #need to check if passed sequence is Bio::Seq object
$value->isa("Bio::PrimarySeqI") || throw("Input isn't a Bio::PrimarySeqI"); $self->{'_genomic_sequence'} = $value; } return $self->{'_genomic_sequence'};
}
get_SequencedescriptionprevnextTop
sub get_Sequence {
    my ($self,$id) = @_;
    my $seqfetcher = $self->seqfetcher;

    if (defined($self->{'_seq_cache'}{$id})) {
      return $self->{'_seq_cache'}{$id};
    }

    my $seq;
    eval {
      $seq = $seqfetcher->get_Seq_by_acc($id);
    };

    # if we didn't get it by accession, try by id
# get_Seq_by_id method not in Pfetch.pm module
if(!defined $seq){ eval{ $seq = $seqfetcher->get_Seq_by_id($id) unless defined $seq; }; } if ((!defined($seq)) && $@) { warning("Couldn't find sequence for [$id]:\n"); } return $seq;
}
get_all_FeatureIdsdescriptionprevnextTop
sub get_all_FeatureIds {
    my ($self) = @_;

    my %idhash;

    foreach my $f ($self->get_all_Features) {
	if (defined($f->hseqname)) {
	    $idhash{$f->hseqname} = 1;
	} else {
	    warning("No sequence name defined for feature. " . $f->seqname . "\n");
	}
    }

    return keys %idhash;
}
get_all_FeaturesdescriptionprevnextTop
sub get_all_Features {
    my( $self, $value ) = @_;
    return (@{$self->{'_features'}});
}
get_all_FeaturesByIddescriptionprevnextTop
sub get_all_FeaturesById {
    my( $self) = @_;
    my  %idhash;

    FEAT: foreach my $f ($self->get_all_Features) {
    if (!(defined($f->hseqname))) {
	warning("No hit name for " . $f->seqname . "\n");
	    next FEAT;
	}
	if (defined($idhash{$f->hseqname})) {
	    push(@{$idhash{$f->hseqname}},$f);
	} else {
	    $idhash{$f->hseqname} = [];
	    push(@{$idhash{$f->hseqname}},$f);
	}

    }

    return (\%idhash);
}
get_all_SequencesdescriptionprevnextTop
sub get_all_Sequences {
  my ($self,@id) = @_;

 SEQ: foreach my $id (@id) {
	my ($acc,$ver) = $id =~ /(\w+)\.(\d+)/;
    my $seq = $self->get_Sequence($id);
    $seq = $self->get_Sequence($acc) unless $seq;
    if(defined $seq) {
      $self->{'_seq_cache'}{$id} = $seq;
    }
  }
}
make_miniseqdescriptionprevnextTop
sub make_miniseq {
    my ($self,@features) = @_;
    my $seqname = $features[0]->seqname;
    @features = sort {$a->start <=> $b->start} @features;
    my $count  = 0;
    my $mingap = $self->minimum_intron;

    my $pairaln = new Bio::EnsEMBL::Analysis::Tools::PairAlign;

    my @genomic_features;

    my $prevend     = 0;
    my $prevcdnaend = 0;
  FEAT: foreach my $f (@features) {

      my $start = $f->start;
      my $end   = $f->end;
      $start = $f->start - $self->exon_padding;
      $end   = $f->end   + $self->exon_padding;

      if ($start < 1) { $start = 1;}
      if ($end   > $self->genomic_sequence->length) {$end = $self->genomic_sequence->length;}

      my $gap     =    ($start - $prevend);

      if ($count > 0 && ($gap < $mingap)) {
	# STRANDS!!!!!
if ($end < $prevend) { $end = $prevend;} $genomic_features[$#genomic_features]->end($end); $prevend = $end; $prevcdnaend = $f->hend; } else { my $newfeature = new Bio::EnsEMBL::SeqFeature; $newfeature->seqname ($f->hseqname); $newfeature->start ($start); $newfeature->end ($end); $newfeature->strand (1); # ??? $newfeature->strand ($strand);
$newfeature->attach_seq($self->genomic_sequence); push(@genomic_features,$newfeature); $prevend = $end; $prevcdnaend = $f->hend; } $count++; } # Now we make the cDNA features
# but presumably only if we actually HAVE any ...
return unless scalar(@genomic_features); my $current_coord = 1; # make a forward strand sequence, est2genome runs -reverse
@genomic_features = sort {$a->start <=> $b->start } @genomic_features; foreach my $f (@genomic_features) { $f->strand(1); my $cdna_start = $current_coord; my $cdna_end = $current_coord + ($f->end - $f->start); my $tmp = new Bio::EnsEMBL::SeqFeature( -seqname => $f->seqname.'.cDNA', -start => $cdna_start, -end => $cdna_end, -strand => 1); my $fp = new Bio::EnsEMBL::FeaturePair(-feature1 => $f, -feature2 => $tmp); $pairaln->addFeaturePair($fp); # $self->print_FeaturePair($fp);
$current_coord = $cdna_end+1; } #changed id from 'Genomic' to seqname
my $miniseq = new Bio::EnsEMBL::Analysis::Tools::MiniSeq(-id => $seqname, -pairalign => $pairaln); my $newgenomic = $miniseq->get_cDNA_sequence->seq; $newgenomic =~ s/(.{72})/$1\n/g; # print ("New genomic sequence is " . $newgenomic. "\n");
return $miniseq;
}
minimum_introndescriptionprevnextTop
sub minimum_intron {
    my ($self,$arg) = @_;

    if (defined($arg)) {
	$self->{'_minimum_intron'} = $arg;
    }

    return $self->{'_minimum_intron'} || 1000;
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);

  $self->{'_fplist'} = []; #create key to an array of feature pairs
my( $genomic, $features, $seqfetcher, $analysis ) = rearrange([qw(GENOMIC FEATURES SEQFETCHER ANALYSIS)], @args); $self->throw("No genomic sequence input") unless defined($genomic); $self->throw("[$genomic] is not a Bio::PrimarySeqI") unless $genomic->isa("Bio::PrimarySeqI"); $self->genomic_sequence($genomic) if defined($genomic); $self->throw("No seqfetcher provided") unless defined($seqfetcher); $self->throw("[$seqfetcher] is not a Bio::DB::RandomAccessI") unless $seqfetcher->isa("Bio::DB::RandomAccessI"); $self->seqfetcher($seqfetcher) if defined($seqfetcher); $self->analysis($analysis) if defined $analysis; if (defined($features)) { if (ref($features) eq "ARRAY") { my @f = @$features; foreach my $f (@f) { $self->addFeature($f); } } else { $self->throw("[$features] is not an array ref."); } } return $self; # success - we hope!
}
outputdescriptionprevnextTop
sub output {
    my ($self) = @_;
    if (!defined($self->{'_output'})) {
	$self->{'_output'} = [];
    }
    return\@ {$self->{'_output'}};
}
print_FeaturePairdescriptionprevnextTop
sub print_FeaturePair {
    my ($self,$nf) = @_;
    #changed $nf->id to $nf->seqname
print(STDERR "FeaturePair is " . $nf->seqname . "\t" . $nf->start . "\t" . $nf->end . "\t(" . $nf->strand . ")\t" . $nf->hseqname . "\t" . $nf->hstart . "\t" . $nf->hend . "\t(" . $nf->hstrand . ")\n");
}
rundescriptionprevnextTop
sub run {
    my ($self) = @_;
    my ($esthash) = $self->get_all_FeaturesById;
    my @ests    = keys %$esthash;
    $self->get_all_Sequences(@ests);
    my $number_of_errors = 0;

    foreach my $est (@ests) {
        my $features = $esthash->{$est};
        my @exons;
        next unless (ref($features) eq "ARRAY");
        next unless (scalar(@$features) >= 1);
        $self->run_blaste2g($est, $features,$self->{analysis});
    }

    return 1;
}
run_blaste2gdescriptionprevnextTop
sub run_blaste2g {
    my ( $self, $est, $features,$analysis ) = @_;
    my $count = @$features;
    my $miniseq = $self->make_miniseq(@$features);
    my $hseq    = $self->get_Sequence($est) or throw("Can't fetch sequence for id '$est'");
	my $g = $miniseq->get_cDNA_sequence;

    my $eg = new Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome(
        -genomic => $g,
        -est     => $hseq,
		-analysis => $analysis,
    );

    $eg->run;

    foreach my $fp ( @{$eg->output} ) {

      my @converted = @{$miniseq->convert_FeaturePair($fp)};

      if ( @converted > 1 ) {
	warning "feature converts into '" . scalar(@converted) . "' > 1 features - ignoring\n";
      } else {
	# convert_FeaturePair zaps strand and hseqname,0
# so we put them back here.
my $new = $converted[0]; # $new->seqname( $fp->hseqname ); #
# $new->strand( $fp->strand ); #
# $new->hseqname( $fp->hseqname ); #
# $new->percent_id( $fp->percent_id ); #
# $new->feature2->percent_id( $fp->percent_id ); #
# reverse logic here, change the $fp passed to equal the converted [miniseq]
# change seqname, hend, hstart, gsf_start, gsf_end
# hend & hstart appear to only be calculated from the length of the end & start
# producing the same length for the hit as the contig. not good....
# $fp->hend( $new->hend );
# $fp->hstart( $new->hstart );
$fp->end( $new->end ); $fp->start( $new->start ); # $self->add_output(@converted);
# print STDERR "CIGAR_STRING : " . $fp->cigar_string . "\n";
# print STDERR $fp->gffstring . "\n";
$self->add_output($fp); } }
}
seqfetcherdescriptionprevnextTop
sub seqfetcher {
    my( $self, $value ) = @_;
    if ($value) {
        #need to check if passed sequence is Bio::DB::RandomAccessI object
$value->isa("Bio::DB::RandomAccessI") || throw("Input isn't a Bio::DB::RandomAccessI"); $self->{'_seqfetcher'} = $value; } return $self->{'_seqfetcher'};
}
General documentation
CONTACTTop
Modified by Sindhu K. Pillai email sp1@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
get_all_FeaturesbyIdTop
    Title   :   get_all_FeaturesById
Usage : $hash = $self->get_all_FeaturesById;
Function: Returns a ref to a hash of features.
The keys to the hash are distinct feature ids
Returns : ref to hash of Bio::EnsEMBL::FeaturePair
Args : none
minimum_intromTop
  Title   : minimum_intron
Usage :
Function: Defines minimum intron size for miniseq
Returns :
Args :
NAME - Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2GenomeTop
AUTHORTop
James Gilbert email jgrg@sanger.ac.uk
Modified by Sindhu K. Pillai email sp1@sanger.ac.uk