Bio::EnsEMBL::Analysis::Runnable::Finished
MiniEst2Genome
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome->new('-genomic' => $genseq,
'-features' => $features,
'-seqfetcher' => $seqfetcher,
'-analysis' => $analysis,
)
$obj->run
my @newfeatures = $obj->output;
Description
Methods
Methods description
Title : addFeature Usage : $self->addFeature($f) Function: Adds a feature to the object for realigning Returns : Bio::EnsEMBL::FeaturePair Args : Bio::EnsEMBL::FeaturePair |
Title : analysis Usage : $self->analysis($analysis) Function: Get/set method for analysis Returns : Bio::EnsEMBL::Analysis object Args : Bio::EnsEMBL::Analysis object |
Title : exon_padding Usage : Function: Defines exon padding extent for miniseq Returns : Args : |
Title : genomic_sequence Usage : $self->genomic_sequence($seq) Function: Get/set method for genomic sequence Returns : Bio::Seq object Args : Bio::Seq object |
Title : get_Sequence Usage : my $seq = get_Sequence($id) Function: Fetches sequences with id $id Returns : Bio::PrimarySeq Args : none |
Title : get_all_FeatureIds Usage : my @ids = get_all_FeatureIds Function: Returns an array of all distinct feature hids Returns : @string Args : none |
Title : get_all_Features Usage : @f = $self->get_all_Features; Function: Returns the array of features Returns : @Bio::EnsEMBL::FeaturePair Args : none |
Title : get_all_Sequences Usage : my $seq = get_all_Sequences(@id) Function: Fetches sequences with ids in @id Returns : nothing, but $self->{'_seq_cache'}{$id} has a Bio::PrimarySeq for each $id in @id Args : array of ids |
Title : make_miniseq Usage : Function: makes a mini genomic from the genomic sequence and features list Returns : Args : |
Title : output Usage : $self->output Function: Returns results of est2genome as array of FeaturePair Returns : An array of Bio::EnsEMBL::FeaturePair Args : none |
Title : print_FeaturePair Usage : Function: for debugging Returns : Args : |
Title : run Usage : $self->run() Function: Runs est2genome on MiniSeq representation of genomic sequence for each EST Returns : none Args : |
Title : run_blaste2g Usage : $self->run_blaste2g() Function: Runs est2genome on a MiniSeq Returns : none Args : |
Title : seqfetcher Usage : $self->seqfetcher($seqfetcher) Function: Get/set method for SeqFetcher Returns : Bio::DB::RandomAccessI object Args : Bio::DB::RandomAccessI object |
Methods code
sub addFeature
{
my( $self, $value ) = @_;
if(!defined($self->{'_features'})) {
$self->{'_features'} = [];
}
if ($value) {
$value->isa("Bio::EnsEMBL::FeaturePair") || $self->throw("Input isn't a Bio::EnsEMBL::FeaturePair");
push(@{$self->{'_features'}},$value);
} } |
sub add_output
{ my($self, @feat_pairs) = @_;
push( @{ $self->{'_output'} } , @feat_pairs);
}
1;
__END__ } |
sub analysis
{ my( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::EnsEMBL::Analysis") || throw("[$value] isn't a Bio::EnsEMBL::Analysis");
$self->{'analysis'} = $value;
}
return $self->{'_analysis'}; } |
sub exon_padding
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_padding'} = $arg;
}
return $self->{'_padding'} || 1000; } |
sub find_extras
{ my ($self,@features) = @_;
my @output = $self->output;
my @new;
FEAT: foreach my $f (@features) {
my $found = 0;
if (($f->end - $f->start) < 50) {
next FEAT;
}
foreach my $out (@output) {
foreach my $sf ($out->sub_SeqFeature) {
if (!($f->end < $out->start || $f->start >$out->end)) {
$found = 1;
}
}
}
if ($found == 0) {
push(@new,$f);
}
}
return\@ new; } |
sub genomic_sequence
{ my( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::PrimarySeqI") || throw("Input isn't a Bio::PrimarySeqI");
$self->{'_genomic_sequence'} = $value;
}
return $self->{'_genomic_sequence'}; } |
sub get_Sequence
{ my ($self,$id) = @_;
my $seqfetcher = $self->seqfetcher;
if (defined($self->{'_seq_cache'}{$id})) {
return $self->{'_seq_cache'}{$id};
}
my $seq;
eval {
$seq = $seqfetcher->get_Seq_by_acc($id);
};
if(!defined $seq){
eval{
$seq = $seqfetcher->get_Seq_by_id($id) unless defined $seq;
};
}
if ((!defined($seq)) && $@) {
warning("Couldn't find sequence for [$id]:\n");
}
return $seq; } |
sub get_all_FeatureIds
{ my ($self) = @_;
my %idhash;
foreach my $f ($self->get_all_Features) {
if (defined($f->hseqname)) {
$idhash{$f->hseqname} = 1;
} else {
warning("No sequence name defined for feature. " . $f->seqname . "\n");
}
}
return keys %idhash; } |
sub get_all_Features
{ my( $self, $value ) = @_;
return (@{$self->{'_features'}}); } |
sub get_all_FeaturesById
{ my( $self) = @_;
my %idhash;
FEAT: foreach my $f ($self->get_all_Features) {
if (!(defined($f->hseqname))) {
warning("No hit name for " . $f->seqname . "\n");
next FEAT;
}
if (defined($idhash{$f->hseqname})) {
push(@{$idhash{$f->hseqname}},$f);
} else {
$idhash{$f->hseqname} = [];
push(@{$idhash{$f->hseqname}},$f);
}
}
return (\%idhash); } |
sub get_all_Sequences
{ my ($self,@id) = @_;
SEQ: foreach my $id (@id) {
my ($acc,$ver) = $id =~ /(\w+)\.(\d+)/;
my $seq = $self->get_Sequence($id);
$seq = $self->get_Sequence($acc) unless $seq;
if(defined $seq) {
$self->{'_seq_cache'}{$id} = $seq;
}
} } |
sub make_miniseq
{
my ($self,@features) = @_;
my $seqname = $features[0]->seqname;
@features = sort {$a->start <=> $b->start} @features;
my $count = 0;
my $mingap = $self->minimum_intron;
my $pairaln = new Bio::EnsEMBL::Analysis::Tools::PairAlign;
my @genomic_features;
my $prevend = 0;
my $prevcdnaend = 0;
FEAT: foreach my $f (@features) {
my $start = $f->start;
my $end = $f->end;
$start = $f->start - $self->exon_padding;
$end = $f->end + $self->exon_padding;
if ($start < 1) { $start = 1;}
if ($end > $self->genomic_sequence->length) {$end = $self->genomic_sequence->length;}
my $gap = ($start - $prevend);
if ($count > 0 && ($gap < $mingap)) {
if ($end < $prevend) { $end = $prevend;}
$genomic_features[$#genomic_features]->end($end);
$prevend = $end;
$prevcdnaend = $f->hend;
} else {
my $newfeature = new Bio::EnsEMBL::SeqFeature;
$newfeature->seqname ($f->hseqname);
$newfeature->start ($start);
$newfeature->end ($end);
$newfeature->strand (1);
$newfeature->attach_seq($self->genomic_sequence);
push(@genomic_features,$newfeature);
$prevend = $end;
$prevcdnaend = $f->hend;
}
$count++;
}
return unless scalar(@genomic_features);
my $current_coord = 1;
@genomic_features = sort {$a->start <=> $b->start } @genomic_features;
foreach my $f (@genomic_features) {
$f->strand(1);
my $cdna_start = $current_coord;
my $cdna_end = $current_coord + ($f->end - $f->start);
my $tmp = new Bio::EnsEMBL::SeqFeature(
-seqname => $f->seqname.'.cDNA',
-start => $cdna_start,
-end => $cdna_end,
-strand => 1);
my $fp = new Bio::EnsEMBL::FeaturePair(-feature1 => $f,
-feature2 => $tmp);
$pairaln->addFeaturePair($fp);
$current_coord = $cdna_end+1;
}
my $miniseq = new Bio::EnsEMBL::Analysis::Tools::MiniSeq(-id => $seqname,
-pairalign => $pairaln);
my $newgenomic = $miniseq->get_cDNA_sequence->seq;
$newgenomic =~ s/(.{72})/$1\n/g;
return $miniseq; } |
sub minimum_intron
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_minimum_intron'} = $arg;
}
return $self->{'_minimum_intron'} || 1000; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_fplist'} = [];
my( $genomic, $features, $seqfetcher, $analysis ) = rearrange([qw(GENOMIC
FEATURES
SEQFETCHER
ANALYSIS)], @args);
$self->throw("No genomic sequence input")
unless defined($genomic);
$self->throw("[$genomic] is not a Bio::PrimarySeqI")
unless $genomic->isa("Bio::PrimarySeqI");
$self->genomic_sequence($genomic) if defined($genomic);
$self->throw("No seqfetcher provided")
unless defined($seqfetcher);
$self->throw("[$seqfetcher] is not a Bio::DB::RandomAccessI")
unless $seqfetcher->isa("Bio::DB::RandomAccessI");
$self->seqfetcher($seqfetcher) if defined($seqfetcher);
$self->analysis($analysis) if defined $analysis;
if (defined($features)) {
if (ref($features) eq "ARRAY") {
my @f = @$features;
foreach my $f (@f) {
$self->addFeature($f);
}
} else {
$self->throw("[$features] is not an array ref.");
}
}
return $self;
} |
sub output
{ my ($self) = @_;
if (!defined($self->{'_output'})) {
$self->{'_output'} = [];
}
return\@ {$self->{'_output'}}; } |
sub print_FeaturePair
{ my ($self,$nf) = @_;
print(STDERR "FeaturePair is " . $nf->seqname . "\t" .
$nf->start . "\t" .
$nf->end . "\t(" .
$nf->strand . ")\t" .
$nf->hseqname . "\t" .
$nf->hstart . "\t" .
$nf->hend . "\t(" .
$nf->hstrand . ")\n"); } |
sub run
{
my ($self) = @_;
my ($esthash) = $self->get_all_FeaturesById;
my @ests = keys %$esthash;
$self->get_all_Sequences(@ests);
my $number_of_errors = 0;
foreach my $est (@ests) {
my $features = $esthash->{$est};
my @exons;
next unless (ref($features) eq "ARRAY");
next unless (scalar(@$features) >= 1);
$self->run_blaste2g($est, $features,$self->{analysis});
}
return 1; } |
sub run_blaste2g
{ my ( $self, $est, $features,$analysis ) = @_;
my $count = @$features;
my $miniseq = $self->make_miniseq(@$features);
my $hseq = $self->get_Sequence($est) or throw("Can't fetch sequence for id '$est'");
my $g = $miniseq->get_cDNA_sequence;
my $eg = new Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome(
-genomic => $g,
-est => $hseq,
-analysis => $analysis,
);
$eg->run;
foreach my $fp ( @{$eg->output} ) {
my @converted = @{$miniseq->convert_FeaturePair($fp)};
if ( @converted > 1 ) {
warning "feature converts into '" . scalar(@converted) . "' > 1 features - ignoring\n";
} else {
my $new = $converted[0];
$fp->end( $new->end );
$fp->start( $new->start );
$self->add_output($fp);
}
} } |
sub seqfetcher
{ my( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::DB::RandomAccessI") || throw("Input isn't a Bio::DB::RandomAccessI");
$self->{'_seqfetcher'} = $value;
}
return $self->{'_seqfetcher'}; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
Title : get_all_FeaturesById
Usage : $hash = $self->get_all_FeaturesById;
Function: Returns a ref to a hash of features.
The keys to the hash are distinct feature ids
Returns : ref to hash of Bio::EnsEMBL::FeaturePair
Args : none
Title : minimum_intron
Usage :
Function: Defines minimum intron size for miniseq
Returns :
Args :
NAME - Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome | Top |