Bio::EnsEMBL::Analysis::Runnable::Finished EST
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Blast
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Analysis::Runnable::Finished::Blast
Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome
Bio::EnsEMBL::Analysis::Tools::BPliteWrapper
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
No synopsis!
Description
No description!
Methods
_make_seqfetcher
No description
Code
add_output
No description
Code
analysis
No description
Code
do_mini_est_genome
No description
Code
get_db_versionDescriptionCode
new
No description
Code
query
No description
Code
run
No description
Code
run_est_genome_on_strand
No description
Code
seqfetcher
No description
Code
unmasked
No description
Code
Methods description
get_db_versioncode    nextTop
    Title   :  get_db_version
[ distinguished from RunnableDB::*::db_version_searched() ]
Useage : $self->get_db_version('version string')
$obj->get_db_version()
Function: Get/Set a blast database version that was searched
The actual look up is done in Runnable::Finished_Blast
This is just a holding place for the string in this
module
Returns : String or undef
Args : String
Caller : $self::run()
RunnableDB::Finished_EST::db_version_searched()
Methods code
_make_seqfetcherdescriptionprevnextTop
sub _make_seqfetcher {
    my ( $self, $indices ) = @_;
    my $seqfetcher;
    if ( ref($indices) eq "ARRAY"){
        $seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::xdget->new(-db => $indices,
        															 -executable => '/software/farm/bin/xdget');
    }
    else{
        $seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::Pfetch->new();
    }
    return $seqfetcher;
}
add_outputdescriptionprevnextTop
sub add_output {
    my ( $self, $f ) = @_;
    my $ana = $self->analysis;
    $f->analysis($ana);
    $self->{'output'} ||= [];
    push ( @{ $self->{'output'} }, $f );
}
analysisdescriptionprevnextTop
sub analysis {
    my ( $self, $analysis ) = @_;
    if ($analysis) {
        $self->{'_analysis'} = $analysis;
    }
    return $self->{'_analysis'};
}
do_mini_est_genomedescriptionprevnextTop
sub do_mini_est_genome {
    my ( $self,$linear ) = @_;
    print STDERR "\nLinear Matches\n" if $verbose;
    foreach my $lin (@$linear) {
        print STDERR "before:  " . $lin->gffstring . "\n" if $verbose;
    }

    ### Is this merging features?  - May be cause of duplicate features if isn't
### Duplicate features found has been eliminated by setting the discard_overlaps to 1 , since the duplicates created were found to be due to many overlapping hits.
my $e2g = new Bio::EnsEMBL::Analysis::Runnable::Finished::MiniEst2Genome( '-genomic' => $self->unmasked, '-features' => $linear, '-seqfetcher' => $self->seqfetcher, '-analysis' => $self->analysis, ); $e2g->run; foreach my $fp ( @{$e2g->output} ) { print STDERR "after: " . $fp->gffstring . "\n" if $verbose; # source_tag and primary_tag have to be set to
# something, or validate method in FeaturePair
# (callled by RunnableDB) throws!
# $fp->feature2->source_tag('I_am_valid');
# $fp->feature2->primary_tag('I_am_valid');
$self->add_output($fp); }
}
get_db_versiondescriptionprevnextTop
sub get_db_version {
    my ($self, $arg) = @_;
    $self->{'_db_version_searched'} = $arg if $arg;
    return $self->{'_db_version_searched'};
}
1;

__END__
}
newdescriptionprevnextTop
sub new {
    my ( $class, @args ) = @_;
    my $self = bless {}, $class;
    my ( $query, $unmasked, $analysis, $seqfetcher ) = rearrange(
        [
        qw{
            QUERY
            UNMASKED
            ANALYSIS
            SEQFETCHER
        }
        ],
        @args
    );
    die "No QUERY (masked genomic sequence) given" unless $query;
    die "No UNMASKED (genomic sequence) given"     unless $unmasked;
    $self->query($query);
    $self->unmasked($unmasked);
    $self->analysis($analysis);

    # this should make a OBDAIndexSeqFetcher, but will default to Pfetch
my $indices = [ split(",", $self->analysis->db) ]; $seqfetcher = $self->_make_seqfetcher($indices); $self->seqfetcher($seqfetcher); return $self;
}
querydescriptionprevnextTop
sub query {
    my ( $self, $seq ) = @_;
    if ($seq) {
        unless ( $seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::Seq") ) {
            throw("Input isn't a Bio::Seq or Bio::PrimarySeq");
        }
        $self->{'_query'} = $seq;
    }
    return $self->{'_query'};
}
rundescriptionprevnextTop
sub run {
    my ($self) = shift;
    my $parser = shift;
    my $filter = shift;
    my $blastcon = shift;

    my $blast = Bio::EnsEMBL::Analysis::Runnable::Finished::Blast->new(
     -query => $self->query,
     -program => 'wublastn',
     -database => $self->analysis->db_file,
     -analysis => $self->analysis,
     -parser => $parser,
     -filter => $filter,
     %{$blastcon},
     );

    my $db_files = $blast->databases;

    # set the db_version_searched
my $dbv = $blast->get_db_version(); print "using $dbv\n"; $self->get_db_version($dbv); foreach my $db_file (sort @$db_files) { print STDOUT "database file $db_file\n"; $blast->clean_output(); $blast->clean_results_files(); $blast->clean_databases(); $blast->databases($db_file); $blast->run(); my $features = $blast->output ; if (!$self->analysis->db) { print STDOUT "indice file $db_file\n"; my $seqfetcher = $self->_make_seqfetcher([$db_file]); $self->seqfetcher($seqfetcher); } print STDERR "\nPlus strand est_genome\n" if $verbose; $self->run_est_genome_on_strand( 1, $features ); print STDERR "\nMinus strand est_genome\n" if $verbose; $self->run_est_genome_on_strand( -1, $features) }
}
run_est_genome_on_stranddescriptionprevnextTop
sub run_est_genome_on_strand {
    my ( $self, $strand, $feat ) = @_;
    my $hit_features = {};
    for ( my $i = 0 ; $i < @$feat ; $i++ ) {

      my $f   = $feat->[$i];
      my $hid = $f->hseqname or throw("Missing hid");
      next unless $f->strand == $strand;
      $hit_features->{$hid} ||= [];
      push ( @{ $hit_features->{$hid} }, $f );
    }
    my ($is_linear);
    if ( $strand == 1 ) {
        $is_linear = sub {
            my ( $x, $y ) = @_;
            return $x->hstart <= $y->hstart;
        };
    }
    else {
        $is_linear = sub {
            my ( $x, $y ) = @_;
            return $x->hend >= $y->hend;
        };
    }
    while ( my ( $hid, $flist ) = each %$hit_features ) {

        print STDERR "$hid\n" if $verbose;
        print STDERR "PRESORTED\n" if $verbose; #
foreach my $f (@$flist) { print STDERR "hit: " . $f->gffstring . "\n" if $verbose; } # Sort features by start/end in genomic
@$flist = sort { $a->start <=> $b->start or $a->end <=> $b->end } @$flist; print STDERR "SORTED\n" if $verbose; foreach my $f (@$flist) { print STDERR "hit: " . $f->gffstring . "\n" if $verbose; } # Group into linear matches
my @sets = ( [ $flist->[0] ] ); my $curr = $sets[0]; for ( my $i = 1 ; $i < @$flist ; $i++ ) { my $prev = $flist->[ $i - 1 ]; my $this = $flist->[$i]; if ( &$is_linear( $prev, $this ) ) { push ( @$curr, $this ); } else { $curr = [$this]; push ( @sets, $curr ); } } print STDERR "MADE ",scalar(@sets)," LINEAR MATCHES SET(S)\n" if $verbose; foreach my $lin (@sets) { $self->do_mini_est_genome($lin); } }
}
seqfetcherdescriptionprevnextTop
sub seqfetcher {
    my ( $self, $seqfetcher ) = @_;
    if ($seqfetcher) {
        $self->{'_seqfetcher'} = $seqfetcher;
    }
    return $self->{'_seqfetcher'};
}
unmaskeddescriptionprevnextTop
sub unmasked {
    my ( $self, $seq ) = @_;
    if ($seq) {
        unless ( $seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::Seq") ) {
            die Dumper($seq);
            throw("Input isn't a Bio::Seq or Bio::PrimarySeq");
        }
        $self->{'_unmasked'} = $seq;
    }
    return $self->{'_unmasked'};
}
General documentation
NAME - Bio::EnsEMBL::Analysis::Runnable::Finished_ESTTop
AUTHORTop
James Gilbert email jgrg@sanger.ac.uk
Modified by Sindhu K. Pillai email sp1@sanger.ac.uk