Bio::EnsEMBL::Analysis::Runnable::Finished
Est2Genome
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome->new(
-genomic => $genseq,
-est => $estseq
);
or
my $obj = Bio::EnsEMBL::Analysis::Runnable::Finished::Est2Genome->new()
Description
Object to store the details of an est2genome run.
Stores the est2genome matches as an array of Bio::EnsEMBL::FeaturePair
new,
genomic_sequence,
est_sequence,
run,
output.
Methods
Methods description
Title : arguments Usage : $obj->arguments(' -reverse '); Function: Get/set method for est_genome arguments Args : File path (optional) |
Title : est_sequence Usage : $self->est_sequence($seq) Function: Get/set method for est sequence Returns : Bio::Seq object Args : Bio::Seq object |
Title : genomic_sequence Usage : $self->genomic_sequence($seq) Function: Get/set method for genomic sequence Returns : Bio::Seq object Args : Bio::Seq object |
Title : new Usage : $self->new(-GENOMIC => $genomicseq, -EST => $estseq, -E2G => $executable, -ARGS => $args); Function: creates a Bio::EnsEMBL::Analysis::Runnable::Est2Genome object Returns : A Bio::EnsEMBL::Analysis::Runnable::Est2Genome object Args : -genomic: Bio::PrimarySeqI object (genomic sequence) -est: Bio::PrimarySeqI object (est sequence), -e2g: Path to Est2Genome executable -args: Arguments when running est2genome |
Title : run Usage : $self->run() or $self->run("genomic.seq", "est.seq") Function: Runs est2genome and stores results as FeaturePairs Returns : TRUE on success, FALSE on failure. Args : Temporary filenames for genomic and est sequences |
Methods code
sub Exon
{ my ($self) = shift;
my $p = {
-score => undef,
-percent_id => undef,
-gen_start => undef,
-gen_end => undef,
-gen_id => undef,
-est_start => undef,
-est_end => undef,
-est_id => undef,
@_
};
$self->_get_add_exon_lines(
[ $p->{-gen_start}, $p->{-gen_end}, $p->{-score}, $p->{-percent_id} ] ); } |
sub Segment
{ my ($self) = shift;
my $p = {
-score => undef,
-percent_id => undef,
-gen_start => undef,
-gen_end => undef,
-gen_id => undef,
-est_start => undef,
-est_end => undef,
-est_id => undef,
-primary => undef,
@_
};
if ( $p->{-gen_end} < $p->{-gen_start} ) {
( $p->{-gen_end}, $p->{-gen_start} ) =
( $p->{-gen_start}, $p->{-gen_end} );
}
if ( $p->{-est_end} < $p->{-est_start} ) {
( $p->{-est_end}, $p->{-est_start} ) =
( $p->{-est_start}, $p->{-est_end} );
}
my $fp = $self->_create_FeaturePair(
$p->{-score}, $p->{-percent_id},
$p->{-gen_start}, $p->{-gen_end}, $p->{-gen_id},
$p->{-est_start}, $p->{-est_end}, $p->{-est_id},
$self->{'_source_tag'},
$self->gen_strand,
$self->est_strand, $p->{-primary}
);
$self->_add_segment_to_exon( $fp, $p ); } |
sub _add_fp_to_exon
{
my $self = shift;
my $fp = shift;
my $exon = shift;
if ( defined($exon) && defined($fp) ) {
$self->{'_exons'}->{$exon} ||= [];
push( @{ $self->{'_exons'}->{$exon} }, $fp );
} } |
sub _add_segment_to_exon
{
my ( $self, $fp, $p ) = @_;
my $exon_line_count = scalar( @{ $self->{'_exon_lines'} } );
for ( my $i = 0 ; $i < $exon_line_count ; $i++ ) {
my ( $start, $end, $score, $pid ) =
( @{ $self->{'_exon_lines'}->[$i] } );
next if ( $p->{-gen_start} < $start || $p->{-gen_start} > $end );
if ( my $prev_fp = $self->_get_last_fp_of_exon($i) ) {
if ( $fp->start eq ( $prev_fp->end + $fp->strand )
|| $fp->end eq ( $prev_fp->start + $fp->strand )
|| $fp->hstart eq ( $prev_fp->hend + $fp->hstrand )
|| $fp->hend eq ( $prev_fp->hstart + $fp->hstrand ) )
{
$fp->score($score);
$fp->percent_id($pid);
$self->_add_fp_to_exon( $fp, $i );
}
else {
$self->_change_exon_line( $i,
[ $start, $prev_fp->end, $score, $pid ] );
my $new_exon_lines =
$self->_get_add_exon_lines(
[ $fp->start - 1, $end, $score, $pid ] );
$fp->score($score);
$fp->percent_id($pid);
$self->_add_fp_to_exon( $fp,
( scalar( @{$new_exon_lines} ) - 1 ) );
}
}
else {
$fp->score($score);
$fp->percent_id($pid);
$self->_add_fp_to_exon( $fp, $i );
}
} } |
sub _change_exon_line
{
my $self = shift;
my $index = shift;
my $exon_line = shift;
if ( defined($index) && @{$exon_line} ) {
$self->{'_exon_lines'}->[$index] = $exon_line;
}
return $self->{'_exon_lines'}; } |
sub _create_FeaturePair
{
my (
$self, $f1score, $f1percent_id, $f1start, $f1end,
$f1id, $f2start, $f2end, $f2id, $f1source,
$f1strand, $f2strand, $f1primary
)
= @_;
my $analysis_obj = new Bio::EnsEMBL::Analysis(
-db => "none",
-db_version => "none",
-program => "est_genome",
-program_version => "none",
-gff_source => $f1source,
-gff_feature => $f1primary,
);
my $feat1 = new Bio::EnsEMBL::SeqFeature(
-start => $f1start,
-end => $f1end,
-seqname => $f1id,
-strand => $f1strand,
-score => $f1score,
-percent_id => $f1percent_id,
-source_tag => $f1source,
-primary_tag => $f1primary,
-analysis => $analysis_obj
);
my $feat2 = new Bio::EnsEMBL::SeqFeature(
-start => $f2start,
-end => $f2end,
-seqname => $f2id,
-strand => $f2strand,
-score => $f1score,
-percent_id => $f1percent_id,
-source_tag => $f1source,
-primary_tag => $f1primary,
-analysis => $analysis_obj
);
my $fp = new Bio::EnsEMBL::FeaturePair(
-feature1 => $feat1,
-feature2 => $feat2
);
return $fp; } |
sub _createfeatures
{ my (
$self, $f1score, $f1start, $f1end, $f1id,
$f2start, $f2end, $f2id, $f1source, $f2source,
$f1strand, $f2strand, $f1primary, $f2primary
)
= @_;
my $analysis_obj = new Bio::EnsEMBL::Analysis(
-db => "none",
-db_version => "none",
-program => "est_genome",
-program_version => "none",
-gff_source => $f1source,
-gff_feature => $f1primary,
);
my $feat1 = new Bio::EnsEMBL::SeqFeature(
-start => $f1start,
-end => $f1end,
-seqname => $f1id,
-strand => $f1strand,
-score => $f1score,
-percent_id => $f1score,
-source_tag => $f1source,
-primary_tag => $f1primary,
-analysis => $analysis_obj
);
my $feat2 = new Bio::EnsEMBL::SeqFeature(
-start => $f2start,
-end => $f2end,
-seqname => $f2id,
-strand => $f2strand,
-score => $f1score,
-percent_id => $f1score,
-source_tag => $f2source,
-primary_tag => $f2primary,
-analysis => $analysis_obj
);
my $fp = new Bio::EnsEMBL::FeaturePair(
-feature1 => $feat1,
-feature2 => $feat2
);
push( @{ $self->{'_fplist'} }, $fp ); } |
sub _createfiles
{ my ( $self, $genfile, $estfile, $dirname ) = @_;
my $spacelimit = 0.01; my $dir = $dirname;
unless ( $self->_diskspace( $dir, $spacelimit ) ) {
throw("Not enough disk space ($spacelimit Gb required)");
}
$genfile = $self->_getname("genfile") unless ($genfile);
$estfile = $self->_getname("estfile") unless ($estfile);
$genfile = $dirname . $genfile;
$estfile = $dirname . $estfile;
throw("No directory $dirname") unless -e $dirname;
return ( $genfile, $estfile ); } |
sub _deletefiles
{ my ( $self, @files ) = @_;
my $unlinked = unlink(@files);
if ( $unlinked == @files ) {
return 1;
}
else {
my @fails = grep -e, @files;
warning("Failed to remove @fails : $!\n");
} } |
sub _diskspace
{ my ( $self, $dir, $limit ) = @_;
my $Gb = 1024**3;
open DF, "df -k $dir |" || throw("FAILED to open 'df' pipe ".
"Runnable::diskspace : $!\n");
my $count = 0;
my $status = 1;
while (<DF>) {
if($count && $count > 0){
my @values = split;
my $space_in_Gb = $values[3] * 1024 / $Gb; $status = 0 if ($space_in_Gb < $limit);
}
$count++;
}
close DF || throw("FAILED to close 'df' pipe ".
"Runnable::diskspace : $!\n");
return $status; } |
sub _get_add_exon_lines
{
my $self = shift;
my $exon_line = shift;
if ( @{$exon_line} ) {
push( @{ $self->{'_exon_lines'} }, $exon_line )
if scalar( @{$exon_line} ) == 4;
}
return $self->{'_exon_lines'}; } |
sub _get_last_fp_of_exon
{
my $self = shift;
my $exon = shift;
my $last = 0;
if ( defined($exon) ) {
$self->{'_exons'}->{$exon} ||= [];
$last = scalar( @{ $self->{'_exons'}->{$exon} } );
$last = $last - 1 if $last;
}
return $self->{'_exons'}->{$exon}->[$last] || undef; } |
sub _getname
{ my ( $self, $typename ) = @_;
return $typename . "_" . $$ . ".fn"; } |
sub add_output
{
my ( $self, $fp ) = @_;
push( @{ $self->{'_output'} }, $fp ) if defined($fp); } |
sub arguments
{ my ( $self, $args ) = @_;
if ($args) {
$self->{'_arguments'} = $args;
}
return $self->{'_arguments'};
}
} |
sub convert_output
{ my ($self) = @_;
my @genes;
my @exons;
my @supp_feat;
foreach my $f ( @{ $self->{'_fplist'} } ) {
print "Feature " . $f->gffstring . "\n";
if ( $f->primary_tag eq 'Span' ) {
push( @genes, $f );
}
elsif ( $f->primary_tag eq 'Exon' ) {
push( @exons, $f );
}
elsif ( $f->primary_tag eq 'Segment' ) {
push( @supp_feat, $f );
}
}
foreach my $ex (@exons) {
my $added = 0;
foreach my $g (@genes) {
if ( $ex->start >= $g->start
&& $ex->end <= $g->end
&& $ex->strand == $g->strand
&& !$added )
{
$g->feature1->add_sub_SeqFeature( $ex, '' );
$added = 1;
}
}
warning("Exon $ex could not be added to a gene ...\n") unless $added;
}
foreach my $sf (@supp_feat) {
my $added = 0;
foreach my $e (@exons) {
if ( $sf->start >= $e->start
&& $sf->end <= $e->end
&& $sf->strand == $e->strand
&& !$added )
{
$e->feature1->add_sub_SeqFeature( $sf, '' );
$added = 1;
}
}
warning("Feature $sf could not be added to an exon ...\n")
unless $added;
}
push( @{ $self->{'_output'} }, @genes ); } |
sub est_genome
{ my ( $self, $arg ) = @_;
if ( defined($arg) ) {
$self->{'_est_genome'} = $arg;
}
return $self->{'_est_genome'};
}
1; } |
sub est_sequence
{ my ( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::PrimarySeqI")
|| throw("Input isn't a Bio::PrimarySeqI");
$self->{'_est_sequence'} = $value;
$self->estfilename( $value->id . ".$$.est.seq" );
}
return $self->{'_est_sequence'}; } |
sub est_strand
{
my ( $self, $sign ) = @_;
if ( defined($sign) && ( $sign eq '1' || $sign eq '-1' ) ) {
$self->{'_est_strand'} = $sign;
}
return $self->{'_est_strand'}; } |
sub estfilename
{ my ( $self, $estfilename ) = @_;
$self->{'_estfilename'} = $estfilename if ($estfilename);
return $self->{'_estfilename'}; } |
sub filename
{ my ( $self, $filename ) = @_;
$self->{'_filename'} = $filename if ($filename);
return $self->{'_filename'}; } |
sub gen_strand
{
my ( $self, $sign ) = @_;
if ( defined($sign) && ( $sign eq '1' || $sign eq '-1' ) ) {
$self->{'_gen_strand'} = $sign;
}
return $self->{'_gen_strand'}; } |
sub genomic_sequence
{ my ( $self, $value ) = @_;
if ($value) {
$value->isa("Bio::PrimarySeqI")
|| throw("Input isn't a Bio::PrimarySeqI");
$self->{'_genomic_sequence'} = $value;
$self->filename( $value->id . ".$$.seq" );
$self->results( $self->filename . ".est_genome.out" );
}
return $self->{'_genomic_sequence'}; } |
sub new
{
my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
$self->{'_fplist'} = []; $self->{'_clone'} = undef; $self->{'_est_genome'} = undef; $self->{'_workdir'} = undef; $self->{'_filename'} = undef; $self->{'_estfilename'} = undef; $self->{'_results'} = undef; $self->{'_protected'} = []; $self->{'_arguments'} = undef;
my ( $genomic, $est, $est_genome, $arguments ) =
rearrange( [qw(GENOMIC EST E2G ARGS)], @args );
$self->genomic_sequence($genomic) if $genomic;
$self->est_sequence($est) if $est;
if ($est_genome) {
$self->est_genome($est_genome);
}
else {
eval { $self->est_genome( $self->locate_executable('est2genome') ); };
throw("Can't find executable") if $@;
}
if ($arguments) {
$self->arguments($arguments);
}
else {
$self->arguments(' -reverse ');
}
return $self;
}
} |
sub output
{ my ($self) = @_;
return $self->{'_output'} || []; } |
sub results
{ my ( $self, $filename ) = @_;
$self->{'_results'} = $filename if ($filename);
return $self->{'_results'}; } |
sub run
{
my ( $self, @args ) = @_;
$self->{'_source_tag'} = "est2genome";
my $dirname = "/tmp/";
my $genomicseq = $self->genomic_sequence
|| throw("Genomic sequence not provided");
my $estseq = $self->est_sequence || throw("EST sequence not provided");
my ( $genname, $estname ) = rearrange( [ 'genomic', 'est' ], @args );
my ( $genfile, $estfile ) =
$self->_createfiles( $genname, $estname, $dirname );
{
my $genOutput =
Bio::SeqIO->new( -file => ">$genfile", '-format' => 'Fasta' )
or throw("Can't create new Bio::SeqIO from $genfile '$' : $!");
my $estOutput =
Bio::SeqIO->new( -file => ">$estfile", '-format' => 'Fasta' )
or throw("Can't create new Bio::SeqIO from $estfile '$' : $!");
$genOutput->write_seq( $genomicseq );
$estOutput->write_seq( $estseq );
}
my $est_genome_command = $BIN_DIR
. "/est2genome -reverse -genome $genfile -est $estfile -space 500000 -out stdout 2>&1 |";
print STDERR "\nRunning command $est_genome_command\n" if $verbose ;
unless ( open( ESTGENOME, $est_genome_command ) ) {
$self->_deletefiles( $genfile, $estfile );
throw("Can't open pipe from '$est_genome_command' : $!");
}
my $firstline = <ESTGENOME>;
print STDERR "firstline:\t $firstline" if $verbose;
if ( $firstline =~ m/Align EST and genomic DNA sequences/ ) {
# Catch 'Align EST and genomic DNA sequences'. This comes from STDERR!! [ 2>&1 ] $firstline = <ESTGENOME>; print STDERR "\$firstline (secondline!):\t $firstline" if $verbose;
if ( $firstline =~ /insufficient memory available/ ) {
close(ESTGENOME) or warning("problem closing est_genome: $!\n");
$self->_deletefiles( $genfile, $estfile );
die qq{"OUT_OF_MEMORY"\n};
}
}
if ( $firstline =~ m/reversed\sest/ ) { $self->est_strand(-1); }
else {
$self->est_strand(1);
}
if ( $firstline =~ m/forward\sgenome/ ) { $self->gen_strand(1); }
else {
$self->gen_strand(-1);
}
while (<ESTGENOME>) {
print STDERR $_ if $verbose;
if ( $_ =~ /^Segment|^Exon/ ) {
my (
$primary, $score, $percent_id, $gen_start, $gen_end,
$gen_id, $est_start, $est_end, $est_id
)
= (split)[ 0, 1, 2, 3, 4, 5, 6, 7, 8 ];
$self->$primary(
-score => $score,
-percent_id => $percent_id,
-gen_start => $gen_start,
-gen_end => $gen_end,
-gen_id => $gen_id,
-est_start => $est_start,
-est_end => $est_end,
-est_id => $est_id,
-primary => $primary
);
}
elsif ( $_ =~ /Segmentation fault/ ) {
close(ESTGENOME) or warning("problem closing est_genome: $!\n");
$self->_deletefiles( $genfile, $estfile );
throw("Segmentation fault from est_genome:\n<$_>\n");
}
elsif ( $_ =~ /(ERROR:.+)/ ) {
close(ESTGENOME) or warning("problem closing est_genome: $!\n");
$self->_deletefiles( $genfile, $estfile );
throw("Error from est_genome:\n<$_>\n");
}
}
foreach my $seg_array ( keys( %{ $self->{'_exons'} } ) ) {
my $dnafp =
Bio::EnsEMBL::DnaDnaAlignFeature->new(
-features => $self->{'_exons'}->{$seg_array} );
$self->add_output($dnafp);
}
if ( !close(ESTGENOME) ) {
$self->_deletefiles( $genfile, $estfile );
throw("Problems running est_genome when closing pipe: $!\n");
}
$self->_deletefiles( $genfile, $estfile );
return 1; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _