Bio::EnsEMBL::Analysis::Runnable
Genewise
Toolbar
Summary
Bio::EnsEMBL::Pipeline::Runnable::GeneWise - aligns protein hits to genomic sequence
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
# Initialise the genewise module
my $genewise = new Bio::EnsEMBL::Pipeline::Runnable::Genewise (
'genomic' => $genomic,
'protein' => $protein,
'memory' => $memory);
$genewise->run;
my @genes = $genewise->output
Description
Methods
check_environment | No description | Code |
endbias | No description | Code |
extension | No description | Code |
gap | No description | Code |
hmm | No description | Code |
make_transcript | Description | Code |
matrix | No description | Code |
memory | No description | Code |
new | No description | Code |
parse_genewise_output | Description | Code |
parse_results | No description | Code |
protein | No description | Code |
protfile | No description | Code |
reverse | No description | Code |
run | No description | Code |
run_analysis | No description | Code |
splice_model | No description | Code |
subs | No description | Code |
target_length | No description | Code |
verbose | No description | Code |
Methods description
Arg [1] : $exons_ref, reference to array of Bio::EnsEMBL::Feature Function : Turns array of exons into transcript & validates it. Returntype: Bio::EnsEMBL::Transcript Exceptions: Caller : Example : |
Arg [1] : $columns_ref, reference to array of strings Arg [2] : $curr_gene_ref, reference to scalar Arg [3] : $curr_exon_ref, reference to Bio::EnsEMBL::Feature Arg [4] : $exons_ref, reference to array of Bio::EnsEMBL::Feature Function : Parses genewise output lines and adds exons/supporting features to exons_ref Returntype: Exceptions: Caller : Example : |
Methods code
check_environment | description | prev | next | Top |
sub check_environment
{ my($self,$genefile) = @_;
if (! -d $ENV{WISECONFIGDIR}) {
throw("No WISECONFIGDIR [" . $ENV{WISECONFIGDIR} . "]");
}
}
1; } |
sub endbias
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_endbias'} = $arg;
}
if (!defined($self->{'_endbias'})) {
$self->{'_endbias'} = 0;
}
return $self->{'_endbias'}; } |
sub extension
{ my ($self,$arg) = @_;
if(!$self->{'_ext'}){
$self->{'_ext'} = undef;
}
if (defined($arg)) {
$self->{'_ext'} = $arg;
}
return $self->{'_ext'}; } |
sub gap
{ my ($self,$arg) = @_;
if(!$self->{'_gap'}){
$self->{'_gap'} = undef;
}
if (defined($arg)) {
$self->{'_gap'} = $arg;
}
return $self->{'_gap'}; } |
sub hmm
{ my ($self,$arg) = @_;
if (defined($arg)) {
open(HMM, $arg) or throw("[$arg] is not a readable file");
my $model_len;
while(<HMM>) {
/^LENG\s+(\d+)$/ and do {
$model_len = $1;
last;
}
}
close(HMM);
throw("[$arg] is not a HMM file in HMMER format") if not defined $model_len;
$self->target_length($model_len);
$self->{'_hmm'} = $arg;
}
return $self->{'_hmm'}; } |
sub make_transcript
{ my ($self, $exonsref) = @_;
my @exons = @$exonsref;
my $transcript = Bio::EnsEMBL::Transcript->new;
my $translation = Bio::EnsEMBL::Translation->new;
$transcript->translation($translation);
if ($#exons < 0) {
warning("Odd. No exons found\n");
return undef;
}
else {
if ($exons[0]->strand == -1) {
@exons = sort {$b->start <=> $a->start} @exons;
} else {
@exons = sort {$a->start <=> $b->start} @exons;
}
$translation->start_Exon($exons[0]);
$translation->end_Exon ($exons[$#exons]);
if ($exons[0]->phase == 0) {
$translation->start(1);
} elsif ($exons[0]->phase == 1) {
$translation->start(3);
} elsif ($exons[0]->phase == 2) {
$translation->start(2);
}
$translation->end ($exons[$#exons]->end - $exons[$#exons]->start + 1);
my ($min_start, $max_end, $min_hstart, $max_hend, $total_hcoverage, $hit_name);
foreach my $exon(@exons){
my ($sf) = @{$exon->get_all_supporting_features};
if (defined $sf) {
$hit_name = $sf->hseqname;
if (not defined $min_start or $sf->start < $min_start) {
$min_start = $sf->start;
}
if (not defined $max_end or $sf->hend > $max_end) {
$max_end = $sf->end;
}
if (not defined $min_hstart or $sf->hstart < $min_hstart) {
$min_hstart = $sf->hstart;
}
if (not defined $max_hend or $sf->hend > $max_hend) {
$max_hend = $sf->hend;
}
$total_hcoverage += $sf->hend - $sf->hstart + 1;
}
$transcript->add_Exon($exon);
}
my $tsf = Bio::EnsEMBL::FeaturePair->new();
$tsf->start($min_start);
$tsf->end($max_end);
$tsf->strand($transcript->strand);
$tsf->seqname($self->query->id);
$tsf->hseqname($hit_name);
$tsf->hstart($min_hstart);
$tsf->hend ($max_hend);
$tsf->hstrand (1);
$tsf->hcoverage(100 * ($total_hcoverage / $self->target_length));
$transcript->add_supporting_features($tsf);
}
return $transcript;
}
} |
sub matrix
{ my ($self,$arg) = @_;
if(!$self->{'_matrix'}){
$self->{'_matrix'} = undef;
}
if (defined($arg)) {
$self->{'_matrix'} = $arg;
}
return $self->{'_matrix'}; } |
sub memory
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_memory'} = $arg;
}
return $self->{'_memory'}; } |
sub new
{ my($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($protein, $memory, $reverse,$endbias, $gap,
$ext, $subs, $matrix, $verbose, $hmm, $splice_model) = rearrange([qw(
PROTEIN
MEMORY
REVERSE
ENDBIAS
GAP
EXTENSION
SUBS
MATRIX
VERBOSE
HMM
SPLICE_MODEL)], @args);
$self->program('genewise') unless($self->program);
$self->memory(100000);
$self->gap(12);
$self->extension(2);
$self->subs(0.0000001);
$self->matrix('BLOSUM62.bla');
$self->options('-quiet') unless($self->options);
$self->protein($protein);
$self->hmm($hmm);
throw("Must have a hmm or a protein") if(!$self->protein && !$self->hmm);
throw("Must not have both a hmm ".$self->hmm." and a protein ".$self->protein)
if($self->protein && $self->hmm);
throw("Must have a query sequence defined")
if(!$self->query);
$self->reverse($reverse);
$self->endbias($endbias) if(defined $endbias);
$self->memory ($memory);
$self->gap($gap);
$self->extension($ext);
$self->subs($subs);
$self->matrix($matrix);
$self->splice_model($splice_model) if($splice_model);
$self->verbose($verbose);
return $self; } |
sub parse_genewise_output
{ my ($self, $fh) = @_;
my ($hit_name, @genes);
while(<$fh>) {
$self->verbose and print;
chomp;
if (/^Query\s+\S+:\s+(\S+)/) {
$hit_name = $1;
next;
}
my @l = split;
next unless (defined $l[0] && ($l[0] eq 'Gene' || $l[0] eq 'Exon' || $l[0] eq 'Supporting'));
if($l[0] eq 'Gene' and /^Gene\s+\d+$/){
push @genes, {
exons => [],
};
}
elsif($l[0] eq 'Exon'){
my $start = $l[1];
my $end = $l[2];
my $phase = $l[4];
my $strand = 1;
if($l[1] > $l[2]){
$strand = -1;
$start = $l[2];
$end = $l[1];
}
my $exon_length = $end - $start + 1;
my $end_phase = ( $exon_length + $phase ) %3;
my $exon = new Bio::EnsEMBL::Exon;
$exon->start ($start);
$exon->end ($end);
$exon->strand ($strand);
$exon->phase ($phase);
$exon->end_phase($end_phase);
if (not exists $genes[-1]->{strand}) {
$genes[-1]->{strand} = $exon->strand;
}
push @{$genes[-1]->{exons}}, {
ex => $exon,
sf => [],
};
}
elsif($l[0] eq 'Supporting') {
my $gstart = $l[1];
my $gend = $l[2];
my $pstart = $l[3];
my $pend = $l[4];
my $strand = 1;
if ($gstart > $gend){
$gstart = $l[2];
$gend = $l[1];
$strand = -1;
}
if($strand != $genes[-1]->{strand}) {
warning("incompatible strands between exon and supporting feature - cannot add suppfeat\n");
return;
}
if($pstart > $pend){
warning("Protein start greater than end! Skipping this suppfeat\n");
return;
}
my $fp = create_feature("Bio::EnsEMBL::FeaturePair", $gstart, $gend, $strand,
$pstart, $pend, 1, undef, undef, undef, $hit_name,
undef, $self->analysis);
if (not exists $genes[-1]->{start}) {
$genes[-1]->{start} = $fp->start;
$genes[-1]->{end} = $fp->end;
$genes[-1]->{hstart} = $fp->hstart;
$genes[-1]->{hend} = $fp->hend;
} else {
if ($genes[-1]->{start} > $fp->start) {
$genes[-1]->{start} = $fp->start;
}
if ($genes[-1]->{end} < $fp->end) {
$genes[-1]->{end} = $fp->end;
}
if ($genes[-1]->{hstart} > $fp->hstart) {
$genes[-1]->{hstart} = $fp->hstart;
}
if ($genes[-1]->{hend} < $fp->hend) {
$genes[-1]->{hend} = $fp->hend;
}
}
push @{$genes[-1]->{exons}->[-1]->{sf}}, $fp;
}
}
my @kept_genes;
foreach my $gene (@genes) {
if (not @kept_genes) {
push @kept_genes, $gene;
} else {
my @test_genes = (@kept_genes, $gene);
@test_genes = sort { $a->{hstart} <=> $b->{hstart} } @test_genes;
my $bad = 0;
for(my $i=1; not $bad and $i < @test_genes; $i++) {
my $this = $test_genes[$i];
my $prev = $test_genes[$i-1];
if ($this->{strand} != $prev->{strand}) {
$bad = 1;
} elsif ($this->{hstart} <= $prev->{hend}) {
$bad = 1;
} elsif ($this->{strand} > 0 and $this->{start} <= $prev->{end}) {
$bad = 1;
} elsif ($this->{strand} < 0 and $this->{end} >= $prev->{start}) {
$bad = 1;
}
}
if (not $bad) {
push @kept_genes, $gene;
}
}
}
my @exons;
foreach my $g (@kept_genes) {
foreach my $entry (@{$g->{exons}}) {
my ($exon, @sfs) = ($entry->{ex}, @{$entry->{sf}});
if (@sfs) {
my $align = new Bio::EnsEMBL::DnaPepAlignFeature(-features =>\@ sfs);
$align->seqname($self->query->id);
$align->score(100);
$exon->add_supporting_features($align);
}
push @exons, $exon;
}
}
return @exons; } |
sub parse_results
{ my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
open(GW, $results) or throw("Failed to open ".$results);
my @genesf_exons = $self->parse_genewise_output(\*GW);
close(GW) or throw("Failed to close ".$results);
my $transcript = $self->make_transcript(\@genesf_exons);
if(defined $transcript){
$self->output([$transcript]);
}else{
warning("No valid transcript made, cannot make gene for ".$self->protein->id);
} } |
sub protein
{ my ($self,$arg) = @_;
if (defined($arg)) {
throw("[$arg] is not a Bio::SeqI or Bio::Seq or Bio::PrimarySeqI") unless
($arg->isa("Bio::SeqI") ||
$arg->isa("Bio::Seq") ||
$arg->isa("Bio::PrimarySeqI"));
$self->{'_protein'} = $arg;
$self->target_length($arg->length);
}
return $self->{'_protein'}; } |
sub protfile
{ my ($self, $arg) = @_;
if($arg){
$self->{'protfile'} = $arg;
}
return $self->{'protfile'}; } |
sub reverse
{ my ($self,$arg) = @_;
if (defined($arg)) {
$self->{'_reverse'} = $arg;
}
return $self->{'_reverse'}; } |
sub run
{ my ($self, $dir) = @_;
$self->workdir($dir) if($dir);
$self->checkdir();
my $seqfile = $self->write_seq_file;
$self->files_to_delete($seqfile);
if($self->protein){
my $protfile = $self->create_filename("pep", "fa");
$self->write_seq_file($self->protein, $protfile);
$self->protfile($protfile);
$self->files_to_delete($protfile);
}
$self->resultsfile($self->create_filename("results", "out"));
$self->files_to_delete($self->resultsfile);
$self->run_analysis;
$self->parse_results;
$self->delete_files; } |
sub run_analysis
{ my ($self, $program) = @_;
if(!$program){
$program = $self->program;
}
$self->check_environment;
throw($program." is not executable Runnable::Genewise::run_analysis ")
unless($program && -x $program);
my $options = " -genesf -kbyte ".$self->memory." -ext ".
$self->extension." -gap ".$self->gap." -subs ".$self->subs." -m ".
$self->matrix." ".$self->options;
if($self->endbias == 1){
$options =~ s/-init endbias//;
$options =~ s/-splice flat//;
$options =~ s/-splice_gtag//;
if (!$self->splice_model || $self->splice_model == 0){
print "USING STANDARD SPLICE MODEL\n";
$options .= " -init endbias -splice_gtag ";
}elsif ($self->splice_model == 1){
print "USING ALTERNATIVE SPLICE MODEL\n";
$options .= " -splice model ";
}
}
if (($self->reverse) && $self->reverse == 1) {
$options .= " -trev ";
}
my $command = $self->program." ";
if($self->protein){
$command .= $self->protfile." ".$self->queryfile." ".$options;
}elsif($self->hmm){
$command .= $options." -hmmer ".$self->hmm." ".$self->queryfile;
}else{
throw("Seem to be without either a hmm or a protein sequence");
}
$command .= " > ".$self->resultsfile;
logger_info($command);
print "MMG COMMAND for ".$self->protein->id." ".$self->query->id." ".
$command."\n";
system($command) == 0 or throw("FAILED to run ".$command); } |
sub splice_model
{ my ($self, $arg) = @_;
if(!$self->{'_splice_model'}){
$self->{'_splice_model'} = undef;
}
if ($arg){
$self->{'_splice_model'} = $arg;
}
return $self->{'_splice_model'}; } |
sub subs
{ my ($self,$arg) = @_;
if(!$self->{'_subs'}){
$self->{'_subs'} = undef;
}
if (defined($arg)) {
$self->{'_subs'} = $arg;
}
return $self->{'_subs'}; } |
sub target_length
{ my ($self, $arg) = @_;
if (defined $arg) {
$self->{'_target_length'} = $arg;
}
return $self->{'_target_length'}; } |
sub verbose
{ my ($self,$arg) = @_;
if(not exists $self->{'_verbose'}){
$self->{'_verbose'} = 0;
}
if (defined($arg)) {
$self->{'_verbose'} = $arg;
}
return $self->{'_verbose'}; } |
General documentation
Describe contact details here
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _