Bio::EnsEMBL::Analysis::RunnableDB ExonerateSolexa
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateSolexa
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::ExonerateSolexa
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateAlignFeature
Bio::EnsEMBL::Analysis::Tools::Utilities
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateAlignFeature
Synopsis
my $runnableDB = Bio::EnsEMBL::Analysis::RunnableDB::ExonerateSolexa->new(
-db => $refdb,
-analysis => $analysis_obj,
);
$runnableDB->fetch_input();
$runnableDB->run();
$runnableDB->write_output(); #writes to DB
Description
Extends Bio::EnsEMBL::Analysis::RunnableDB::ExonerateAlignFeature to allow
use of compressed dna align features, useful when aligning millions of short
Solexa reads
Methods
BIOTYPE
No description
Code
COMPRESSION
No description
Code
INTRON_MODELS
No description
Code
INTRON_OVERLAP
No description
Code
MISSMATCH
No description
Code
OUT_DB
No description
Code
PAIR
No description
Code
PAIREDEND
No description
Code
PAIREDGAP
No description
Code
PROJECT
No description
Code
TRANSDB
No description
Code
filter_solexa
No description
Code
merge_pair
No description
Code
new
No description
Code
pair_features
No description
Code
run
No description
Code
write_outputDescriptionCode
Methods description
write_outputcode    nextTop
  Arg [1]   : array ref of Bio::EnsEMBL::DnaDnaAlignFeature
Function : Overrides the write_output method from the superclass
to allow use of compressed DnaAlignFeatures
Returntype: 1
Exceptions: Throws if the feature cannot be stored
Methods code
BIOTYPEdescriptionprevnextTop
sub BIOTYPE {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_BIOTYPE'} = $value;
  }
  
  if (exists($self->{'_CONFIG_BIOTYPE'})) {
    return $self->{'_CONFIG_BIOTYPE'};
  } else {
    return undef;
  }
}
COMPRESSIONdescriptionprevnextTop
sub COMPRESSION {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_COMPRESSION'} = $value;
  }
  
  if (exists($self->{'_CONFIG_COMPRESSION'})) {
    return $self->{'_CONFIG_COMPRESSION'};
  } else {
    return undef;
  }
}
INTRON_MODELSdescriptionprevnextTop
sub INTRON_MODELS {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_INTRON_MODELS'} = $value;
  }
  
  if (exists($self->{'_CONFIG_INTRON_MODELS'})) {
    return $self->{'_CONFIG_INTRON_MODELS'};
  } else {
    return undef;
  }
}
INTRON_OVERLAPdescriptionprevnextTop
sub INTRON_OVERLAP {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_INTRON_OVERLAP'} = $value;
  }
  
  if (exists($self->{'_CONFIG_INTRON_OVERLAP'})) {
    return $self->{'_CONFIG_INTRON_OVERLAP'};
  } else {
    return undef;
  }
}
MISSMATCHdescriptionprevnextTop
sub MISSMATCH {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_MISSMATCH'} = $value;
  }
  
  if (exists($self->{'_CONFIG_MISSMATCH'})) {
    return $self->{'_CONFIG_MISSMATCH'};
  } else {
    return undef;
  }
}

1;
}
OUT_DBdescriptionprevnextTop
sub OUT_DB {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_OUT_DB'} = $value;
  }
  
  if (exists($self->{'_CONFIG_OUT_DB'})) {
    return $self->{'_CONFIG_OUT_DB'};
  } else {
    return undef;
  }
}
PAIRdescriptionprevnextTop
sub PAIR {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_PAIR'} = $value;
  }
  
  if (exists($self->{'_CONFIG_PAIR'})) {
    return $self->{'_CONFIG_PAIR'};
  } else {
    return undef;
  }
}
PAIREDENDdescriptionprevnextTop
sub PAIREDEND {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_PAIREDEND'} = $value;
  }
  
  if (exists($self->{'_CONFIG_PAIREDEND'})) {
    return $self->{'_CONFIG_PAIREDEND'};
  } else {
    return undef;
  }
}
PAIREDGAPdescriptionprevnextTop
sub PAIREDGAP {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_PAIREDGAP'} = $value;
  }
  
  if (exists($self->{'_CONFIG_PAIREDGAP'})) {
    return $self->{'_CONFIG_PAIREDGAP'};
  } else {
    return undef;
  }
}
PROJECTdescriptionprevnextTop
sub PROJECT {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_PROJECT'} = $value;
  }
  
  if (exists($self->{'_CONFIG_PROJECT'})) {
    return $self->{'_CONFIG_PROJECT'};
  } else {
    return undef;
  }
}
TRANSDBdescriptionprevnextTop
sub TRANSDB {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_TRANSDB'} = $value;
  }
  
  if (exists($self->{'_CONFIG_TRANSDB'})) {
    return $self->{'_CONFIG_TRANSDB'};
  } else {
    return undef;
  }
}
filter_solexadescriptionprevnextTop
sub filter_solexa {
  my ($self,$features_ref) = @_;
  my @features = @$features_ref;
  my @filtered_features;
  # allow no more than MISSMATCH missmatches and
# dont allow gapping...
foreach my $feat ( @features ) { # dont allow gapping
if ( $feat->cigar_string =~ /[ID]/ ) { # gapped reject it
next } # check missmatches
if ( $self->MISSMATCH ) { my $aligned_length = abs($feat->hend - $feat->hstart) +1; # print $feat->hseqname ." ";
# print $feat->percent_id . " " ;
# print " aligned length = $aligned_length ";
my $matches = $aligned_length * $feat->percent_id / 100;
# print " matches $matches ";
my $missmatches = ( $aligned_length - $matches) / $aligned_length * 100;
# print " missmatches $missmatches \n";
next if $missmatches > $self->MISSMATCH; # print "accepting\n";
} push @filtered_features, $feat; } return\@ filtered_features;
}
merge_pairdescriptionprevnextTop
sub merge_pair {
  my ( $self, $read_pair) = @_;
  my @ugfs;
  my @features;
  if ( $self->PAIR ) {
    # Use the combined scores and identity  of the reads for the new read
my $score = $read_pair->[0]->score + $read_pair->[1]->score; my $pid = ( $read_pair->[0]->percent_id + $read_pair->[1]->percent_id ) /2;
# Because the features are split you can get the second half of
# the read aligning before the first which can screw up the
# hit start ends as you might expect
# I am  going to flip a and b over
if ( $read_pair->[1]->end <= $read_pair->[0]->start ) { my $tmp = $read_pair->[0]; $read_pair->[0] = $read_pair->[1]; $read_pair->[1] = $tmp; } for ( my $i = 0 ; $i <=1 ; $i++ ) { my $read = $read_pair->[$i]; if ( $read->hseqname =~ /(\S+):([a,b])$/ ) { $read->hseqname($1); } if ( $read->hseqname =~ /(\S+):([A,B])$/ ) { $read->hseqname($1 .":3p"); } foreach my $ugf ( $read->ungapped_features ) { # need to modify the 'b' read hit starts ends to make it
# appear to come from one long read
# neeed to flip it onto the forward strand or else
# the dna align feature will fall over
my $hstart = $ugf->hstart ; my $hend = $ugf->hend ; if ( $i == 1 ) { if ( $ugf->hstrand == -1 ) { $ugf->hstrand(1); } } else { $ugf->hstrand(1) if $ugf->hstrand == -1; } $ugf->score($score); $ugf->percent_id($pid); push @ugfs, $ugf; } } my $feat = new Bio::EnsEMBL::DnaDnaAlignFeature(-features =>\@ ugfs); $feat->analysis($self->analysis); $feat->hstart($read_pair->[0]->hstart); $feat->hend($read_pair->[0]->hend+$read_pair->[1]->hend); push @features,$feat; } else { $read_pair->[0]->hseqname($read_pair->[0]->hseqname . ":aa"); $read_pair->[1]->hseqname($read_pair->[1]->hseqname . ":bb"); push @features,($read_pair->[0],$read_pair->[1]); } # Otherwise leave them as they are but change the a and b suffix to indictate that they have
# been paired but are not joined together.
return\@ features;
}
newdescriptionprevnextTop
sub new {
  my ( $class, @args ) = @_;
  my $self = $class->SUPER::new(@args);

  $self->read_and_check_config($EXONERATE_SOLEXA_CONFIG_BY_LOGIC);

  return $self;
}
pair_featuresdescriptionprevnextTop
sub pair_features {
  my ($self,$features_ref) = @_;
  my @features = @$features_ref;
  my $paired_features;
  my @selected_reads;
  foreach my $feat ( @features ) {
    if ( $feat->hseqname =~ /(\S+):([a,b,A,B])$/ ) {
      my $suffix = lc($2);
      push @{$paired_features->{$1}->{$suffix}},$feat;
    } else {
      $self->throw("Read name not recognised " . $feat->hseqname . "\n");
    }
  }

  foreach my $pair ( keys %$paired_features ) {
    my ( @as, @bs ) ;
    @as =  @{$paired_features->{$pair}->{'a'}} if $paired_features->{$pair}->{'a'};
    @bs =  @{$paired_features->{$pair}->{'b'}} if $paired_features->{$pair}->{'b'};
    my @potential_pairs;
    # Pick the potential paired reads bases on chr positon
foreach my $afeat ( @as ) { READ: foreach my $bfeat ( @bs ) { if ( $afeat->seqname eq $bfeat->seqname ) { # They lie on the same chr within PAIREDGAP bases of each other
# but they don't overlap
if ( ( $afeat->start > $bfeat->end && $afeat->start - $bfeat->end <= $self->PAIREDGAP ) or ( $bfeat->start > $afeat->end && $bfeat->start - $afeat->end <= $self->PAIREDGAP ) ) { # they should be oriented pointing towards each other on
# opposite strands...
if ( $afeat->strand == 1 && $bfeat->strand == 1 && $afeat->hstrand != $bfeat->hstrand ) { # are they in the right order
# on the forward strand a should come before b
# and vice versa
next READ unless ( ($afeat->hstrand == 1 && $afeat->end < $bfeat->start) or ($afeat->hstrand == -1 && $bfeat->end < $afeat->start) ); } # storing the pair using the score as the index
# so I can easily pull out the highest scoring pair
# if they score the SAME then choose the pair that is
# closest together
my $combined_score = $afeat->score + $bfeat->score; # work out the lengths the pairs are separated by
if ( $potential_pairs[$combined_score] ) { my $new_length; my $old_length; my $old_afeat = $potential_pairs[$combined_score]->[0]; my $old_bfeat = $potential_pairs[$combined_score]->[1]; if ( $afeat->start > $bfeat->end ) { $new_length = $afeat->start - $bfeat->end; } else { $new_length = $bfeat->start - $afeat->end; } if ( $old_afeat->start > $old_bfeat->end ) { $old_length = $old_afeat->start - $old_bfeat->end; } else { $old_length = $old_bfeat->start - $old_afeat->end; } # only replace the pair if the new pair with the same combined score
# are closer together ( hopefully this might stop clusters getting
# tangled up
if ( $old_length > $new_length ) { $potential_pairs[$combined_score] = [($afeat,$bfeat)] ; } } else { $potential_pairs[$combined_score] = [($afeat,$bfeat)] ; } } } } } if ( scalar(@potential_pairs) >= 1 ){ # lets join together the highest scoring pair of reads to make one new feature
push @selected_reads, @{$self->merge_pair(pop @potential_pairs)} if scalar(@potential_pairs) >= 1; } else { if ( scalar(@as) >= 1 ) { @as = sort { $a->score <=> $b->score } @as ; my $selected = pop @as; if ( $selected->hseqname =~ /(\S+):([A])$/ ) { $selected->hseqname($1 .":a3p"); } push @selected_reads, $selected; } if ( scalar(@bs) >= 1 ) { @bs = sort { $a->score <=> $b->score } @bs ; my $selected = pop @bs; if ( $selected->hseqname =~ /(\S+):([B])$/ ) { $selected->hseqname($1 .":b3p"); } push @selected_reads, $selected; } } } return\@ selected_reads;
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
 # do all the normal stuff
$self->SUPER::run(); # filter the results carefully to only allow strict matches
my $filtered_features = $self->filter_solexa($self->output); # Pair features together if they come from paired end reads
if ( $self->PAIREDEND ) { my $paired_features = $self->pair_features($filtered_features); # clear the output array to load in the modified features
$self->{'output'} = []; $self->output($paired_features); }
}
write_outputdescriptionprevnextTop
sub write_output {
  my ( $self, @output ) = @_;
  # Flag set to 1 = return a pipeline adaptor
my $outdb; my $fa; if ( $self->COMPRESSION ) { # Flag set to 1 = return a pipeline adaptor
# Remember you need to have the pipeline_tables in your OUT_DB if you like to use compression
$outdb = $self->get_dbadaptor($self->OUT_DB,1); $fa = $outdb->get_CompressedDnaAlignFeatureAdaptor; } else { # return a NORMAL adaptor NOT a pipeline adaptor ...
$outdb = $self->get_dbadaptor($self->OUT_DB); $fa = $outdb->get_DnaAlignFeatureAdaptor; } foreach my $f (@{$self->output}){ # calculate the hcoverage and use the evalue feild to store the
# depth of the features
$f->p_value(1) if $self->COMPRESSION; eval{ $fa->store($f); }; if ($@) { $self->throw("Unable to store DnaAlignFeature\n $@"); } } } ###########################################################
# containers
}
General documentation
CONTACTTop
Post general queries to ensembl-dev@ebi.ac.uk
APPENDIXTop