Bio::EnsEMBL::Analysis::RunnableDB
ExonerateSolexa
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateSolexa
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::ExonerateSolexa
Inherit
Synopsis
my $runnableDB = Bio::EnsEMBL::Analysis::RunnableDB::ExonerateSolexa->new(
-db => $refdb,
-analysis => $analysis_obj,
);
$runnableDB->fetch_input();
$runnableDB->run();
$runnableDB->write_output(); #writes to DB
Description
Extends Bio::EnsEMBL::Analysis::RunnableDB::ExonerateAlignFeature to allow
use of compressed dna align features, useful when aligning millions of short
Solexa reads
Methods
BIOTYPE | No description | Code |
COMPRESSION | No description | Code |
INTRON_MODELS | No description | Code |
INTRON_OVERLAP | No description | Code |
MISSMATCH | No description | Code |
OUT_DB | No description | Code |
PAIR | No description | Code |
PAIREDEND | No description | Code |
PAIREDGAP | No description | Code |
PROJECT | No description | Code |
TRANSDB | No description | Code |
filter_solexa | No description | Code |
merge_pair | No description | Code |
new | No description | Code |
pair_features | No description | Code |
run | No description | Code |
write_output | Description | Code |
Methods description
Arg [1] : array ref of Bio::EnsEMBL::DnaDnaAlignFeature Function : Overrides the write_output method from the superclass to allow use of compressed DnaAlignFeatures Returntype: 1 Exceptions: Throws if the feature cannot be stored |
Methods code
sub BIOTYPE
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_BIOTYPE'} = $value;
}
if (exists($self->{'_CONFIG_BIOTYPE'})) {
return $self->{'_CONFIG_BIOTYPE'};
} else {
return undef;
} } |
sub COMPRESSION
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_COMPRESSION'} = $value;
}
if (exists($self->{'_CONFIG_COMPRESSION'})) {
return $self->{'_CONFIG_COMPRESSION'};
} else {
return undef;
} } |
sub INTRON_MODELS
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_INTRON_MODELS'} = $value;
}
if (exists($self->{'_CONFIG_INTRON_MODELS'})) {
return $self->{'_CONFIG_INTRON_MODELS'};
} else {
return undef;
} } |
sub INTRON_OVERLAP
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_INTRON_OVERLAP'} = $value;
}
if (exists($self->{'_CONFIG_INTRON_OVERLAP'})) {
return $self->{'_CONFIG_INTRON_OVERLAP'};
} else {
return undef;
} } |
sub MISSMATCH
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_MISSMATCH'} = $value;
}
if (exists($self->{'_CONFIG_MISSMATCH'})) {
return $self->{'_CONFIG_MISSMATCH'};
} else {
return undef;
}
}
1; } |
sub OUT_DB
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_OUT_DB'} = $value;
}
if (exists($self->{'_CONFIG_OUT_DB'})) {
return $self->{'_CONFIG_OUT_DB'};
} else {
return undef;
} } |
sub PAIR
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_PAIR'} = $value;
}
if (exists($self->{'_CONFIG_PAIR'})) {
return $self->{'_CONFIG_PAIR'};
} else {
return undef;
} } |
sub PAIREDEND
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_PAIREDEND'} = $value;
}
if (exists($self->{'_CONFIG_PAIREDEND'})) {
return $self->{'_CONFIG_PAIREDEND'};
} else {
return undef;
} } |
sub PAIREDGAP
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_PAIREDGAP'} = $value;
}
if (exists($self->{'_CONFIG_PAIREDGAP'})) {
return $self->{'_CONFIG_PAIREDGAP'};
} else {
return undef;
} } |
sub PROJECT
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_PROJECT'} = $value;
}
if (exists($self->{'_CONFIG_PROJECT'})) {
return $self->{'_CONFIG_PROJECT'};
} else {
return undef;
} } |
sub TRANSDB
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_CONFIG_TRANSDB'} = $value;
}
if (exists($self->{'_CONFIG_TRANSDB'})) {
return $self->{'_CONFIG_TRANSDB'};
} else {
return undef;
} } |
sub filter_solexa
{ my ($self,$features_ref) = @_;
my @features = @$features_ref;
my @filtered_features;
foreach my $feat ( @features ) {
if ( $feat->cigar_string =~ /[ID]/ ) {
next
}
if ( $self->MISSMATCH ) {
my $aligned_length = abs($feat->hend - $feat->hstart) +1;
my $matches = $aligned_length * $feat->percent_id / 100; my $missmatches = ( $aligned_length - $matches) / $aligned_length * 100; next if $missmatches > $self->MISSMATCH;
}
push @filtered_features, $feat;
}
return\@ filtered_features; } |
sub merge_pair
{ my ( $self, $read_pair) = @_;
my @ugfs;
my @features;
if ( $self->PAIR ) {
my $score = $read_pair->[0]->score + $read_pair->[1]->score;
my $pid = ( $read_pair->[0]->percent_id + $read_pair->[1]->percent_id ) /2;
if ( $read_pair->[1]->end <= $read_pair->[0]->start ) {
my $tmp = $read_pair->[0];
$read_pair->[0] = $read_pair->[1];
$read_pair->[1] = $tmp;
}
for ( my $i = 0 ; $i <=1 ; $i++ ) {
my $read = $read_pair->[$i];
if ( $read->hseqname =~ /(\S+):([a,b])$/ ) {
$read->hseqname($1);
}
if ( $read->hseqname =~ /(\S+):([A,B])$/ ) {
$read->hseqname($1 .":3p");
}
foreach my $ugf ( $read->ungapped_features ) {
my $hstart = $ugf->hstart ;
my $hend = $ugf->hend ;
if ( $i == 1 ) {
if ( $ugf->hstrand == -1 ) {
$ugf->hstrand(1);
}
} else {
$ugf->hstrand(1) if $ugf->hstrand == -1;
}
$ugf->score($score);
$ugf->percent_id($pid);
push @ugfs, $ugf;
}
}
my $feat = new Bio::EnsEMBL::DnaDnaAlignFeature(-features =>\@ ugfs);
$feat->analysis($self->analysis);
$feat->hstart($read_pair->[0]->hstart);
$feat->hend($read_pair->[0]->hend+$read_pair->[1]->hend);
push @features,$feat;
} else {
$read_pair->[0]->hseqname($read_pair->[0]->hseqname . ":aa");
$read_pair->[1]->hseqname($read_pair->[1]->hseqname . ":bb");
push @features,($read_pair->[0],$read_pair->[1]);
}
return\@ features; } |
sub new
{ my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($EXONERATE_SOLEXA_CONFIG_BY_LOGIC);
return $self; } |
sub pair_features
{ my ($self,$features_ref) = @_;
my @features = @$features_ref;
my $paired_features;
my @selected_reads;
foreach my $feat ( @features ) {
if ( $feat->hseqname =~ /(\S+):([a,b,A,B])$/ ) {
my $suffix = lc($2);
push @{$paired_features->{$1}->{$suffix}},$feat;
} else {
$self->throw("Read name not recognised " . $feat->hseqname . "\n");
}
}
foreach my $pair ( keys %$paired_features ) {
my ( @as, @bs ) ;
@as = @{$paired_features->{$pair}->{'a'}} if $paired_features->{$pair}->{'a'};
@bs = @{$paired_features->{$pair}->{'b'}} if $paired_features->{$pair}->{'b'};
my @potential_pairs;
foreach my $afeat ( @as ) {
READ: foreach my $bfeat ( @bs ) {
if ( $afeat->seqname eq $bfeat->seqname ) {
if ( ( $afeat->start > $bfeat->end &&
$afeat->start - $bfeat->end <= $self->PAIREDGAP ) or
( $bfeat->start > $afeat->end &&
$bfeat->start - $afeat->end <= $self->PAIREDGAP ) ) {
if ( $afeat->strand == 1 && $bfeat->strand == 1
&& $afeat->hstrand != $bfeat->hstrand ) {
next READ unless
(
($afeat->hstrand == 1 && $afeat->end < $bfeat->start) or
($afeat->hstrand == -1 && $bfeat->end < $afeat->start)
);
}
my $combined_score = $afeat->score + $bfeat->score;
if ( $potential_pairs[$combined_score] ) {
my $new_length;
my $old_length;
my $old_afeat = $potential_pairs[$combined_score]->[0];
my $old_bfeat = $potential_pairs[$combined_score]->[1];
if ( $afeat->start > $bfeat->end ) {
$new_length = $afeat->start - $bfeat->end;
} else {
$new_length = $bfeat->start - $afeat->end;
}
if ( $old_afeat->start > $old_bfeat->end ) {
$old_length = $old_afeat->start - $old_bfeat->end;
} else {
$old_length = $old_bfeat->start - $old_afeat->end;
}
if ( $old_length > $new_length ) {
$potential_pairs[$combined_score] = [($afeat,$bfeat)] ;
}
} else {
$potential_pairs[$combined_score] = [($afeat,$bfeat)] ;
}
}
}
}
}
if ( scalar(@potential_pairs) >= 1 ){
push @selected_reads, @{$self->merge_pair(pop @potential_pairs)} if scalar(@potential_pairs) >= 1;
} else {
if ( scalar(@as) >= 1 ) {
@as = sort { $a->score <=> $b->score } @as ;
my $selected = pop @as;
if ( $selected->hseqname =~ /(\S+):([A])$/ ) {
$selected->hseqname($1 .":a3p");
}
push @selected_reads, $selected;
}
if ( scalar(@bs) >= 1 ) {
@bs = sort { $a->score <=> $b->score } @bs ;
my $selected = pop @bs;
if ( $selected->hseqname =~ /(\S+):([B])$/ ) {
$selected->hseqname($1 .":b3p");
}
push @selected_reads, $selected;
}
}
}
return\@ selected_reads; } |
sub run
{ my ($self) = @_;
$self->SUPER::run();
my $filtered_features = $self->filter_solexa($self->output);
if ( $self->PAIREDEND ) {
my $paired_features = $self->pair_features($filtered_features);
$self->{'output'} = [];
$self->output($paired_features);
} } |
sub write_output
{ my ( $self, @output ) = @_;
my $outdb;
my $fa;
if ( $self->COMPRESSION ) {
$outdb = $self->get_dbadaptor($self->OUT_DB,1);
$fa = $outdb->get_CompressedDnaAlignFeatureAdaptor;
} else {
$outdb = $self->get_dbadaptor($self->OUT_DB);
$fa = $outdb->get_DnaAlignFeatureAdaptor;
}
foreach my $f (@{$self->output}){
$f->p_value(1) if $self->COMPRESSION;
eval{
$fa->store($f);
};
if ($@) {
$self->throw("Unable to store DnaAlignFeature\n $@");
}
}
}
} |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk