Bio::EnsEMBL::Analysis::RunnableDB
AlignmentFilter
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::AlignmentFilter
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Abstract base class of AlignmentChains and AlignmentNets
Description
Methods
COMPARA_DB | No description | Code |
INPUT_METHOD_LINK_TYPE | No description | Code |
MAX_GAP | No description | Code |
MIN_CHAIN_SCORE | No description | Code |
OUTPUT_METHOD_LINK_TYPE | No description | Code |
compara_cigars_from_daf_cigar | No description | Code |
convert_output | No description | Code |
daf_cigar_from_compara_cigars | No description | Code |
new | No description | Code |
output_MethodLinkSpeciesSet | No description | Code |
query_DnaFrag_hash | No description | Code |
run | Description | Code |
sort_chains_by_max_block_score | No description | Code |
split_feature | No description | Code |
target_DnaFrag_hash | No description | Code |
write_output | Description | Code |
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Function : cycles through all the runnables, calls run and pushes their output into the RunnableDBs output array Returntype: array ref Exceptions: none Example : |
Title : write_output Usage : $self->write_output() Function: Writes contents of $self->output into $self->db Returns : 1 Args : None |
Methods code
sub COMPARA_DB
{ my ($self, $db) = @_;
if (defined $db) {
$self->{_compara_db} = $db;
}
return $self->{_compara_db}; } |
sub INPUT_METHOD_LINK_TYPE
{ my ($self, $type) = @_;
if (defined $type) {
$self->{_in_method_link_type} = $type;
}
return $self->{_in_method_link_type}; } |
sub MAX_GAP
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_max_gap} = $val;
}
return $self->{_max_gap}; } |
sub MIN_CHAIN_SCORE
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_min_chain_score} = $val ;
}
if (not exists $self->{_min_chain_score}) {
return undef;
} else {
return $self->{_min_chain_score};
}
}
1; } |
sub OUTPUT_METHOD_LINK_TYPE
{ my ($self, $type) = @_;
if (defined $type) {
$self->{_out_method_link_type} = $type;
}
return $self->{_out_method_link_type}; } |
sub compara_cigars_from_daf_cigar
{ my ($self, $daf_cigar) = @_;
my ($q_cigar_line, $t_cigar_line, $align_length);
my @pieces = split(/(\d*[MDI])/, $daf_cigar);
my ($q_counter, $t_counter) = (0,0);
foreach my $piece ( @pieces ) {
next if ($piece !~ /^(\d*)([MDI])$/);
my $num = ($1 or 1);
my $type = $2;
if( $type eq "M" ) {
$q_counter += $num;
$t_counter += $num;
} elsif( $type eq "D" ) {
$q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M";
$q_counter = 0;
$q_cigar_line .= (($num == 1) ? "" : $num)."D";
$t_counter += $num;
} elsif( $type eq "I" ) {
$q_counter += $num;
$t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M";
$t_counter = 0;
$t_cigar_line .= (($num == 1) ? "" : $num)."D";
}
$align_length += $num;
}
$q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M"
if $q_counter;
$t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M"
if $t_counter;
return ($q_cigar_line, $t_cigar_line, $align_length); } |
sub convert_output
{ my ($self, $chains_of_dafs) = @_;
my (@chains_of_blocks);
foreach my $chain_of_dafs (@$chains_of_dafs) {
my @chain_of_blocks;
foreach my $raw_daf (sort {$a->start <=> $b->start} @$chain_of_dafs) {
my @split_dafs;
if ($self->MAX_GAP) {
@split_dafs = $self->split_feature($raw_daf, $self->MAX_GAP);
} else {
@split_dafs = ($raw_daf);
}
my ($group, @gas);
if (defined $self->INPUT_GROUP_TYPE) {
$group = Bio::EnsEMBL::Compara::GenomicAlignGroup->new
(-type => $self->INPUT_GROUP_TYPE);
}
foreach my $daf (@split_dafs) {
my ($q_cigar, $t_cigar, $al_len) =
$self->compara_cigars_from_daf_cigar($daf->cigar_string);
my $q_dnafrag = $self->query_DnaFrag_hash->{$daf->seqname};
my $t_dnafrag = $self->target_DnaFrag_hash->{$daf->hseqname};
my $out_mlss = $self->output_MethodLinkSpeciesSet;
my $q_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
(-dnafrag => $q_dnafrag,
-dnafrag_start => $daf->start,
-dnafrag_end => $daf->end,
-dnafrag_strand => $daf->strand,
-cigar_line => $q_cigar,
-level_id => $daf->level_id ? $daf->level_id : 1,
-method_link_species_set => $out_mlss);
my $t_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
(-dnafrag => $t_dnafrag,
-dnafrag_start => $daf->hstart,
-dnafrag_end => $daf->hend,
-dnafrag_strand => $daf->hstrand,
-cigar_line => $t_cigar,
-level_id => $daf->level_id ? $daf->level_id : 1,
-method_link_species_set => $out_mlss);
if (defined $group) {
unless (defined $group->dbID) {
$group->dbID($daf->group_id);
}
push @gas, ($q_genomic_align, $t_genomic_align);
}
my $gen_al_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new
(-genomic_align_array => [$q_genomic_align, $t_genomic_align],
-score => $daf->score,
-length => $al_len,
-method_link_species_set => $out_mlss);
push @chain_of_blocks, $gen_al_block;
}
if (defined $group) {
$group->genomic_align_array(\@gas);
}
}
push @chains_of_blocks,\@ chain_of_blocks;
}
return\@ chains_of_blocks;
}
} |
sub daf_cigar_from_compara_cigars
{ my ($self, $q_cigar, $t_cigar) = @_;
my (@q_pieces, @t_pieces);
foreach my $piece (split(/(\d*[MDGI])/, $q_cigar)) {
next if ($piece !~ /^(\d*)([MDGI])$/);
my $num = $1; $num = 1 if $num eq "";
my $type = $2; $type = 'D' if $type ne 'M';
if ($num > 0) {
push @q_pieces, { num => $num,
type => $type,
};
}
}
foreach my $piece (split(/(\d*[MDGI])/, $t_cigar)) {
next if ($piece !~ /^(\d*)([MDGI])$/);
my $num = $1; $num = 1 if $num eq "";
my $type = $2; $type = 'D' if $type ne 'M';
if ($num > 0) {
push @t_pieces, { num => $num,
type => $type,
};
}
}
my $daf_cigar = "";
while(@q_pieces and @t_pieces) {
my $q = shift @q_pieces;
my $t = shift @t_pieces;
if ($q->{num} == $t->{num}) {
if ($q->{type} eq 'M' and $t->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
} elsif ($q->{type} eq 'M' and $t->{type} eq 'D') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'I';
} elsif ($q->{type} eq 'D' and $t->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
} else {
warn("The following cigars have a simultaneous gap:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
} elsif ($q->{num} > $t->{num}) {
if ($q->{type} ne 'M') {
warn("The following cigars are strange:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
if ($t->{type} eq 'M') {
$daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'M';
} elsif ($t->{type} eq 'D') {
$daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'I';
}
unshift @q_pieces, {
type => 'M',
num => $q->{num} - $t->{num},
};
} else {
if ($t->{type} ne 'M') {
warn("The following cigars are strange:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
if ($q->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
} elsif ($q->{type} eq 'D') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
}
unshift @t_pieces, {
type => 'M',
num => $t->{num} - $q->{num},
};
}
}
if (@q_pieces or @t_pieces) {
warn("Left with dangling pieces in the following cigars:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
return undef;
}
my $last_type;
foreach my $piece (split(/(\d*[MDI])/, $daf_cigar)) {
next if not $piece;
my ($type) = ($piece =~ /\d*([MDI])/);
if (defined $last_type and
(($last_type eq 'I' and $type eq 'D') or
($last_type eq 'D' and $type eq 'I'))) {
warn("Adjacent Insert/Delete in the following cigars:\n" .
$q_cigar . "\n".
$t_cigar . "\n".
$daf_cigar . "\n");
return undef;
}
$last_type = $type;
}
return $daf_cigar; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->query_DnaFrag_hash({});
$self->target_DnaFrag_hash({});
return $self; } |
sub output_MethodLinkSpeciesSet
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_out_mlss} = $val;
}
return $self->{_out_mlss};
}
} |
sub query_DnaFrag_hash
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_q_dna_frags} = $val;
}
return $self->{_q_dna_frags}; } |
sub run
{ my ($self) = @_;
foreach my $runnable(@{$self->runnable}){
$runnable->run;
my $converted_output = $self->convert_output($runnable->output);
$self->output($converted_output);
rmdir($runnable->workdir) if (defined $runnable->workdir);
} } |
sub sort_chains_by_max_block_score
{ my ($self, $chains) = @_;
my @chain_hashes;
foreach my $chain (@$chains) {
my $chain_hash = { chain => $chain };
foreach my $block (@$chain) {
if (not exists $chain_hash->{qname}) {
$chain_hash->{qname} = $block->seqname;
$chain_hash->{tname} = $block->hseqname;
}
if (not exists $chain_hash->{score} or
$block->score > $chain_hash->{score}) {
$chain_hash->{score} = $block->score;
}
}
push @chain_hashes, $chain_hash;
}
my @sorted = map { $_->{chain}} sort {
$b->{score} <=> $a->{score}
or $a->{qname} cmp $b->{qname}
or $a->{tname} cmp $b->{tname}
} @chain_hashes;
return\@ sorted;
}
} |
sub split_feature
{ my ($self, $f, $max_gap) = @_;
my @split_dafs;
my $need_to_split = 0;
my @pieces = split(/(\d*[MDI])/, $f->cigar_string);
foreach my $piece ( @pieces ) {
next if ($piece !~ /^(\d*)([MDI])$/);
my $num = ($1 or 1);
my $type = $2;
if (($type eq "I" or $type eq "D") and $num >= $max_gap) {
$need_to_split = 1;
last;
}
}
if ($need_to_split) {
my (@new_feats);
foreach my $ug (sort {$a->start <=> $b->start} $f->ungapped_features) {
if (@new_feats) {
my ($dist, $hdist);
my $last_ug = $new_feats[-1]->[-1];
if ($ug->end < $last_ug->start) {
$dist = $last_ug->start - $ug->end - 1;
} else {
$dist = $ug->start - $last_ug->end - 1;
}
if ($ug->hend < $last_ug->hstart) {
$hdist = $last_ug->hstart - $ug->hend - 1;
} else {
$hdist = $ug->hstart - $last_ug->hend - 1;
}
if ($dist >= $max_gap or $hdist >= $max_gap) {
push @new_feats, [];
}
} else {
push @new_feats, [];
}
push @{$new_feats[-1]}, $ug;
}
foreach my $mini_list (@new_feats) {
push @split_dafs, Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => $mini_list);
}
} else {
@split_dafs = ($f)
}
return @split_dafs;
}
} |
sub target_DnaFrag_hash
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_t_dna_frags} = $val;
}
return $self->{_t_dna_frags}; } |
sub write_output
{ my($self) = @_;
my $compara_dbh;
if (defined $self->COMPARA_DB) {
$compara_dbh = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(%{$self->COMPARA_DB});
} else {
$compara_dbh = $self->{'comparaDBA'};
}
my @gen_al_groups;
foreach my $chain (@{$self->output}) {
my $group_id;
my $first_block = shift @$chain;
$compara_dbh->get_GenomicAlignBlockAdaptor->store($first_block);
unless (defined($first_block->group_id)) {
$group_id = $first_block->dbID;
$compara_dbh->get_GenomicAlignBlockAdaptor->store_group_id($first_block, $group_id);
}
foreach my $block (@$chain) {
$block->group_id($group_id);
$compara_dbh->get_GenomicAlignBlockAdaptor->store($block);
}
}
}
} |
General documentation
No general documentation available.