Bio::EnsEMBL::Compara::Production::GenomicAlignBlock
AlignmentProcessing
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Abstract base class of AlignmentChains and AlignmentNets
Description
Methods
INPUT_METHOD_LINK_TYPE | No description | Code |
MAX_GAP | No description | Code |
MIN_CHAIN_SCORE | No description | Code |
OUTPUT_METHOD_LINK_TYPE | No description | Code |
cleanse_output | No description | Code |
compara_cigars_from_daf_cigar | No description | Code |
compara_dba | No description | Code |
convert_output | No description | Code |
daf_cigar_from_compara_cigars | No description | Code |
delete_alignments | No description | Code |
fetch_input | No description | Code |
get_params | No description | Code |
new | No description | Code |
output | No description | Code |
output_MethodLinkSpeciesSet | No description | Code |
query_DnaFrag_hash | No description | Code |
run | Description | Code |
sort_chains_by_max_block_score | No description | Code |
split_feature | No description | Code |
target_DnaFrag_hash | No description | Code |
write_output | Description | Code |
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Function : cycles through all the runnables, calls run and pushes their output into the RunnableDBs output array Returntype: array ref Exceptions: none Example : |
Title : write_output Usage : $self->write_output() Function: Writes contents of $self->output into $self->db Returns : 1 Args : None |
Methods code
INPUT_METHOD_LINK_TYPE | description | prev | next | Top |
sub INPUT_METHOD_LINK_TYPE
{ my ($self, $type) = @_;
if (defined $type) {
$self->{_in_method_link_type} = $type;
}
return $self->{_in_method_link_type}; } |
sub MAX_GAP
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_max_gap} = $val;
}
return $self->{_max_gap}; } |
sub MIN_CHAIN_SCORE
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_min_chain_score} = $val ;
}
if (not exists $self->{_min_chain_score}) {
return undef;
} else {
return $self->{_min_chain_score};
}
}
1; } |
sub OUTPUT_METHOD_LINK_TYPE
{ my ($self, $type) = @_;
if (defined $type) {
$self->{_out_method_link_type} = $type;
}
return $self->{_out_method_link_type}; } |
sub cleanse_output
{ my ($self, $chains) = @_;
foreach my $chain (@{$chains}) {
foreach my $gab (@{$chain}) {
$gab->{'adaptor'} = undef;
$gab->{'dbID'} = undef;
$gab->{'method_link_species_set_id'} = undef;
$gab->method_link_species_set($self->output_MethodLinkSpeciesSet);
foreach my $ga (@{$gab->get_all_GenomicAligns}) {
$ga->{'adaptor'} = undef;
$ga->{'dbID'} = undef;
$ga->{'method_link_species_set_id'} = undef;
$ga->method_link_species_set($self->output_MethodLinkSpeciesSet);
}
}
}
}
} |
sub compara_cigars_from_daf_cigar
{ my ($self, $daf_cigar) = @_;
my ($q_cigar_line, $t_cigar_line, $align_length);
my @pieces = split(/(\d*[MDI])/, $daf_cigar);
my ($q_counter, $t_counter) = (0,0);
foreach my $piece ( @pieces ) {
next if ($piece !~ /^(\d*)([MDI])$/);
my $num = ($1 or 1);
my $type = $2;
if( $type eq "M" ) {
$q_counter += $num;
$t_counter += $num;
} elsif( $type eq "D" ) {
$q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M";
$q_counter = 0;
$q_cigar_line .= (($num == 1) ? "" : $num)."D";
$t_counter += $num;
} elsif( $type eq "I" ) {
$q_counter += $num;
$t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M";
$t_counter = 0;
$t_cigar_line .= (($num == 1) ? "" : $num)."D";
}
$align_length += $num;
}
$q_cigar_line .= (($q_counter == 1) ? "" : $q_counter)."M"
if $q_counter;
$t_cigar_line .= (($t_counter == 1) ? "" : $t_counter)."M"
if $t_counter;
return ($q_cigar_line, $t_cigar_line, $align_length); } |
sub compara_dba
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_compara_dba} = $val;
}
return $self->{_compara_dba}; } |
sub convert_output
{ my ($self, $chains_of_dafs) = @_;
my (@chains_of_blocks);
foreach my $chain_of_dafs (@$chains_of_dafs) {
my @chain_of_blocks;
foreach my $raw_daf (sort {$a->start <=> $b->start} @$chain_of_dafs) {
my @split_dafs;
if ($self->MAX_GAP) {
@split_dafs = $self->split_feature($raw_daf, $self->MAX_GAP);
} else {
@split_dafs = ($raw_daf);
}
foreach my $daf (@split_dafs) {
my ($q_cigar, $t_cigar, $al_len) =
$self->compara_cigars_from_daf_cigar($daf->cigar_string);
my $q_dnafrag = $self->query_DnaFrag_hash->{$daf->seqname};
my $t_dnafrag = $self->target_DnaFrag_hash->{$daf->hseqname};
my $out_mlss = $self->output_MethodLinkSpeciesSet;
my $q_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
(-dnafrag => $q_dnafrag,
-dnafrag_start => $daf->start,
-dnafrag_end => $daf->end,
-dnafrag_strand => $daf->strand,
-cigar_line => $q_cigar,
-level_id => $daf->level_id ? $daf->level_id : 1,
-method_link_species_set => $out_mlss);
my $t_genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new
(-dnafrag => $t_dnafrag,
-dnafrag_start => $daf->hstart,
-dnafrag_end => $daf->hend,
-dnafrag_strand => $daf->hstrand,
-cigar_line => $t_cigar,
-level_id => $daf->level_id ? $daf->level_id : 1,
-method_link_species_set => $out_mlss);
my $gen_al_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new
(-genomic_align_array => [$q_genomic_align, $t_genomic_align],
-score => $daf->score,
-length => $al_len,
-method_link_species_set => $out_mlss,
-group_id => $daf->group_id);
push @chain_of_blocks, $gen_al_block;
}
}
push @chains_of_blocks,\@ chain_of_blocks;
}
return\@ chains_of_blocks; } |
sub daf_cigar_from_compara_cigars
{ my ($self, $q_cigar, $t_cigar) = @_;
my (@q_pieces, @t_pieces);
foreach my $piece (split(/(\d*[MDGI])/, $q_cigar)) {
next if ($piece !~ /^(\d*)([MDGI])$/);
my $num = $1; $num = 1 if $num eq "";
my $type = $2; $type = 'D' if $type ne 'M';
if ($num > 0) {
push @q_pieces, { num => $num,
type => $type,
};
}
}
foreach my $piece (split(/(\d*[MDGI])/, $t_cigar)) {
next if ($piece !~ /^(\d*)([MDGI])$/);
my $num = $1; $num = 1 if $num eq "";
my $type = $2; $type = 'D' if $type ne 'M';
if ($num > 0) {
push @t_pieces, { num => $num,
type => $type,
};
}
}
my $daf_cigar = "";
while(@q_pieces and @t_pieces) {
my $q = shift @q_pieces;
my $t = shift @t_pieces;
if ($q->{num} == $t->{num}) {
if ($q->{type} eq 'M' and $t->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
} elsif ($q->{type} eq 'M' and $t->{type} eq 'D') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'I';
} elsif ($q->{type} eq 'D' and $t->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
} else {
warn("The following cigars have a simultaneous gap:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
} elsif ($q->{num} > $t->{num}) {
if ($q->{type} ne 'M') {
warn("The following cigars are strange:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
if ($t->{type} eq 'M') {
$daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'M';
} elsif ($t->{type} eq 'D') {
$daf_cigar .= ($t->{num} > 1 ? $t->{num} : "") . 'I';
}
unshift @q_pieces, {
type => 'M',
num => $q->{num} - $t->{num},
};
} else {
if ($t->{type} ne 'M') {
warn("The following cigars are strange:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
}
if ($q->{type} eq 'M') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'M';
} elsif ($q->{type} eq 'D') {
$daf_cigar .= ($q->{num} > 1 ? $q->{num} : "") . 'D';
}
unshift @t_pieces, {
type => 'M',
num => $t->{num} - $q->{num},
};
}
}
if (@q_pieces or @t_pieces) {
warn("Left with dangling pieces in the following cigars:\n" .
$q_cigar . "\n".
$t_cigar . "\n");
return undef;
}
my $last_type;
foreach my $piece (split(/(\d*[MDI])/, $daf_cigar)) {
next if not $piece;
my ($type) = ($piece =~ /\d*([MDI])/);
if (defined $last_type and
(($last_type eq 'I' and $type eq 'D') or
($last_type eq 'D' and $type eq 'I'))) {
warn("Adjacent Insert/Delete in the following cigars:\n" .
$q_cigar . "\n".
$t_cigar . "\n".
$daf_cigar . "\n");
return undef;
}
$last_type = $type;
}
return $daf_cigar; } |
sub delete_alignments
{ my ($self, $mlss, $qy_dnafrag, $tg_dnafrag) = @_;
my $dbc = $self->compara_dba->dbc;
my $sql = "SELECT ga1.genomic_align_block_id, ga1.genomic_align_id, ga2.genomic_align_id
FROM genomic_align ga1, genomic_align ga2
WHERE ga1.genomic_align_block_id=ga2.genomic_align_block_id
AND ga1.genomic_align_id != ga2.genomic_align_id
AND ga1.dnafrag_id = ?
AND ga1.method_link_species_set_id = ?";
if (defined $tg_dnafrag) {
$sql .= " AND ga2.dnafrag_id = ?";
}
my $sth = $dbc->prepare($sql);
if (defined $tg_dnafrag) {
$sth->execute( $qy_dnafrag->dbID, $mlss->dbID, $tg_dnafrag->dbID);
} else {
$sth->execute( $qy_dnafrag->dbID, $mlss->dbID);
}
my $nb_gabs = 0;
my @gabs;
while (my $aref = $sth->fetchrow_arrayref) {
my ($gab_id, $ga_id1, $ga_id2) = @$aref;
push @gabs, [$gab_id, $ga_id1, $ga_id2];
$nb_gabs++;
}
my $sql_gab = "delete from genomic_align_block where genomic_align_block_id in ";
my $sql_ga = "delete from genomic_align where genomic_align_id in ";
for (my $i=0; $i < scalar @gabs; $i=$i+20000) {
my (@gab_ids, @ga1_ids, @ga2_ids);
for (my $j = $i; ($j < scalar @gabs && $j < $i+20000); $j++) {
push @gab_ids, $gabs[$j][0];
push @ga1_ids, $gabs[$j][1];
push @ga2_ids, $gabs[$j][2];
}
my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ")";
my $sql_ga_to_exec1 = $sql_ga . "(" . join(",", @ga1_ids) . ")";
my $sql_ga_to_exec2 = $sql_ga . "(" . join(",", @ga2_ids) . ")";
foreach my $sql ($sql_gab_to_exec,$sql_ga_to_exec1,$sql_ga_to_exec2) {
my $sth = $dbc->prepare($sql);
$sth->execute;
$sth->finish;
}
}
}
} |
sub get_params
{ my ($self, $param_string) = @_;
return unless($param_string);
my $params = eval($param_string);
return unless($params);
if(defined($params->{'input_method_link'})) {
$self->INPUT_METHOD_LINK_TYPE($params->{'input_method_link'});
}
if(defined($params->{'output_method_link'})) {
$self->OUTPUT_METHOD_LINK_TYPE($params->{'output_method_link'});
}
if (defined($params->{'max_gap'})) {
$self->MAX_GAP($params->{'max_gap'});
}
if (defined($params->{'min_chain_score'})) {
$self->MIN_CHAIN_SCORE($params->{'min_chain_score'});
} } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->query_DnaFrag_hash({});
$self->target_DnaFrag_hash({});
$self->get_params($self->analysis->parameters);
$self->get_params($self->input_id);
return $self; } |
sub output
{ my ($self, $output) = @_;
if(!$self->{output}){
$self->{output} = [];
}
if($output){
if(ref($output) ne 'ARRAY'){
throw('Must pass RunnableDB:output an array ref not a '.$output);
}
push(@{$self->{output}}, @$output);
}
return $self->{output};
}
} |
sub output_MethodLinkSpeciesSet
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_out_mlss} = $val;
}
return $self->{_out_mlss}; } |
sub query_DnaFrag_hash
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_q_dna_frags} = $val;
}
return $self->{_q_dna_frags}; } |
sub run
{ my ($self) = @_;
foreach my $runnable(@{$self->runnable}){
$runnable->run;
my $converted_output = $self->convert_output($runnable->output);
$self->output($converted_output);
rmdir($runnable->workdir) if (defined $runnable->workdir);
} } |
sub sort_chains_by_max_block_score
{ my ($self, $chains) = @_;
my @chain_hashes;
foreach my $chain (@$chains) {
my $chain_hash = { chain => $chain };
foreach my $block (@$chain) {
if (not exists $chain_hash->{qname}) {
$chain_hash->{qname} = $block->seqname;
$chain_hash->{tname} = $block->hseqname;
}
if (not exists $chain_hash->{score} or
$block->score > $chain_hash->{score}) {
$chain_hash->{score} = $block->score;
}
}
push @chain_hashes, $chain_hash;
}
my @sorted = map { $_->{chain}} sort {
$b->{score} <=> $a->{score}
or $a->{qname} cmp $b->{qname}
or $a->{tname} cmp $b->{tname}
} @chain_hashes;
return\@ sorted;
}
} |
sub split_feature
{ my ($self, $f, $max_gap) = @_;
my @split_dafs;
my $need_to_split = 0;
my @pieces = split(/(\d*[MDI])/, $f->cigar_string);
foreach my $piece ( @pieces ) {
next if ($piece !~ /^(\d*)([MDI])$/);
my $num = ($1 or 1);
my $type = $2;
if (($type eq "I" or $type eq "D") and $num >= $max_gap) {
$need_to_split = 1;
last;
}
}
if ($need_to_split) {
my (@new_feats);
foreach my $ug (sort {$a->start <=> $b->start} $f->ungapped_features) {
if (@new_feats) {
my ($dist, $hdist);
my $last_ug = $new_feats[-1]->[-1];
if ($ug->end < $last_ug->start) {
$dist = $last_ug->start - $ug->end - 1;
} else {
$dist = $ug->start - $last_ug->end - 1;
}
if ($ug->hend < $last_ug->hstart) {
$hdist = $last_ug->hstart - $ug->hend - 1;
} else {
$hdist = $ug->hstart - $last_ug->hend - 1;
}
if ($dist >= $max_gap or $hdist >= $max_gap) {
push @new_feats, [];
}
} else {
push @new_feats, [];
}
push @{$new_feats[-1]}, $ug;
}
foreach my $mini_list (@new_feats) {
push @split_dafs, Bio::EnsEMBL::DnaDnaAlignFeature->new(-features => $mini_list);
}
} else {
@split_dafs = ($f)
}
return @split_dafs;
}
} |
sub target_DnaFrag_hash
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_t_dna_frags} = $val;
}
return $self->{_t_dna_frags}; } |
sub write_output
{ my($self) = @_;
my $compara_dba = $self->compara_dba;
my @gen_al_groups;
foreach my $chain (@{$self->output}) {
my $group_id;
my $first_block = shift @$chain;
$compara_dba->get_GenomicAlignBlockAdaptor->store($first_block);
unless (defined($first_block->group_id)) {
$group_id = $first_block->dbID;
$compara_dba->get_GenomicAlignBlockAdaptor->store_group_id($first_block, $group_id);
}
foreach my $block (@$chain) {
if (defined $group_id) {
$block->group_id($group_id);
}
$compara_dba->get_GenomicAlignBlockAdaptor->store($block);
}
}
}
} |
General documentation
No general documentation available.