Bio::EnsEMBL::Analysis::RunnableDB
AlignmentNets
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::AlignmentNets
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::AlignmentFilter
Inherit
Synopsis
my $db = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Analysis::RunnableDB::AlignmentNets->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to DB
Description
Given an compara MethodLinkSpeciesSet identifer, and a reference genomic
slice identifer, fetches the GenomicAlignBlocks from the given compara
database, infers chains from the group identifiers, and then forms
an alignment net from the chains and writes the result
back to the database.
This module (at least for now) relies heavily on Jim Kent\'s Axt tools.
Methods
CHAIN_NET | No description | Code |
METHOD | No description | Code |
NET_FILTER | No description | Code |
NET_SYNTENIC | No description | Code |
QUERY_CORE_DB | No description | Code |
QUERY_SPECIES | No description | Code |
TARGET_SPECIES | No description | Code |
calculate_simple_high_net | No description | Code |
calculate_simple_low_net | No description | Code |
chains | No description | Code |
fetch_input | No description | Code |
make_runnable | No description | Code |
new | No description | Code |
query_seq_level_projection | No description | Code |
read_and_check_config | No description | Code |
run | No description | Code |
Methods description
None available.
Methods code
sub CHAIN_NET
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_chain_net_prog} = $val;
}
return $self->{_chain_net_prog}; } |
sub METHOD
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_primary_method} = $val;
}
return $self->{_primary_method}; } |
sub NET_FILTER
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_net_filter_prog} = $val;
}
return $self->{_net_filter_prog};
}
1; } |
sub NET_SYNTENIC
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_net_syntenic_prog} = $val;
}
return $self->{_net_syntenic_prog}; } |
sub QUERY_CORE_DB
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_core_db} = $val;
}
return $self->{_query_core_db}; } |
sub QUERY_SPECIES
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_species} = $val;
}
return $self->{_query_species}; } |
sub TARGET_SPECIES
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_target_species} = $val;
}
return $self->{_target_species}; } |
sub calculate_simple_high_net
{ my ($self, $chains) = @_;
my (@net_chains, @retained_blocks, %contigs_of_kept_blocks);
foreach my $c (@$chains) {
my @b = sort { $a->start <=> $b->start } @$c;
my $c_st = $b[0]->start;
my $c_en = $b[-1]->end;
my $keep_chain = 1;
foreach my $oc (@net_chains) {
my @ob = sort { $a->start <=> $b->start } @$oc;
my $oc_st = $ob[0]->start;
my $oc_en = $ob[-1]->end;
if ($oc_st <= $c_en and $oc_en >= $c_st) {
$keep_chain = 0;
last;
}
}
if ($keep_chain) {
my (%contigs_of_blocks, @split_blocks);
foreach my $bl (@b) {
my ($inside_seg, @overlap_segs);
foreach my $seg (@{$self->query_seq_level_projection}) {
if ($bl->start >= $seg->from_start and
$bl->end <= $seg->from_end) {
$inside_seg = $seg;
last;
} elsif ($seg->from_start <= $bl->end and
$seg->from_end >= $bl->start) {
push @overlap_segs, $seg;
} elsif ($seg->from_start > $bl->end) {
last;
}
}
if (defined $inside_seg) {
push @split_blocks, $bl;
$contigs_of_blocks{$bl} = $inside_seg;
} else {
my @cut_blocks;
foreach my $seg (@overlap_segs) {
my ($reg_start, $reg_end) = ($bl->start, $bl->end);
$reg_start = $seg->from_start if $seg->from_start > $reg_start;
$reg_end = $seg->from_end if $seg->from_end < $reg_end;
my $cut_block = $bl->restrict_between_positions($reg_start,
$reg_end,
"SEQ");
if (defined $cut_block) {
push @cut_blocks, $cut_block;
$contigs_of_blocks{$cut_block} = $seg;
}
}
push @split_blocks, @cut_blocks;
}
}
@b = @split_blocks;
my %new_block;
map { $new_block{$_} = 1 } @b;
my @new_list = (@retained_blocks, @b);
@new_list = sort { $a->start <=> $b->start } @new_list;
NEWBLOCK:
for(my $i = 0; $i < @new_list; $i++) {
my $bl = $new_list[$i];
if (exists($new_block{$bl})) {
my ($flank_left, $flank_right);
if ($i > 0 and
not exists($new_block{$new_list[$i-1]})) {
$flank_left = $new_list[$i-1];
}
if ($i < scalar(@new_list) - 1 and
not exists($new_block{$new_list[$i+1]})) {
$flank_right = $new_list[$i+1];
}
if (defined $flank_left and
$contigs_of_blocks{$bl}->to_Slice->seq_region_name eq
$contigs_of_kept_blocks{$flank_left}->to_Slice->seq_region_name
) {
$keep_chain = 0;
last NEWBLOCK;
}
if (defined $flank_right and
$contigs_of_blocks{$bl}->to_Slice->seq_region_name eq
$contigs_of_kept_blocks{$flank_right}->to_Slice->seq_region_name
) {
$keep_chain = 0;
last NEWBLOCK;
}
}
}
if ($keep_chain) {
foreach my $bid (keys %contigs_of_blocks) {
$contigs_of_kept_blocks{$bid} = $contigs_of_blocks{$bid};
}
push @net_chains,\@ b;
push @retained_blocks, @b;
@retained_blocks = sort { $a->start <=> $b->start } @retained_blocks;
}
}
}
return\@ net_chains;
}
} |
sub calculate_simple_low_net
{ my ($self, $chains) = @_;
my (@net_chains, @retained_blocks, %contigs_of_kept_blocks);
foreach my $c (@$chains) {
my @b = sort { $a->start <=> $b->start } @$c;
my $keep_chain = 1;
BLOCK: foreach my $b (@b) {
OTHER_BLOCK: foreach my $ob (@retained_blocks) {
if ($ob->start <= $b->end and $ob->end >= $b->start) {
$keep_chain = 0;
last BLOCK;
} elsif ($ob->start > $b->end) {
last OTHER_BLOCK;
}
}
}
if ($keep_chain) {
my (%contigs_of_blocks, @split_blocks);
foreach my $bl (@b) {
my ($inside_seg, @overlap_segs);
foreach my $seg (@{$self->query_seq_level_projection}) {
if ($bl->start >= $seg->from_start and
$bl->end <= $seg->from_end) {
$inside_seg = $seg;
last;
} elsif ($seg->from_start <= $bl->end and
$seg->from_end >= $bl->start) {
push @overlap_segs, $seg;
} elsif ($seg->from_start > $bl->end) {
last;
}
}
if (defined $inside_seg) {
push @split_blocks, $bl;
$contigs_of_blocks{$bl} = $inside_seg;
} else {
my @cut_blocks;
foreach my $seg (@overlap_segs) {
my ($reg_start, $reg_end) = ($bl->start, $bl->end);
$reg_start = $seg->from_start if $seg->from_start > $reg_start;
$reg_end = $seg->from_end if $seg->from_end < $reg_end;
my $cut_block = $bl->restrict_between_positions($reg_start,
$reg_end,
"SEQ");
if (defined $cut_block) {
push @cut_blocks, $cut_block;
$contigs_of_blocks{$cut_block} = $seg;
}
}
push @split_blocks, @cut_blocks;
}
}
@b = @split_blocks;
my %new_block;
map { $new_block{$_} = 1 } @b;
my @new_list = (@retained_blocks, @b);
@new_list = sort { $a->start <=> $b->start } @new_list;
NEWBLOCK:
for(my $i = 0; $i < @new_list; $i++) {
my $bl = $new_list[$i];
if (exists($new_block{$bl})) {
my ($flank_left, $flank_right);
if ($i > 0 and
not exists($new_block{$new_list[$i-1]})) {
$flank_left = $new_list[$i-1];
}
if ($i < scalar(@new_list) - 1 and
not exists($new_block{$new_list[$i+1]})) {
$flank_right = $new_list[$i+1];
}
if (defined $flank_left and
$contigs_of_blocks{$bl}->to_Slice->seq_region_name eq
$contigs_of_kept_blocks{$flank_left}->to_Slice->seq_region_name
) {
$keep_chain = 0;
last NEWBLOCK;
}
if (defined $flank_right and
$contigs_of_blocks{$bl}->to_Slice->seq_region_name eq
$contigs_of_kept_blocks{$flank_right}->to_Slice->seq_region_name
) {
$keep_chain = 0;
last NEWBLOCK;
}
}
}
if ($keep_chain) {
foreach my $bid (keys %contigs_of_blocks) {
$contigs_of_kept_blocks{$bid} = $contigs_of_blocks{$bid};
}
push @net_chains,\@ b;
push @retained_blocks, @b;
@retained_blocks = sort { $a->start <=> $b->start } @retained_blocks;
}
}
}
return\@ net_chains;
}
} |
sub chains
{ my ($self,$value) = @_;
if (defined $value) {
$self->{_chains} = $value;
}
return $self->{_chains}; } |
sub fetch_input
{ my( $self) = @_;
$self->throw("No input id") unless defined($self->input_id);
my ($seq_name, $seq_start, $seq_end);
if ($self->input_id =~ /^([^:]+):(\d+):(\d+)$/) {
($seq_name, $seq_start, $seq_end) = ($1, $2, $3);
} elsif ($self->input_id =~ /(\S+)/) {
$seq_name = $1;
} else {
throw("Input id could not be parsed: ", $self->input_id);
}
if ($self->QUERY_CORE_DB) {
my $q_dbh = Bio::EnsEMBL::DBSQL::DBAdaptor->new(%{$self->QUERY_CORE_DB});
my $sl = $q_dbh->get_SliceAdaptor->fetch_by_region('toplevel',
$seq_name);
my @segs = @{$sl->project('seqlevel')};
$self->query_seq_level_projection(\@segs);
}
my $compara_dbh = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(%{$self->COMPARA_DB});
my $query_species = $self->QUERY_SPECIES;
my $target_species = $self->TARGET_SPECIES;
my $q_gdb = $compara_dbh->get_GenomeDBAdaptor->fetch_by_name_assembly($query_species);
my $t_gdb = $compara_dbh->get_GenomeDBAdaptor->fetch_by_name_assembly($target_species);
throw("Could not get GenomeDB for '$query_species'") if not defined $q_gdb;
throw("Could not get GenomeDB for '$target_species'") if not defined $t_gdb;
my $mlss = $compara_dbh->get_MethodLinkSpeciesSetAdaptor
->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE,
[$q_gdb, $t_gdb]);
throw("No MethodLinkSpeciesSet for :\n" .
$self->INPUT_METHOD_LINK_TYPE . "\n" .
$query_species . "\n" .
$target_species)
if not $mlss;
my $out_mlss = $compara_dbh->get_MethodLinkSpeciesSetAdaptor
->fetch_by_method_link_type_GenomeDBs($self->OUTPUT_METHOD_LINK_TYPE,
[$q_gdb, $t_gdb]);
throw("No MethodLinkSpeciesSet for :\n" .
$self->OUTPUT_METHOD_LINK_TYPE . "\n" .
$query_species . "\n" .
$target_species)
if not $out_mlss;
$self->output_MethodLinkSpeciesSet($out_mlss);
my $ref_dnafrag = $compara_dbh->get_DnaFragAdaptor->fetch_by_GenomeDB_and_name($q_gdb,
$seq_name);
my $gen_al_blocks = $compara_dbh->get_GenomicAlignBlockAdaptor
->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $ref_dnafrag, $seq_start, $seq_end);
my (%features_by_group);
foreach my $block (@$gen_al_blocks) {
my ($qy_al) = $block->reference_genomic_align;
my ($tg_al) = @{$block->get_all_non_reference_genomic_aligns};
if (not exists($self->query_DnaFrag_hash->{$qy_al->dnafrag->name})) {
$self->query_DnaFrag_hash->{$qy_al->dnafrag->name} = $qy_al->dnafrag;
}
if (not exists($self->target_DnaFrag_hash->{$tg_al->dnafrag->name})) {
$self->target_DnaFrag_hash->{$tg_al->dnafrag->name} = $tg_al->dnafrag;
}
my $daf_cigar = $self->daf_cigar_from_compara_cigars($qy_al->cigar_line,
$tg_al->cigar_line);
my $daf = Bio::EnsEMBL::DnaDnaAlignFeature->new
(-seqname => $qy_al->dnafrag->name,
-start => $qy_al->dnafrag_start,
-end => $qy_al->dnafrag_end,
-strand => $qy_al->dnafrag_strand,
-hseqname => $tg_al->dnafrag->name,
-hstart => $tg_al->dnafrag_start,
-hend => $tg_al->dnafrag_end,
-hstrand => $tg_al->dnafrag_strand,
-score => $block->score,
-cigar_string => $daf_cigar,
-group_id => $block->group_id);
push @{$features_by_group{$group_id}}, $daf;
}
$self->chains($self->sort_chains_by_max_block_score([values %features_by_group]));
if ($self->METHOD eq 'STANDARD' or $self->METHOD eq 'SYNTENIC') {
my $run = $self->make_runnable($self->METHOD eq 'SYNTENIC');
$self->runnable($run);
}
}
} |
sub make_runnable
{ my ($self, $syntenic) = @_;
my (%query_lengths, %target_lengths);
foreach my $nm (keys %{$self->query_DnaFrag_hash}) {
$query_lengths{$nm} = $self->query_DnaFrag_hash->{$nm}->length;
}
foreach my $nm (keys %{$self->target_DnaFrag_hash}) {
$target_lengths{$nm} = $self->target_DnaFrag_hash->{$nm}->length;
}
my %parameters = (-analysis => $self->analysis,
-chains => $self->chains,
-query_lengths =>\% query_lengths,
-target_lengths =>\% target_lengths,
-min_chain_score => $self->MIN_CHAIN_SCORE,
-filter_non_syntenic => $syntenic);
$parameters{-chainNet} = $self->CHAIN_NET
? $self->CHAIN_NET
: $BIN_DIR . "/" . "chainNet";
$parameters{-netSyntenic} = $self->NET_SYNTENIC
? $self->NET_SYNTENIC
: $BIN_DIR . "/" . "netSyntenic";
$parameters{-netFilter} = $self->NET_FILTER
? $self->NET_FILTER
: $BIN_DIR . "/" . "netFilter";
my $run = Bio::EnsEMBL::Analysis::Runnable::AlignmentNets->new(%parameters);
return $run;
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($NET_CONFIG_BY_LOGIC);
return $self;
}
} |
sub query_seq_level_projection
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_proj_segs} = $val;
}
return $self->{_query_proj_segs};
}
} |
sub read_and_check_config
{ my ($self, $hash) = @_;
$self->SUPER::read_and_check_config($hash);
my $logic = $self->analysis->logic_name;
foreach my $var (qw(INPUT_METHOD_LINK_TYPE
OUTPUT_METHOD_LINK_TYPE
QUERY_SPECIES
TARGET_SPECIES
COMPARA_DB
METHOD)) {
throw("You must define $var in config for logic '$logic'" .
" or in the DEFAULT entry")
if not $self->$var;
}
my %allowable_methods =
(
STANDARD => 1,
SYNTENIC => 1,
SIMPLE_HIGH => 1,
SIMPLE_MEDIUM => 1,
SIMPLE_LOW => 1,
);
if ($self->METHOD and
not $allowable_methods{$self->METHOD}) {
throw("You must set METHOD to one of the reserved names\n" .
"See the .example file for these names");
} } |
sub run
{ my ($self) = @_;
if (@{$self->runnable}) {
$self->SUPER::run;
} else {
my $filtered_chains;
if ($self->METHOD eq 'SIMPLE_HIGH') {
$filtered_chains = $self->calculate_simple_high_net($self->chains);
} elsif ($self->METHOD eq 'SIMPLE_LOW') {
$filtered_chains = $self->calculate_simple_low_net($self->chains);
}
if (defined $filtered_chains) {
my $converted_out = $self->convert_output($filtered_chains);
$self->output($converted_out);
}
}
}
} |
General documentation
No general documentation available.