Bio::EnsEMBL::Compara::Production::GenomicAlignBlock
SimpleNets
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlign::AlignmentSimple
Package variables
Privates (from "my" definitions)
@ALLOWABLE_METHODS = qw(ContigAwareNet)
Included modules
Inherit
Synopsis
my $db = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Compara::Production::GenomicAlign::SimpleNets->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to DB
Description
Given an compara MethodLinkSpeciesSet identifer, and a reference genomic
slice identifer, fetches the GenomicAlignBlocks from the given compara
database, infers chains from the group identifiers, and then forms
an alignment net from the chains and writes the result
back to the database.
This module implements some simple net-inspired functionality directly
in Perl, and does not rely on Jim Kent's original Axt tools
Methods
ContigAwareNet | No description | Code |
NET_METHOD | No description | Code |
QUERY_DNAFRAG_ID | No description | Code |
SUPPORTED_METHOD | No description | Code |
TARGET_GENOMEDB_ID | No description | Code |
fetch_input | Description | Code |
get_params | No description | Code |
input_chains | No description | Code |
query_seq_level_projection | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
Title : fetch_input Usage : $self->fetch_input Function: Returns : nothing Args : none |
Methods code
ContigAwareNet | description | prev | next | Top |
sub ContigAwareNet
{ my ($self) = @_;
my $chains = $self->input_chains;
my (@net_chains, @retained_blocks, %contigs_of_kept_blocks);
foreach my $c (@$chains) {
my @blocks = @$c;
my $keep_chain = 1;
BLOCK: foreach my $block (@blocks) {
my $qga = $block->reference_genomic_align;
OTHER_BLOCK: foreach my $oblock (@retained_blocks) {
my $oqga = $oblock->reference_genomic_align;
if ($oqga->dnafrag_start <= $qga->dnafrag_end and
$oqga->dnafrag_end >= $qga->dnafrag_start) {
$keep_chain = 0;
last BLOCK;
} elsif ($oqga->dnafrag_start > $qga->dnafrag_end) {
last OTHER_BLOCK;
}
}
}
if ($keep_chain) {
my (%contigs_of_blocks, @split_blocks);
foreach my $block (@blocks) {
my ($inside_seg, @overlap_segs);
my $qga = $block->reference_genomic_align;
foreach my $seg (@{$self->query_seq_level_projection}) {
if ($qga->dnafrag_start >= $seg->from_start and
$qga->dnafrag_end <= $seg->from_end) {
$inside_seg = $seg;
last;
} elsif ($seg->from_start <= $qga->dnafrag_end and
$seg->from_end >= $qga->dnafrag_start) {
push @overlap_segs, $seg;
} elsif ($seg->from_start > $qga->dnafrag_end) {
last;
}
}
if (defined $inside_seg) {
push @split_blocks, $block;
$contigs_of_blocks{$block} = $inside_seg;
} else {
my @cut_blocks;
foreach my $seg (@overlap_segs) {
my ($reg_start, $reg_end) = ($qga->dnafrag_start, $qga->dnafrag_end);
$reg_start = $seg->from_start if $seg->from_start > $reg_start;
$reg_end = $seg->from_end if $seg->from_end < $reg_end;
my $cut_block = $block->restrict_between_reference_positions($reg_start,
$reg_end);
$cut_block->score($block->score);
if (defined $cut_block) {
push @cut_blocks, $cut_block;
$contigs_of_blocks{$cut_block} = $seg;
}
}
push @split_blocks, @cut_blocks;
}
}
@blocks = @split_blocks;
my @diff_contig_blocks;
my %kept_contigs = reverse %contigs_of_kept_blocks;
foreach my $block (@blocks) {
if (not exists $kept_contigs{$contigs_of_blocks{$block}}) {
push @diff_contig_blocks, $block;
}
}
my $kept_len = 0;
my $total_len = 0;
map {
$kept_len += $_->reference_genomic_align->dnafrag_end - $_->reference_genomic_align->dnafrag_start + 1;
} @diff_contig_blocks;
map {
$total_len += $_->reference_genomic_align->dnafrag_end - $_->reference_genomic_align->dnafrag_start + 1;
} @blocks;
if ($kept_len / $total_len > 0.5) { foreach my $bid (keys %contigs_of_blocks) { $contigs_of_kept_blocks{$bid} = $contigs_of_blocks{$bid}; }
push @net_chains,\@ diff_contig_blocks;
push @retained_blocks, @diff_contig_blocks;
@retained_blocks = sort {
$a->reference_genomic_align->dnafrag_start <=> $b->reference_genomic_align->dnafrag_start;
} @retained_blocks;
}
}
}
foreach my $ch (@net_chains) {
foreach my $bl (@{$ch}) {
foreach my $al (@{$bl->get_all_GenomicAligns}) {
$al->dnafrag;
}
}
}
return\@ net_chains;
}
} |
sub NET_METHOD
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_net_type} = $val;
}
return $self->{_net_type}; } |
sub QUERY_DNAFRAG_ID
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_query_dnafrag_id'} = $value;
}
return $self->{'_query_dnafrag_id'}; } |
sub SUPPORTED_METHOD
{ my ($class, $method ) = @_;
my $allowed = 0;
foreach my $meth (@ALLOWABLE_METHODS) {
if ($meth eq $method) {
$allowed = 1;
last;
}
}
return $allowed; } |
sub TARGET_GENOMEDB_ID
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'_target_genomedb_id'} = $value;
}
return $self->{'_target_genomedb_id'};
}
1; } |
sub fetch_input
{ my( $self) = @_;
$self->SUPER::fetch_input;
$self->compara_dba->dbc->disconnect_when_inactive(0);
my $mlssa = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
my $dnafa = $self->compara_dba->get_DnaFragAdaptor;
my $gdba = $self->compara_dba->get_GenomeDBAdaptor;
my $gaba = $self->compara_dba->get_GenomicAlignBlockAdaptor;
$self->get_params($self->analysis->parameters);
$self->get_params($self->input_id);
my $qy_dnafrag;
if ($self->QUERY_DNAFRAG_ID) {
$qy_dnafrag = $dnafa->fetch_by_dbID($self->QUERY_DNAFRAG_ID);
my @seq_level_bits = @{$qy_dnafrag->slice->project('seqlevel')};
$self->query_seq_level_projection(\@seq_level_bits);
}
throw("Could not fetch DnaFrag with dbID " . $self->QUERY_DNAFRAG_ID )
if not defined $qy_dnafrag;
my $tg_gdb;
if ($self->TARGET_GENOMEDB_ID) {
$tg_gdb = $gdba->fetch_by_dbID($self->TARGET_GENOMEDB_ID);
}
throw("Could not fetch GenomeDB with dbID " . $self->TARGET_GENOMEDB_ID)
if not defined $tg_gdb;
my $mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE,
[$qy_dnafrag->genome_db, $tg_gdb]);
throw("No MethodLinkSpeciesSet for " . $self->INPUT_METHOD_LINK_TYPE)
if not defined $mlss;
my $out_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
$out_mlss->method_link_type($self->OUTPUT_METHOD_LINK_TYPE);
$out_mlss->species_set($mlss->species_set);
$mlssa->store($out_mlss);
$self->output_MethodLinkSpeciesSet($out_mlss);
if ($self->input_job->retry_count > 0) {
print STDERR "Deleting alignments as it is a rerun\n";
$self->delete_alignments($out_mlss,
$qy_dnafrag);
}
my $gabs = $gaba->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss,
$qy_dnafrag);
my %chains;
while (my $gab = shift @{$gabs}) {
my ($qy_ga) = $gab->reference_genomic_align;
my ($tg_ga) = @{$gab->get_all_non_reference_genomic_aligns};
my $group_id = $gab->group_id;
if (not exists $chains{$group_id}) {
$chains{$group_id} = {
score => $gab->score,
query_name => $qy_ga->dnafrag->name,
query_pos => $qy_ga->dnafrag_start,
target_name => $tg_ga->dnafrag->name,
target_pos => $tg_ga->dnafrag_start,
blocks => [],
};
} else {
if ($gab->score > $chains{$group_id}->{score}) {
$chains{$group_id}->{score} = $gab->score;
}
if ($chains{$group_id}->{query_pos} > $qy_ga->dnafrag_start) {
$chains{$group_id}->{query_pos} = $qy_ga->dnafrag_start;
}
if ($chains{$group_id}->{target_pos} > $tg_ga->dnafrag_start) {
$chains{$group_id}->{target_pos} = $tg_ga->dnafrag_start;
}
}
push @{$chains{$group_id}->{blocks}}, $gab;
}
foreach my $group_id (keys %chains) {
$chains{$group_id}->{blocks} = [sort {
$a->reference_genomic_align->dnafrag_start <=> $b->reference_genomic_align->dnafrag_start;
} @{$chains{$group_id}->{blocks}}];
}
my @chains;
foreach my $group_id (sort { $chains{$b}->{score} <=> $chains{$a}->{score} or
$chains{$a}->{target_name} cmp $chains{$b}->{target_name} or
$chains{$a}->{target_pos} <=> $chains{$b}->{target_pos} or
$chains{$a}->{query_pos} <=> $chains{$b}->{query_pos}
} keys %chains) {
push @chains, $chains{$group_id}->{blocks};
}
$self->input_chains(\@chains); } |
sub get_params
{ my $self = shift;
my $param_string = shift;
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
$self->SUPER::get_params($param_string);
if (defined($params->{'qy_dnafrag_id'})) {
$self->QUERY_DNAFRAG_ID($params->{'qy_dnafrag_id'});
}
if (defined($params->{'tg_genomedb_id'})) {
$self->TARGET_GENOMEDB_ID($params->{'tg_genomedb_id'});
}
if (defined $params->{'net_method'}) {
$self->NET_METHOD($params->{'net_method'});
}
return 1; } |
sub input_chains
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_chains} = $val;
}
return $self->{_query_chains}; } |
sub query_seq_level_projection
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_query_seq_level_bits} = $val;
}
return $self->{_query_seq_level_bits};
}
} |
sub run
{ my ($self) = @_;
my $output;
if ($self->NET_METHOD) {
no strict 'refs';
my $method = $self->NET_METHOD;
$output = $self->$method;
} else {
$output = $self->ContigAwareNet();
}
$self->cleanse_output($output);
$self->output($output); } |
sub write_output
{ my $self = shift;
my $disconnect_when_inactive_default = $self->db->dbc->disconnect_when_inactive;
$self->compara_dba->dbc->disconnect_when_inactive(0);
$self->SUPER::write_output;
$self->compara_dba->dbc->disconnect_when_inactive($disconnect_when_inactive_default);
}
my @ALLOWABLE_METHODS = qw(ContigAwareNet); } |
General documentation
No general documentation available.