Raw content of Bio::Das::ProServer::SourceAdaptor::compara
#
# Bio::Das::ProServer::SourceAdaptor::compara
#
# Copyright EnsEMBL Team
#
# You may distribute this module under the same terms as perl itself
#
# pod documentation - main docs before the code
=head1 NAME
Bio::Das::ProServer::SourceAdaptor::compara - Extension of the ProServer for e! genomic alignments
and synteny block.
=head1 INHERITANCE
This module inherits attributes and methods from Bio::Das::ProServer::SourceAdaptor
=head1 DAS CONFIGURATION FILE
There are some specific parameters for this module you can use in the DAS server configuration file
=head2 registry
Your registry configuration file to connect to the compara database
=head2 database
The species name in your Registry configuration file.
=head2 this_species
The main species. Features will be shown for this species.
=head2 other_species
The other species. This DAS track will show alignments between this_species and other_species.
You will have to skip this one for self-alignments. You can add more than one other species
separated by comas.
=head2 analysis
The method_link_type. This defines the type of alignments. E.g. TRANSLATED_BLAT, BLASTZ_NET...
See perldoc Bio::EnsEMBL::Compara::MethodLinkSpeciesSet for more details about the
method_link_type
=head2 Example
=head3 registry configuration file
use strict;
use Bio::EnsEMBL::Utils::ConfigRegistry;
use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
new Bio::EnsEMBL::Compara::DBSQL::DBAdaptor(
-host => 'ensembldb.ensembl.org',
-user => 'anonymous',
-port => 3306,
-species => 'ensembl-compara-41',
-dbname => 'ensembl_compara_41');
=head3 DAS server configuration file
[general]
hostname = ecs4b.internal.sanger.ac.uk
prefork = 6
maxclients = 100
port = 9013
[Hsap-Mmus-blastznet]
transport = ensembl
adaptor = compara
registry = /home/foo/ProServer/eg/reg.pl
state = on
database = ensembl-compara-41
this_species = Homo sapiens
other_species = Mus musculus
analysis = BLASTZ_NET
description = Human-mouse blastz-net alignments
[Mmus-Hsap-blastznet]
transport = ensembl
adaptor = compara
registry = /home/foo/ProServer/eg/reg.pl
state = on
database = ensembl-compara-41
this_species = Mus musculus
other_species = Homo sapiens
analysis = BLASTZ_NET
description = Mouse-Human blastz-net alignments
[primates-mlagan-hs]
transport = ensembl
adaptor = compara
registry = /home/foo/ProServer/eg/reg.pl
state = on
database = ensembl-compara-41
this_species = Homo sapiens
other_species = Pan troglodytes, Macaca mulatta
analysis = MLAGAN
description = Primates Mlagan alignments on human
[human-platypus-bz]
transport = ensembl
adaptor = compara
registry = /home/foo/ProServer/eg/reg.pl
state = on
database = ensembl-compara-41
this_species = Homo sapiens
other_species = Ornithorhynchus anatinus
analysis = BLASTZ_NET
description = Human-platypus blastz alignments
=cut
package Bio::Das::ProServer::SourceAdaptor::compara;
use strict;
use Bio::EnsEMBL::Registry;
use Bio::EnsEMBL::Utils::Exception;
use base qw( Bio::Das::ProServer::SourceAdaptor );
sub init
{
my ($self) = @_;
$self->{'capabilities'} = { 'features' => '1.0',
'stylesheet' => '1.0' };
my $registry = $self->config()->{'registry'};
unless (defined $registry) {
throw("registry not defined\n");
}
if (not $Bio::EnsEMBL::Registry::registry_register->{'seen'}) {
Bio::EnsEMBL::Registry->load_all($registry);
}
my $db = "Bio::EnsEMBL::Registry";
$db->no_version_check(1);
my $dbname = $self->config()->{'database'};
$self->{'compara'}{'mlss_adaptor'} =
$db->get_adaptor($dbname, 'compara', 'MethodLinkSpeciesSet') or
die "can't get $dbname, 'compara', 'MethodLinkSpeciesSet'\n";
$self->{'compara'}{'dnafrag_adaptor'} =
$db->get_adaptor($dbname, 'compara', 'DnaFrag') or
die "can't get $dbname, 'compara', 'DnaFrag'\n";
$self->{'compara'}{'genomic_align_block_adaptor'} =
$db->get_adaptor($dbname, 'compara', 'GenomicAlignBlock') or
die "can't get $dbname, 'compara', 'GenomicAlignBlock'\n";
$self->{'compara'}{'synteny_region_adaptor'} =
$db->get_adaptor($dbname, 'compara', 'SyntenyRegion') or
die "can't get $dbname, 'compara', 'SyntenyRegion'\n";
my $genome_db_adaptor =
$db->get_adaptor($dbname, 'compara', 'GenomeDB') or
die "can't get $dbname, 'compara', 'GenomeDB'\n";
$self->{'compara'}{'genomedbs'} = $genome_db_adaptor->fetch_all();
}
sub build_features
{
my ($self, $opts) = @_;
my $daschr = $opts->{'segment'} || return ( );
my $dasstart = $opts->{'start'} || return ( );
my $dasend = $opts->{'end'} || return ( );
my $species1 = $self->config()->{'this_species'};
my @other_species = split(/\s*\,\s*/, $self->config()->{'other_species'});
my $chr1 = $daschr;
my $start1 = $dasstart;
my $end1 = $dasend;
my $method_link = $self->config()->{'analysis'};
my $link_template = $self->config()->{'link_template'} || '/';
$link_template .= '%s/contigview?chr=%s;vc_start=%d;vc_end=%d';
$self->{'compara'}->{'link_template'} = $link_template;
my $stored_max_alignment_length;
my $mlss_adaptor = $self->{'compara'}{'mlss_adaptor'};
my $dnafrag_adaptor = $self->{'compara'}{'dnafrag_adaptor'};
my $genomic_align_block_adaptor =
$self->{'compara'}{'genomic_align_block_adaptor'};
my $genomedbs = $self->{'compara'}{'genomedbs'};
my $species1_genome_db;
my @other_species_genome_dbs;
## Get the Bio::EnsEMBL::Compara::GenomeDB object for the primary species
foreach my $this_genome_db (@$genomedbs){
if ($this_genome_db->name eq $species1) {
$species1_genome_db = $this_genome_db;
}
}
if (!defined($species1_genome_db)) {
die "No species called $species1 in the database -- check spelling\n";
}
## Get the Bio::EnsEMBL::Compara::GenomeDB objects for the remaining species
foreach my $this_other_species (@other_species) {
my $this_other_genome_db;
foreach my $this_genome_db (@$genomedbs){
if ($this_genome_db->name eq $this_other_species) {
$this_other_genome_db = $this_genome_db;
last;
}
}
if (!defined($this_other_genome_db)) {
die "No species called $this_other_species in the database -- check spelling\n";
}
push(@other_species_genome_dbs, $this_other_genome_db);
}
## Fetch the Bio::EnsEMBL::Compara::DnaFrag object for the query segment
my $dnafrag1 =
$dnafrag_adaptor->fetch_by_GenomeDB_and_name($species1_genome_db, $chr1);
return ( ) if (!defined $dnafrag1);
## Fetch the Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
my $method_link_species_set;
$method_link_species_set =
$mlss_adaptor->fetch_by_method_link_type_GenomeDBs(
$method_link, [$species1_genome_db, @other_species_genome_dbs]);
## Fetch the alginments on the query segment
my $features;
if ($method_link_species_set->method_link_class =~ /SyntenyRegion/) {
$features = $self->get_features_from_SyntenyRegions(
$method_link_species_set, $dnafrag1, $start1, $end1);
} else {
$features = $self->get_features_from_GenomicAlingBlocks(
$method_link_species_set, $dnafrag1, $start1, $end1);
}
return @$features;
}
sub get_features_from_SyntenyRegions {
my ($self, $method_link_species_set, $dnafrag1, $start1, $end1) = @_;
my $features = [];
my $synteny_region_adaptor = $self->{'compara'}{'synteny_region_adaptor'};
my $synteny_regions = $synteny_region_adaptor->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
$method_link_species_set, $dnafrag1, $start1, $end1);
my $data;
foreach my $this_synteny_region (@$synteny_regions) {
my $these_dnafrag_regions = $this_synteny_region->get_all_DnaFragRegions();
foreach my $this_dnafrag_region (@{$these_dnafrag_regions}) {
if ($this_dnafrag_region->genome_db->name == $dnafrag1->genome_db->name and
$this_dnafrag_region->dnafrag->name == $dnafrag1->name and
$this_dnafrag_region->dnafrag_start <= $end1 and
$this_dnafrag_region->dnafrag_end >= $start1) {
push(@$data, [$this_dnafrag_region, $these_dnafrag_regions]);
}
}
}
foreach my $this_data (@$data) {
my ($reference_dnafrag_region, $all_dnafrag_regions) = @$this_data;
## Set link, linktxt, grouplink, grouplinktxt
my @links;
my @link_txts;
foreach my $this_dnafrag_region (@{$all_dnafrag_regions}) {
next if ($this_dnafrag_region == $reference_dnafrag_region);
my ($species2, $name2, $start2, $end2) = (
$this_dnafrag_region->dnafrag->genome_db->name(),
$this_dnafrag_region->dnafrag->name(),
$this_dnafrag_region->dnafrag_start(),
$this_dnafrag_region->dnafrag_end(),
);
my $ens_species = $species2;
$ens_species =~ s/ /_/g;
my $short_species2 = $species2;
$short_species2 =~ /(.)\S+\s(.{3})/;
$short_species2 = "$1.$2";
push(@links, sprintf($self->{compara}->{link_template}, $ens_species, $name2, $start2, $end2, $short_species2));
push(@link_txts, sprintf("%s:%s:%d-%d", $short_species2, $name2, $start2, $end2));
}
my $synteny_region_id = $reference_dnafrag_region->synteny_region_id;
push @$features, {
'id' => $synteny_region_id,
'label' => "Synteny Region #$synteny_region_id",
'method'=> $method_link_species_set->method_link_type,
'start' => $reference_dnafrag_region->dnafrag_start,
'end' => $reference_dnafrag_region->dnafrag_end,
'ori' => ($reference_dnafrag_region->dnafrag_strand() == 1 ? '+' : '-'),
'score' => '-',
'link' => [@links],
'linktxt' => [@link_txts],
'typecategory' => $method_link_species_set->method_link_class,
'type' => $method_link_species_set->name,
};
}
return $features;
}
sub get_features_from_GenomicAlingBlocks {
my ($self, $method_link_species_set, $dnafrag1, $start1, $end1) = @_;
my $features = [];
my $genomic_align_block_adaptor = $self->{'compara'}{'genomic_align_block_adaptor'};
my $genomic_align_blocks = $genomic_align_block_adaptor->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
$method_link_species_set, $dnafrag1, $start1, $end1);
## Get the start and end coordinates for each group. The coordinates are indexed
## by species_name and chr_name to ensure the continuity in the coordinates.
my $group_coordinates;
foreach my $gab (@$genomic_align_blocks) {
my $group_id = $gab->group_id;
foreach my $genomic_align2 (@{$gab->get_all_non_reference_genomic_aligns()}) {
my $species_name = $genomic_align2->dnafrag->genome_db->name;
my $chr_name = $genomic_align2->dnafrag->name;
my $chr_start = $genomic_align2->dnafrag_start;
my $chr_end = $genomic_align2->dnafrag_end;
if (!defined($group_coordinates->{$group_id}->{$species_name}->{$chr_name})) {
$group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{start} = $chr_start;
$group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{end} = $chr_end;
} else {
if ($chr_start < $group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{start}) {
$group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{start} = $chr_start;
}
if ($chr_end > $group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{end}) {
$group_coordinates->{$group_id}->{$species_name}->{$chr_name}->{end} = $chr_end;
}
}
}
}
foreach my $gab (@$genomic_align_blocks) {
$genomic_align_block_adaptor->retrieve_all_direct_attributes($gab);
my $genomic_align1 = $gab->reference_genomic_align();
my $other_genomic_aligns = $gab->get_all_non_reference_genomic_aligns();
my $group_id = $gab->group_id;
my $id = $gab->dbID;
my $label;
my $group_label;
# note will contain the perc_id if it exists
my $note = $gab->perc_id?$gab->perc_id.'% identity':undef;
## Set link, linktxt, grouplink, grouplinktxt
my @links;
my @link_txts;
my @group_links;
my @group_link_txts;
foreach my $this_genomic_align (@{$gab->get_all_non_reference_genomic_aligns()}) {
my ($species2, $name2, $start2, $end2) = (
$this_genomic_align->dnafrag->genome_db->name(),
$this_genomic_align->dnafrag->name(),
$this_genomic_align->dnafrag_start(),
$this_genomic_align->dnafrag_end(),
);
my $ens_species = $species2;
$ens_species =~ s/ /_/g;
my $short_species2 = $species2;
$short_species2 =~ /(.)\S+\s(.{3})/;
$short_species2 = "$1.$2";
push(@links, sprintf($self->{compara}->{link_template}, $ens_species, $name2, $start2, $end2, $short_species2));
push(@link_txts, sprintf("%s:%s:%d-%d", $short_species2, $name2, $start2, $end2));
next if (!defined($group_id));
my $group_start2 = $group_coordinates->{$group_id}->{$species2}->{$name2}->{start};
my $group_end2 = $group_coordinates->{$group_id}->{$species2}->{$name2}->{end};
$group_label = "$name2: $group_start2-$group_end2";
push(@group_links, sprintf($self->{compara}->{link_template}, $ens_species, $name2,
$group_start2, $group_end2));
push(@group_link_txts, sprintf("%s:%s:%d-%d", $short_species2, $name2,
$group_start2, $group_end2));
}
if (@{$method_link_species_set->species_set} <= 2) {
## for pairwise and self-alignments
my $ga = $gab->get_all_non_reference_genomic_aligns()->[0];
$label = $ga->dnafrag->name.": ".$ga->dnafrag_start."-".$ga->dnafrag_end;
} else {
## for multiple alignments
$group_label = $group_id?"group $group_id":undef;
}
push @$features, {
'id' => $id,
'label' => $label,
'method'=> $method_link_species_set->method_link_type,
'start' => $genomic_align1->dnafrag_start,
'end' => $genomic_align1->dnafrag_end,
'ori' => ($genomic_align1->dnafrag_strand() == 1 ? '+' : '-'),
'score' => $gab->score(),
'note' => $note,
'link' => [@links],
'linktxt' => [@link_txts],
'group' => $group_id,
'grouplabel'=> $group_label,
'grouplink' => [@group_links],
'grouplinktxt' => [@group_link_txts],
'typecategory' => $method_link_species_set->method_link_class,
'type' => $method_link_species_set->name,
};
}
return $features;
}
sub das_stylesheet
{
my $self = shift;
return <
blue
aquamarine2
brown
chocolate
firebrick3
firebrick1
black
DarkSeaGreen3
EOT
}
1;