Raw content of Bio::EnsEMBL::Compara::DBSQL::PeptideAlignFeatureAdaptor
=head1 NAME
Bio::EnsEMBL::Hive::DBSQL::PeptideAlignFeatureAdaptor
=head1 SYNOPSIS
$peptideAlignFeatureAdaptor = $db_adaptor->get_PeptideAlignFeatureAdaptor;
$peptideAlignFeatureAdaptor = $peptideAlignFeatureObj->adaptor;
=head1 DESCRIPTION
Module to encapsulate all db access for persistent class PeptideAlignFeature
There should be just one per application and database connection.
=head1 CONTACT
Contact Jessica Severin on implementation/design detail: jessica@ebi.ac.uk
Contact Albert Vilella on implementation/design detail: avilella@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
=cut
package Bio::EnsEMBL::Compara::DBSQL::PeptideAlignFeatureAdaptor;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
#use Bio::EnsEMBL::Compara::SyntenyPair;
use Bio::EnsEMBL::Compara::PeptideAlignFeature;
use Bio::EnsEMBL::Compara::DBSQL::MemberAdaptor;
use Bio::EnsEMBL::Utils::Exception;
use vars '@ISA';
@ISA = ('Bio::EnsEMBL::DBSQL::BaseAdaptor');
#############################
#
# fetch methods
#
#############################
=head2 fetch_all_by_qmember_id
Arg [1] : int $member->dbID
the database id for a peptide member
Example : $pafs = $adaptor->fetch_all_by_qmember_id($member->dbID);
Description: Returns all PeptideAlignFeatures from all target species
where the query peptide member is know.
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_all_by_qmember_id{
my $self = shift;
my $member_id = shift;
throw("member_id undefined") unless($member_id);
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($member_id);
my $gdb = $member->genome_db;
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.qmember_id = $member_id";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_qmember_id{
# my $self = shift;
# my $member_id = shift;
# throw("member_id undefined") unless($member_id);
# my $constraint = "paf.qmember_id = $member_id";
# return $self->_generic_fetch($constraint);
# }
=head2 fetch_all_by_hmember_id
Arg [1] : int $member->dbID
the database id for a peptide member
Example : $pafs = $adaptor->fetch_all_by_hmember_id($member->dbID);
Description: Returns all PeptideAlignFeatures from all query species
where the hit peptide member is know.
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_all_by_hmember_id{
my $self = shift;
my $member_id = shift;
throw("member_id undefined") unless($member_id);
my $sql = 'SHOW TABLES LIKE "peptide_align_feature_%"';
my $sth = $self->dbc->prepare($sql);
$sth->execute();
my @tbl_names;
while ( my $tbl_name = $sth->fetchrow ) {
push @tbl_names, $tbl_name;
}
$sth->finish;
my $paf;
foreach my $tbl_name (@tbl_names) {
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns FROM $tbl_name paf WHERE paf.hmember_id=$member_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$paf = $self->_objs_from_sth($sth);
if (defined($paf) && (0 != scalar @$paf)) {
foreach my $this_paf (@$paf) {
push @pafs, $this_paf;
}
}
}
$sth->finish;
return \@pafs;
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_hmember_id{
# my $self = shift;
# my $member_id = shift;
# throw("member_id undefined") unless($member_id);
# my $constraint = "paf.hmember_id = $member_id";
# return $self->_generic_fetch($constraint);
# }
=head2 fetch_all_by_qmember_id_hmember_id
Arg [1] : int $query_member->dbID
the database id for a peptide member
Arg [2] : int $hit_member->dbID
the database id for a peptide member
Example : $pafs = $adaptor->fetch_all_by_qmember_id_hmember_id($qmember_id, $hmember_id);
Description: Returns all PeptideAlignFeatures for a given query member and
hit member. If pair did not align, array will be empty.
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if either member_id is not defined
Caller : general
=cut
sub fetch_all_by_qmember_id_hmember_id{
my $self = shift;
my $qmember_id = shift;
my $hmember_id = shift;
throw("must specify query member dbID") unless($qmember_id);
throw("must specify hit member dbID") unless($hmember_id);
my $qmember = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $qmember->genome_db;
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.qmember_id = $qmember_id AND paf.hmember_id = $hmember_id";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_qmember_id_hmember_id{
# my $self = shift;
# my $qmember_id = shift;
# my $hmember_id = shift;
# throw("must specify query member dbID") unless($qmember_id);
# throw("must specify hit member dbID") unless($hmember_id);
# my $constraint = "paf.qmember_id=$qmember_id AND paf.hmember_id=$hmember_id";
# return $self->_generic_fetch($constraint);
# }
=head2 fetch_all_by_qmember_id_hgenome_db_id
Arg [1] : int $query_member->dbID
the database id for a peptide member
Arg [2] : int $hit_genome_db->dbID
the database id for a genome_db
Example : $pafs = $adaptor->fetch_all_by_qmember_id_hgenome_db_id(
$member->dbID, $genome_db->dbID);
Description: Returns all PeptideAlignFeatures for a given query member and
target hit species specified via a genome_db_id
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if either member->dbID or genome_db->dbID is not defined
Caller : general
=cut
sub fetch_all_by_qmember_id_hgenome_db_id{
my $self = shift;
my $qmember_id = shift;
my $hgenome_db_id = shift;
throw("must specify query member dbID") unless($qmember_id);
throw("must specify hit genome_db dbID") unless($hgenome_db_id);
my $qmember = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $qmember->genome_db;
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.qmember_id = $qmember_id AND paf.hgenome_db_id = $hgenome_db_id";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_qmember_id_hgenome_db_id{
# my $self = shift;
# my $qmember_id = shift;
# my $hgenome_db_id = shift;
# throw("must specify query member dbID") unless($qmember_id);
# throw("must specify hit genome_db dbID") unless($hgenome_db_id);
# my $constraint = "paf.qmember_id=$qmember_id AND paf.hgenome_db_id=$hgenome_db_id";
# return $self->_generic_fetch($constraint);
# }
=head2 fetch_all_by_hmember_id_qgenome_db_id
Arg [1] : int $hit_member->dbID
the database id for a peptide member
Arg [2] : int $query_genome_db->dbID
the database id for a genome_db
Example : $pafs = $adaptor->fetch_all_by_hmember_id_qgenome_db_id(
$member->dbID, $genome_db->dbID);
Description: Returns all PeptideAlignFeatures for a given hit member and
query species specified via a genome_db_id
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if either member->dbID or genome_db->dbID is not defined
Caller : general
=cut
sub fetch_all_by_hmember_id_qgenome_db_id{
my $self = shift;
my $hmember_id = shift;
my $qgenome_db_id = shift;
throw("must specify hit member dbID") unless($hmember_id);
throw("must specify query genome_db dbID") unless($qgenome_db_id);
my $species_name = lc($self->db->get_GenomeDBAdaptor->fetch_by_dbID($qgenome_db_id)->name);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$qgenome_db_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.hmember_id = $hmember_id";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_hmember_id_qgenome_db_id{
# my $self = shift;
# my $hmember_id = shift;
# my $qgenome_db_id = shift;
# throw("must specify hit member dbID") unless($hmember_id);
# throw("must specify query genome_db dbID") unless($qgenome_db_id);
# my $constraint = "paf.hmember_id=$hmember_id AND paf.qgenome_db_id=$qgenome_db_id";
# return $self->_generic_fetch($constraint);
# }
sub fetch_all_by_hgenome_db_id{
my $self = shift;
my $hgenome_db_id = shift;
throw("must specify hit genome_db dbID") unless($hgenome_db_id);
my $sql = 'SHOW TABLES LIKE "peptide_align_feature_%"';
my $sth = $self->dbc->prepare($sql);
$sth->execute();
my @tbl_names;
while ( my $tbl_name = $sth->fetchrow ) {
push @tbl_names, $tbl_name;
}
$sth->finish;
my $paf;
foreach my $tbl_name (@tbl_names) {
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns FROM $tbl_name paf WHERE paf.hgenome_db_id=$hgenome_db_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$paf = $self->_objs_from_sth($sth);
if (defined($paf) && (0 != scalar @$paf)) {
foreach my $this_paf (@$paf) {
push @pafs, $this_paf;
}
}
}
$sth->finish;
return \@pafs;
}
# Previous all-in-one-table paf
# sub old_fetch_all_by_hgenome_db_id{
# my $self = shift;
# my $hgenome_db_id = shift;
# throw("must specify hit genome_db dbID") unless($hgenome_db_id);
# my $constraint = "paf.hgenome_db_id=$hgenome_db_id";
# return $self->_generic_fetch($constraint);
# }
sub fetch_all_by_qgenome_db_id{
my $self = shift;
my $qgenome_db_id = shift;
throw("must specify query genome_db dbID") unless($qgenome_db_id);
my $gdb = $$self->db->get_GenomeDBAdaptor->fetch_by_dbID($qgenome_db_id);
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
# my $constraint = "paf.qmember_id = $member_id";
my $sql = "SELECT $columns FROM $tbl_name paf";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
sub fetch_all_by_qgenome_db_id_hgenome_db_id{
my $self = shift;
my $qgenome_db_id = shift;
my $hgenome_db_id = shift;
throw("must specify query genome_db dbID") unless($qgenome_db_id);
throw("must specify hit genome_db dbID") unless($hgenome_db_id);
my $gdb = $self->db->get_GenomeDBAdaptor->fetch_by_dbID($qgenome_db_id);
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.hgenome_db_id = $hgenome_db_id";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
sub fetch_all_besthit_by_qgenome_db_id{
my $self = shift;
my $qgenome_db_id = shift;
throw("must specify query genome_db dbID") unless($qgenome_db_id);
my $gdb = $self->db->get_GenomeDBAdaptor->fetch_by_dbID($qgenome_db_id);
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.hit_rank=1";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
sub fetch_all_besthit_by_qgenome_db_id_hgenome_db_id{
my $self = shift;
my $qgenome_db_id = shift;
my $hgenome_db_id = shift;
throw("must specify query genome_db dbID") unless($qgenome_db_id);
throw("must specify hit genome_db dbID") unless($hgenome_db_id);
my $gdb = $self->db->get_GenomeDBAdaptor->fetch_by_dbID($qgenome_db_id);
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $constraint = "paf.hgenome_db_id = $hgenome_db_id AND paf.hit_rank=1";
my $sql = "SELECT $columns FROM $tbl_name paf WHERE $constraint";
my $sth = $self->prepare($sql);
$sth->execute;
return $self->_objs_from_sth($sth);
}
=head2 fetch_selfhit_by_qmember_id
Arg [1] : int $member->dbID
the database id for a peptide member
Example : $paf = $adaptor->fetch_selfhit_by_qmember_id($member->dbID);
Description: Returns the selfhit PeptideAlignFeature defined by the id $id.
Returntype : Bio::EnsEMBL::Compara::PeptideAlignFeature
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_selfhit_by_qmember_id {
my $self= shift;
my $qmember_id = shift;
throw("qmember_id undefined") unless($qmember_id);
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $member->genome_db;
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns FROM $tbl_name paf WHERE qmember_id=$qmember_id AND qmember_id=hmember_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute();
$paf = $self->_objs_from_sth($sth)->[0];
$sth->finish;
return $paf;
}
=head2 final_clause
Arg [1] : SQL clause
Example : $adaptor->final_clause("ORDER BY paf.qmember_id LIMIT 10");
$pafs = $adaptor->fetch_all;
$adaptor->final_clause("");
Description: getter/setter method for specifying an extension to the SQL prior to
a fetch operation. Useful final clauses are either 'ORDER BY' or 'LIMIT'
Returntype :
Caller : general
=cut
sub final_clause {
my $self = shift;
$self->{'_final_clause'} = shift if(@_);
return $self->{'_final_clause'};
}
#############################
#
# store methods
#
#############################
sub store {
my ($self, @features) = @_;
my @pafList = ();
foreach my $feature (@features) {
if($feature->isa('Bio::EnsEMBL::BaseAlignFeature')) {
#displayHSP_short($feature);
my $pepFeature = $self->_create_PAF_from_BaseAlignFeature($feature);
#$pepFeature->display_short();
push @pafList, $pepFeature;
}
elsif($feature->isa('Bio::EnsEMBL::Compara::PeptideAlignFeature')) {
push @pafList, $feature;
}
}
@pafList = sort sort_by_score_evalue_and_pid @pafList;
my $rank=1;
my $prevPaf = undef;
foreach my $paf (@pafList) {
$rank++ if($prevPaf and !pafs_equal($prevPaf, $paf));
$paf->hit_rank($rank);
$prevPaf = $paf;
}
$self->_store_PAFS(@pafList);
}
sub _store_PAFS {
my ($self, @out) = @_;
return unless(@out and scalar(@out));
# Query genome db id should always be the same
my $first_qgenome_db_id = $out[0]->query_genome_db_id;
my $gdb = $self->db->get_GenomeDBAdaptor->fetch_by_dbID($first_qgenome_db_id);
my $species_name = lc($gdb->name);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$first_qgenome_db_id";
my $query = "INSERT INTO $tbl_name(".
"qmember_id,hmember_id,qgenome_db_id,hgenome_db_id,analysis_id," .
"qstart,qend,hstart,hend,".
"score,evalue,align_length," .
"identical_matches,perc_ident,".
"positive_matches,perc_pos,hit_rank,cigar_line) VALUES ";
my $addComma=0;
foreach my $paf (@out) {
if($paf->isa('Bio::EnsEMBL::Compara::PeptideAlignFeature')) {
my $analysis_id = 0;
if($paf->analysis()) {
#print("paf has analysis '".$paf->analysis->logic_name()."' dbID=".$paf->analysis->dbID."\n");
$analysis_id=$paf->analysis()->dbID();
}
# print STDERR "== ", $paf->query_member_id, " - ", $paf->hit_member_id, "\n";
my $qgenome_db_id = $paf->query_genome_db_id;
$qgenome_db_id = 0 unless($qgenome_db_id);
my $hgenome_db_id = $paf->hit_genome_db_id;
$hgenome_db_id = 0 unless($hgenome_db_id);
eval {$paf->query_member->source_name eq 'ENSEMBLPEP';};
if ($@) { $self->throw("Not an ENSEMBLPEP\n"); }
# Null_cigar option for leaner paf tables
$paf->cigar_line('') if (defined $paf->{null_cigar});
$query .= ", " if($addComma);
$query .= "(".$paf->query_member_id.
",".$paf->hit_member_id.
",".$qgenome_db_id.
",".$hgenome_db_id.
",".$analysis_id.
",".$paf->qstart.
",".$paf->qend.
",".$paf->hstart.
",".$paf->hend.
",".$paf->score.
",".$paf->evalue.
",".$paf->alignment_length.
",".$paf->identical_matches.
",".$paf->perc_ident.
",".$paf->positive_matches.
",".$paf->perc_pos.
",".$paf->hit_rank.
",'".$paf->cigar_line."')";
$addComma=1;
# $paf->display_short();
}
}
#print("$query\n");
my $sth = $self->prepare($query);
$sth->execute();
$sth->finish();
}
sub _create_PAF_from_BaseAlignFeature {
my($self, $feature) = @_;
unless(defined($feature) and $feature->isa('Bio::EnsEMBL::BaseAlignFeature')) {
throw("arg must be a [Bio::EnsEMBL::BaseAlignFeature] not a [$feature]");
}
my $paf = new Bio::EnsEMBL::Compara::PeptideAlignFeature;
my $memberDBA = $self->db->get_MemberAdaptor();
if ($feature->seqname =~ /IDs\:(\d+)\:(\d+)/) {
my $query_member = $memberDBA->fetch_by_dbID($2);
eval {$query_member->source_name eq 'ENSEMBLPEP'};
if ($@) { $self->throw("$1 is not an ENSEMBLPEP\n"); }
$paf->query_genome_db_id($1);
$paf->query_member_id($2);
} elsif($feature->seqname =~ /member_id_(\d+)/) {
#printf("qseq: member_id = %d\n", $1);
my $query_member = $memberDBA->fetch_by_dbID($1);
eval {$query_member->source_name eq 'ENSEMBLPEP'};
if ($@) { $self->throw("$1 is not an ENSEMBLPEP\n"); }
$paf->query_member($query_member);
} else {
my ($source_name, $stable_id) = split(/:/, $feature->seqname);
#printf("qseq: %s %s\n", $source_name, $stable_id);
$paf->query_member($memberDBA->fetch_by_source_stable_id($source_name, $stable_id));
}
if ($feature->hseqname =~ /IDs\:(\d+)\:(\d+)/) {
$paf->hit_genome_db_id($1);
$paf->hit_member_id($2);
} elsif ($feature->hseqname =~ /member_id_(\d+)/) {
#printf("hseq: member_id = %d\n", $1);
$paf->hit_member($memberDBA->fetch_by_dbID($1));
} else {
my ($source_name, $stable_id) = split(/:/, $feature->hseqname);
#printf("hseq: %s %s\n", $source_name, $stable_id);
my $hit_member = $memberDBA->fetch_by_source_stable_id($source_name, $stable_id);
if (defined($hit_member)) {
$paf->hit_member($hit_member);
} else {
die "couldnt find $stable_id\n";
}
}
$paf->analysis($feature->analysis);
$paf->qstart($feature->start);
$paf->hstart($feature->hstart);
$paf->qend($feature->end);
$paf->hend($feature->hend);
#$paf->qlength($qlength);
#$paf->hlength($hlength);
$paf->score($feature->score);
$paf->evalue($feature->p_value);
$paf->cigar_line($feature->cigar_string);
$paf->{null_cigar} = 1 if (defined $feature->{null_cigar});
$paf->alignment_length($feature->alignment_length);
$paf->identical_matches($feature->identical_matches);
$paf->positive_matches($feature->positive_matches);
$paf->perc_ident(int($feature->identical_matches*100/$feature->alignment_length));
$paf->perc_pos(int($feature->positive_matches*100/$feature->alignment_length));
return $paf;
}
sub sort_by_score_evalue_and_pid {
$b->score <=> $a->score ||
$a->evalue <=> $b->evalue ||
$b->perc_ident <=> $a->perc_ident ||
$b->perc_pos <=> $a->perc_pos;
}
sub pafs_equal {
my ($paf1, $paf2) = @_;
return 0 unless($paf1 and $paf2);
return 1 if(($paf1->score == $paf2->score) and
($paf1->evalue == $paf2->evalue) and
($paf1->perc_ident == $paf2->perc_ident) and
($paf1->perc_pos == $paf2->perc_pos));
return 0;
}
sub displayHSP {
my($paf) = @_;
my $percent_ident = int($paf->identical_matches*100/$paf->alignment_length);
my $pos = int($paf->positive_matches*100/$paf->alignment_length);
print("=> $paf\n");
print("pep_align_feature :\n" .
" seqname : " . $paf->seqname . "\n" .
" start : " . $paf->start . "\n" .
" end : " . $paf->end . "\n" .
" hseqname : " . $paf->hseqname . "\n" .
" hstart : " . $paf->hstart . "\n" .
" hend : " . $paf->hend . "\n" .
" score : " . $paf->score . "\n" .
" p_value : " . $paf->p_value . "\n" .
" alignment_length : " . $paf->alignment_length . "\n" .
" identical_matches : " . $paf->identical_matches . "\n" .
" perc_ident : " . $percent_ident . "\n" .
" positive_matches : " . $paf->positive_matches . "\n" .
" perc_pos : " . $pos . "\n" .
" cigar_line : " . $paf->cigar_string . "\n");
}
sub displayHSP_short {
my($paf) = @_;
unless(defined($paf)) {
print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\n");
return;
}
my $perc_ident = int($paf->identical_matches*100/$paf->alignment_length);
my $perc_pos = int($paf->positive_matches*100/$paf->alignment_length);
print("HSP ".$paf->seqname."(".$paf->start.",".$paf->end.")".
"\t" . $paf->hseqname. "(".$paf->hstart.",".$paf->hend.")".
"\t" . $paf->score .
"\t" . $paf->alignment_length .
"\t" . $perc_ident .
"\t" . $perc_pos . "\n");
}
############################
#
# INTERNAL METHODS
# (pseudo subclass methods)
#
############################
#internal method used in multiple calls above to build objects from table data
sub _tables {
my $self = shift;
return (['peptide_align_feature', 'paf'] );
}
sub _columns {
my $self = shift;
return qw (paf.peptide_align_feature_id
paf.qmember_id
paf.hmember_id
paf.analysis_id
paf.qstart
paf.qend
paf.hstart
paf.hend
paf.score
paf.evalue
paf.align_length
paf.identical_matches
paf.perc_ident
paf.positive_matches
paf.perc_pos
paf.hit_rank
paf.cigar_line
);
}
sub _default_where_clause {
my $self = shift;
return '';
}
sub _objs_from_sth {
my ($self, $sth) = @_;
my %column;
$sth->bind_columns( \( @column{ @{$sth->{NAME_lc} } } ));
my @pafs = ();
while ($sth->fetch()) {
my $paf;
$paf = Bio::EnsEMBL::Compara::PeptideAlignFeature->new();
$paf->dbID($column{'peptide_align_feature_id'});
$paf->qstart($column{'qstart'});
$paf->qend($column{'qend'});
$paf->hstart($column{'hstart'});
$paf->hend($column{'hend'});
$paf->score($column{'score'});
$paf->evalue($column{'evalue'});
$paf->alignment_length($column{'align_length'});
$paf->identical_matches($column{'identical_matches'});
$paf->perc_ident($column{'perc_ident'});
$paf->positive_matches($column{'positive_matches'});
$paf->perc_pos($column{'perc_pos'});
$paf->hit_rank($column{'hit_rank'});
$paf->cigar_line($column{'cigar_line'});
$paf->rhit_dbID($column{'pafid2'});
if($column{'analysis_id'} and $self->db->get_AnalysisAdaptor) {
$paf->analysis($self->db->get_AnalysisAdaptor->fetch_by_dbID($column{'analysis_id'}));
}
my $memberDBA = $self->db->get_MemberAdaptor;
if($column{'qmember_id'} and $memberDBA) {
$paf->query_member($memberDBA->fetch_by_dbID($column{'qmember_id'}));
}
if($column{'hmember_id'} and $memberDBA) {
$paf->hit_member($memberDBA->fetch_by_dbID($column{'hmember_id'}));
}
#$paf->display_short();
push @pafs, $paf;
}
$sth->finish;
return \@pafs;
}
###############################################################################
#
# General access methods that could be moved
# into a superclass
#
###############################################################################
#sub fetch_by_dbID_qgenome_db_id {
=head2 fetch_by_dbID
Arg [1] : int $id
the unique database identifier for the feature to be obtained
Example : $paf = $adaptor->fetch_by_dbID(1234);
Description: Returns the PeptideAlignFeature created from the database defined by the
the id $id.
Returntype : Bio::EnsEMBL::Compara::PeptideAlignFeature
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_by_dbID{
my ($self,$id) = @_;
unless(defined $id) {
$self->throw("fetch_by_dbID must have an id");
}
my $sql = 'SHOW TABLES LIKE "peptide_align_feature_%"';
my $sth = $self->dbc->prepare($sql);
$sth->execute();
my @tbl_names;
while ( my $tbl_name = $sth->fetchrow ) {
push @tbl_names, $tbl_name;
}
$sth->finish;
my $paf;
foreach my $tbl_name (@tbl_names) {
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns FROM $tbl_name paf WHERE peptide_align_feature_id=$id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$paf = $self->_objs_from_sth($sth)->[0];
if (defined($paf)) {
# Found it in one of the tables, no need to go any further
# assuming UpdatePAFIds has done its job properly
$sth->finish;
return $paf;
}
}
$sth->finish;
return undef;
}
# Previous all-in-one-table paf
# sub old_fetch_by_dbID{
# my ($self,$id) = @_;
# unless(defined $id) {
# $self->throw("fetch_by_dbID must have an id");
# }
# my @tabs = $self->_tables;
# my ($name, $syn) = @{$tabs[0]};
# #construct a constraint like 't1.table1_id = 1'
# my $constraint = "${syn}.${name}_id = $id";
# #return first element of _generic_fetch list
# my ($obj) = @{$self->_generic_fetch($constraint)};
# return $obj;
# }
=head2 fetch_by_dbIDs
Arg [1...] : int $id (multiple)
the unique database identifier for the feature to be obtained
Example : $pafs = $adaptor->fetch_by_dbID(paf1_id, $paf2_id, $paf3_id);
Description: Returns the PeptideAlignFeature created from the database defined by the
the id $id.
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_by_dbIDs{
my $self = shift;
my @ids = @_;
return undef unless(scalar(@ids));
my $id_string = join(",", @ids);
my $sql = 'SHOW TABLES LIKE "peptide_align_feature_%"';
my $sth = $self->dbc->prepare($sql);
$sth->execute();
my @tbl_names;
while ( my $tbl_name = $sth->fetchrow ) {
push @tbl_names, $tbl_name;
}
$sth->finish;
my @pafs;
my $paf;
foreach my $tbl_name (@tbl_names) {
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns FROM $tbl_name paf WHERE peptide_align_feature_id in ($id_string)";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$paf = $self->_objs_from_sth($sth);
if (defined($paf) && (0 != scalar @$paf)) {
foreach my $this_paf (@$paf) {
push @pafs, $this_paf;
}
}
}
$sth->finish;
return \@pafs;
}
# Previous all-in-one-table paf
# sub old_fetch_by_dbIDs{
# my $self = shift;
# my @ids = @_;
# return undef unless(scalar(@ids));
# my $id_string = join(",", @ids);
# my $constraint = "paf.peptide_align_feature_id in ($id_string)";
# #printf("fetch_by_dbIDs has contraint\n$constraint\n");
# #return first element of _generic_fetch list
# return $self->_generic_fetch($constraint);
# }
=head2 fetch_BRH_by_member_genomedb
Arg [1] : member_id of query peptide member
Arg [2] : genome_db_id of hit species
Example : $paf = $adaptor->fetch_BRH_by_member_genomedb(31957, 3);
Description: Returns the PeptideAlignFeature created from the database
This is the old algorithm for pulling BRHs (compara release 20-23)
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions : none
Caller : general
=cut
sub fetch_BRH_by_member_genomedb
{
# using trick of specifying table twice so can join to self
my $self = shift;
my $qmember_id = shift;
my $hit_genome_db_id = shift;
#print(STDERR "fetch_all_RH_by_member_genomedb qmember_id=$qmember_id, genome_db_id=$hit_genome_db_id\n");
return unless($qmember_id and $hit_genome_db_id);
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $member->genome_db;
my $species_name1 = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name1 =~ s/\ /\_/g;
my $tbl_name1 = "peptide_align_feature"."_"."$species_name1"."_"."$gdb_id";
my $species_name2 = lc($self->db->get_GenomeDBAdaptor->fetch_by_dbID($hit_genome_db_id)->name);
$species_name2 =~ s/\ /\_/g;
my $tbl_name2 = "peptide_align_feature"."_"."$species_name2"."_"."$hit_genome_db_id";
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns, paf2.peptide_align_feature_id AS pafid2".
" FROM $tbl_name1 paf, $tbl_name2 paf2".
" WHERE paf.hmember_id=paf2.qmember_id".
" AND paf.hit_rank=1 AND paf2.hit_rank=1".
" AND paf.qmember_id=$qmember_id and paf.hgenome_db_id=$hit_genome_db_id".
" AND paf2.hmember_id=$qmember_id and paf2.qgenome_db_id=$hit_genome_db_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
my $obj = $self->_objs_from_sth($sth)->[0];
$sth->finish;
return $obj;
}
# Previous all-in-one-table paf
# sub old_fetch_BRH_by_member_genomedb
# {
# # using trick of specifying table twice so can join to self
# my $self = shift;
# my $qmember_id = shift;
# my $hit_genome_db_id = shift;
# #print(STDERR "fetch_BRH_by_member_genomedb qmember_id=$qmember_id, genome_db_id=$hit_genome_db_id\n");
# return unless($qmember_id and $hit_genome_db_id);
# my $extrajoin = [
# [ ['peptide_align_feature', 'paf2'],
# 'paf.qmember_id=paf2.hmember_id AND paf.hmember_id=paf2.qmember_id',
# ['paf2.peptide_align_feature_id AS pafid2']]
# ];
# my $constraint = "paf.hit_rank=1 AND paf2.hit_rank=1".
# " AND paf.qmember_id='".$qmember_id."'".
# " AND paf2.hmember_id='".$qmember_id."'".
# " AND paf.hgenome_db_id='".$hit_genome_db_id."'".
# " AND paf2.qgenome_db_id='".$hit_genome_db_id."'";
# return $self->_generic_fetch($constraint, $extrajoin);
# }
=head2 fetch_all_RH_by_member_genomedb
Overview : This an experimental method and not currently used in production
Arg [1] : member_id of query peptide member
Arg [2] : genome_db_id of hit species
Example : $feat = $adaptor->fetch_by_dbID($musBlastAnal, $ratBlastAnal);
Description: Returns all the PeptideAlignFeatures that reciprocal hit the qmember_id
onto the hit_genome_db_id
Returntype : array of Bio::EnsEMBL::Compara::PeptideAlignFeature objects by reference
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_all_RH_by_member_genomedb
{
# using trick of specifying table twice so can join to self
my $self = shift;
my $qmember_id = shift;
my $hit_genome_db_id = shift;
#print(STDERR "fetch_all_RH_by_member_genomedb qmember_id=$qmember_id, genome_db_id=$hit_genome_db_id\n");
return unless($qmember_id and $hit_genome_db_id);
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $member->genome_db;
my $species_name1 = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name1 =~ s/\ /\_/g;
my $tbl_name1 = "peptide_align_feature"."_"."$species_name1"."_"."$gdb_id";
my $species_name2 = lc($self->db->get_GenomeDBAdaptor->fetch_by_dbID($hit_genome_db_id)->name);
$species_name2 =~ s/\ /\_/g;
my $tbl_name2 = "peptide_align_feature"."_"."$species_name2"."_"."$hit_genome_db_id";
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns, paf2.peptide_align_feature_id AS pafid2".
" FROM $tbl_name1 paf, $tbl_name2 paf2".
" WHERE paf.hmember_id=paf2.qmember_id".
" AND paf.qmember_id=$qmember_id and paf.hgenome_db_id=$hit_genome_db_id".
" AND paf2.hmember_id=$qmember_id and paf2.qgenome_db_id=$hit_genome_db_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
my $objs = $self->_objs_from_sth($sth);
$sth->finish;
return $objs;
}
# Previous all-in-one-table paf
# sub old_fetch_all_RH_by_member_genomedb
# {
# # using trick of specifying table twice so can join to self
# my $self = shift;
# my $qmember_id = shift;
# my $hit_genome_db_id = shift;
# #print(STDERR "fetch_all_RH_by_member_genomedb qmember_id=$qmember_id, genome_db_id=$hit_genome_db_id\n");
# return unless($qmember_id and $hit_genome_db_id);
# my $extrajoin = [
# [ ['peptide_align_feature', 'paf2'],
# 'paf.qmember_id=paf2.hmember_id AND paf.hmember_id=paf2.qmember_id',
# ['paf2.peptide_align_feature_id AS pafid2']]
# ];
# my $constraint = " paf.qmember_id='".$qmember_id."'".
# " AND paf2.hmember_id='".$qmember_id."'".
# " AND paf.hgenome_db_id='".$hit_genome_db_id."'".
# " AND paf2.qgenome_db_id='".$hit_genome_db_id."'";
# #$self->final_clause("ORDER BY paf.hit_rank");
# my $objs = $self->_generic_fetch($constraint, $extrajoin);
# #$self->final_clause("");
# return $objs;
# }
=head2 fetch_all_RH_by_member
Overview : This an experimental method and not currently used in production
Arg [1] : member_id of query peptide member
Example : $feat = $adaptor->fetch_by_dbID($musBlastAnal, $ratBlastAnal);
Description: Returns all the PeptideAlignFeatures that reciprocal hit all genomes
Returntype : array of Bio::EnsEMBL::Compara::PeptideAlignFeature objects by reference
Exceptions : thrown if $id is not defined
Caller : general
=cut
sub fetch_all_RH_by_member
{
# using trick of specifying table twice so can join to self
my $self = shift;
my $qmember_id = shift;
#print(STDERR "fetch_all_RH_by_member_genomedb qmember_id=$qmember_id, genome_db_id=$hit_genome_db_id\n");
return unless($qmember_id);
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $member->genome_db;
my $species_name = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name =~ s/\ /\_/g;
my $tbl_name = "peptide_align_feature"."_"."$species_name"."_"."$gdb_id";
my $columns = join(', ', $self->_columns());
my $sql = "SELECT $columns, paf2.peptide_align_feature_id AS pafid2".
" FROM $tbl_name paf, $tbl_name paf2".
" WHERE paf.hmember_id=paf2.qmember_id".
" AND paf.qmember_id=$qmember_id".
" AND paf2.hmember_id=$qmember_id";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
my $objs = $self->_objs_from_sth($sth);
$sth->finish;
return $objs;
}
# Previous all-in-one-table paf
# sub old_fetch_all_RH_by_member
# {
# # using trick of specifying table twice so can join to self
# my $self = shift;
# my $qmember_id = shift;
# #print(STDERR "fetch_all_RH_by_member qmember_id=$qmember_id\n");
# return unless($qmember_id);
# my $extrajoin = [
# [ ['peptide_align_feature', 'paf2'],
# 'paf.qmember_id=paf2.hmember_id AND paf.hmember_id=paf2.qmember_id',
# ['paf2.peptide_align_feature_id AS pafid2']]
# ];
# my $constraint = " paf.qmember_id='".$qmember_id."'".
# " AND paf2.hmember_id='".$qmember_id."'";
# return $self->_generic_fetch($constraint, $extrajoin);
# }
=head2 fetch_all
Arg : None
Example : @pafs = @{$adaptor->fetch_all};
Description: fetch all peptide align features. Not generally useful since it
can return millions of objects.
Returntype : array reference of Bio::EnsEMBL::Compara::PeptideAlignFeature objects
Exceptions :
Caller :
=cut
sub fetch_all {
my $self = shift;
return $self->_generic_fetch();
}
=head2 fetch_BRH_web_for_member_genome_db
Overview : This is the new (compara_24) algorithm method for finding UBRH and MBRH
homologies.
Arg [1] : member_id of query peptide member
Arg [2] : genome_db_id of hit species
Description: Returns all the 'best' PeptideAlignFeatures starting with qmember_id
hitting onto the hit_genome_db_id via recursive search
Returntype : array of Bio::EnsEMBL::Compara::PeptideAlignFeature objects by reference
or undef if nothing found
Exceptions : none
Caller : general
=cut
sub fetch_BRH_web_for_member_genome_db
{
# recursive search to find web of 'best' hits starting with a given
# qmember_id and hit_genome_db_id
my $self = shift;
my $qmember_id = shift;
my $hit_genome_db_id = shift;
my $tested_member_ids = {};
my $found_paf_ids = {};
$self->_recursive_find_brh_pafs_for_member_genome_db(
$qmember_id,
$hit_genome_db_id,
$tested_member_ids,
$found_paf_ids);
my $pafsArray = $self->fetch_by_dbIDs(keys %$found_paf_ids);
return undef unless($pafsArray);
#match up the reciprocals
foreach my $paf1 (@$pafsArray) {
foreach my $paf2 (@$pafsArray) {
if(($paf1->query_member->dbID == $paf2->hit_member->dbID) and
($paf1->hit_member->dbID == $paf2->query_member->dbID))
{
$paf1->rhit_dbID($paf2->dbID);
$paf2->rhit_dbID($paf1->dbID);
}
}
}
return $pafsArray;
}
sub _recursive_find_brh_pafs_for_member_genome_db
{
# recursive search to find web of 'best' hits starting with a given
# member_id and genome_db_ids
my $self = shift;
my $qmember_id = shift;
my $hit_genome_db_id = shift;
my $tested_member_ids = shift; #ref to hash
my $found_paf_ids = shift; #ref to hash
my $member = $self->db->get_MemberAdaptor->fetch_by_dbID($qmember_id);
my $gdb = $member->genome_db;
my $species_name1 = lc($gdb->name);
my $gdb_id = lc($gdb->dbID);
$species_name1 =~ s/\ /\_/g;
my $tbl_name1 = "peptide_align_feature"."_"."$species_name1"."_"."$gdb_id";
my $species_name2 = lc($self->db->get_GenomeDBAdaptor->fetch_by_dbID($hit_genome_db_id)->name);
$species_name2 =~ s/\ /\_/g;
my $tbl_name2 = "peptide_align_feature"."_"."$species_name2"."_"."$hit_genome_db_id";
return unless($qmember_id and $hit_genome_db_id);
return if($tested_member_ids->{$qmember_id}); #already tested this member
$tested_member_ids->{$qmember_id} = 1;
#printf(" recursive_web qm=%d hg=%d\n", $qmember_id, $hit_genome_db_id);
my $sql = "SELECT paf1.peptide_align_feature_id, paf1.hmember_id, paf1.qgenome_db_id ".
" FROM $tbl_name1 paf1, $tbl_name2 paf2".
" WHERE paf1.hmember_id=paf2.qmember_id".
" AND paf1.qmember_id=? and paf1.hgenome_db_id=? and paf1.hit_rank=1".
" AND paf2.hmember_id=? and paf2.qgenome_db_id=? and paf2.hit_rank=1";
my $sth = $self->prepare($sql);
$sth->execute($qmember_id, $hit_genome_db_id, $qmember_id, $hit_genome_db_id);
while (my ($pafID, $hmember_id, $qgenome_db_id) = $sth->fetchrow_array()) {
#printf(" found pafID $pafID in recursive search\n");
$found_paf_ids->{$pafID} = 1;
$self->_recursive_find_brh_pafs_for_member_genome_db($hmember_id, $qgenome_db_id, $tested_member_ids, $found_paf_ids);
}
$sth->finish;
}
# sub _old_recursive_find_brh_pafs_for_member_genome_db
# {
# # recursive search to find web of 'best' hits starting with a given
# # member_id and genome_db_ids
# my $self = shift;
# my $qmember_id = shift;
# my $hit_genome_db_id = shift;
# my $tested_member_ids = shift; #ref to hash
# my $found_paf_ids = shift; #ref to hash
# return unless($qmember_id and $hit_genome_db_id);
# return if($tested_member_ids->{$qmember_id}); #already tested this member
# $tested_member_ids->{$qmember_id} = 1;
# #printf(" recursive_web qm=%d hg=%d\n", $qmember_id, $hit_genome_db_id);
# my $sql = "SELECT paf1.peptide_align_feature_id, paf1.hmember_id, paf1.qgenome_db_id ".
# " FROM peptide_align_feature paf1, peptide_align_feature paf2".
# " WHERE paf1.hmember_id=paf2.qmember_id".
# " AND paf1.qmember_id=? and paf1.hgenome_db_id=? and paf1.hit_rank=1".
# " AND paf2.hmember_id=? and paf2.qgenome_db_id=? and paf2.hit_rank=1";
# my $sth = $self->prepare($sql);
# $sth->execute($qmember_id, $hit_genome_db_id, $qmember_id, $hit_genome_db_id);
# while (my ($pafID, $hmember_id, $qgenome_db_id) = $sth->fetchrow_array()) {
# #printf(" found pafID $pafID in recursive search\n");
# $found_paf_ids->{$pafID} = 1;
# $self->_recursive_find_brh_pafs_for_member_genome_db($hmember_id, $qgenome_db_id, $tested_member_ids, $found_paf_ids);
# }
# $sth->finish;
# }
sub _generic_fetch {
my ($self, $constraint, $join) = @_;
my @tables = $self->_tables;
my $columns = join(', ', $self->_columns());
if ($join) {
foreach my $single_join (@{$join}) {
my ($tablename, $condition, $extra_columns) = @{$single_join};
if ($tablename && $condition) {
push @tables, $tablename;
if($constraint) {
$constraint .= " AND $condition";
} else {
$constraint = " $condition";
}
}
if ($extra_columns) {
$columns .= ", " . join(', ', @{$extra_columns});
}
}
}
#construct a nice table string like 'table1 t1, table2 t2'
my $tablenames = join(', ', map({ join(' ', @$_) } @tables));
my $sql = "SELECT $columns FROM $tablenames";
my $default_where = $self->_default_where_clause;
my $final_clause = $self->final_clause;
#append a where clause if it was defined
if($constraint) {
$sql .= " WHERE $constraint ";
if($default_where) {
$sql .= " AND $default_where ";
}
} elsif($default_where) {
$sql .= " WHERE $default_where ";
}
#append additional clauses which may have been defined
$sql .= " $final_clause" if($final_clause);
# print STDERR $sql,"\n";
my $sth = $self->prepare($sql);
$sth->execute;
# print STDERR $sql,"\n";
# print STDERR "sql execute finished. about to build objects\n";
return $self->_objs_from_sth($sth);
}
1;