Raw content of Bio::EnsEMBL::Compara::PeptideAlignFeature
=head1 NAME - Bio::EnsEMBL::Compara::PeptideAlignFeature
=head1 SYNOPSIS
# Get an $homology object somehow
# For Homology PeptideAlignFeatures, you normally get 2 pafs,
# one for each member used alternatively as query and database
# (hit) in the blast run
my $pafs = $homology->get_all_PeptideAlignFeature;
foreach my $paf (@{$pafs}) {
print $paf->query_member->stable_id," ",$self->hit_member->stable_id," ",$paf->evalue,"\n";
}
# Other stuff in the object:
# $paf->qstart
# $paf->qend
# $paf->hstart
# $paf->hend
# $paf->score
# $paf->alignment_length
# $paf->identical_matches
# $paf->perc_ident
# $paf->positive_matches
# $paf->perc_pos
# $paf->hit_rank
# $paf->cigar_line
=head1 DESCRIPTION
=head1 CONTACT
Describe contact details here
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
my $_paf_build_homology_idx = time(); #global index counter
package Bio::EnsEMBL::Compara::PeptideAlignFeature;
use strict;
use Bio::EnsEMBL::Compara::Homology;
use Bio::EnsEMBL::Compara::Attribute;
use Bio::EnsEMBL::Utils::Exception;
#se overload '<=>' => "sort_by_score_evalue_and_pid"; # named method
sub new {
my ($class) = @_;
my $self = {};
bless $self,$class;
$self->query_member(new Bio::EnsEMBL::Compara::Member);
$self->hit_member(new Bio::EnsEMBL::Compara::Member);
return $self;
}
sub create_homology
{
my $self = shift;
# create an Homology object
my $homology = new Bio::EnsEMBL::Compara::Homology;
my $stable_id;
if($self->query_member->taxon_id < $self->hit_member->taxon_id) {
$stable_id = $self->query_member->taxon_id() . "_" . $self->hit_member->taxon_id . "_";
} else {
$stable_id = $self->hit_member->taxon_id . "_" . $self->query_member->taxon_id . "_";
}
$stable_id .= sprintf ("%011.0d",$_paf_build_homology_idx++);
$homology->stable_id($stable_id);
$homology->method_link_type("ENSEMBL_ORTHOLOGUES");
# NEED TO BUILD THE Attributes (ie homology_members)
#
# QUERY member
#
my $attribute;
$attribute = new Bio::EnsEMBL::Compara::Attribute;
$attribute->peptide_member_id($self->query_member->dbID);
$attribute->cigar_start($self->qstart);
$attribute->cigar_end($self->qend);
my $qlen = ($self->qend - $self->qstart + 1);
$attribute->perc_cov(int($qlen*100/$self->query_member->seq_length));
$attribute->perc_id(int($self->identical_matches*100.0/$qlen));
$attribute->perc_pos(int($self->positive_matches*100/$qlen));
$attribute->peptide_align_feature_id($self->dbID);
my $cigar_line = $self->cigar_line;
#print("original cigar_line '$cigar_line'\n");
$cigar_line =~ s/I/M/g;
$cigar_line = compact_cigar_line($cigar_line);
$attribute->cigar_line($cigar_line);
#print(" '$cigar_line'\n");
if($self->query_member->gene_member) {
#print("add query member gene : ", $self->query_member->gene_member->stable_id, "\n");
$homology->add_Member_Attribute([$self->query_member->gene_member, $attribute]);
} else {
#print("add query member : ", $self->query_member->stable_id, "\n");
$homology->add_Member_Attribute([$self->query_member, $attribute]);
}
# HIT member
#
$attribute = new Bio::EnsEMBL::Compara::Attribute;
$attribute->peptide_member_id($self->hit_member->dbID);
$attribute->cigar_start($self->hstart);
$attribute->cigar_end($self->hend);
my $hlen = ($self->hend - $self->hstart + 1);
$attribute->perc_cov(int($hlen*100/$self->hit_member->seq_length));
$attribute->perc_id(int($self->identical_matches*100.0/$hlen));
$attribute->perc_pos(int($self->positive_matches*100/$hlen));
$attribute->peptide_align_feature_id($self->rhit_dbID);
$cigar_line = $self->cigar_line;
#print("original cigar_line\n '$cigar_line'\n");
$cigar_line =~ s/D/M/g;
$cigar_line =~ s/I/D/g;
$cigar_line = compact_cigar_line($cigar_line);
$attribute->cigar_line($cigar_line);
#print(" '$cigar_line'\n");
if($self->hit_member->gene_member) {
#print("add hit member gene : ", $self->hit_member->gene_member->stable_id, "\n");
$homology->add_Member_Attribute([$self->hit_member->gene_member, $attribute]);
} else {
#print("add hit member : ", $self->hit_member->stable_id, "\n");
$homology->add_Member_Attribute([$self->hit_member, $attribute]);
}
return $homology;
}
sub compact_cigar_line
{
my $cigar_line = shift;
#print("cigar_line '$cigar_line' => ");
my @pieces = ( $cigar_line =~ /(\d*[MDI])/g );
my @new_pieces = ();
foreach my $piece (@pieces) {
$piece =~ s/I/M/;
if (! scalar @new_pieces || $piece =~ /D/) {
push @new_pieces, $piece;
next;
}
if ($piece =~ /\d*M/ && $new_pieces[-1] =~ /\d*M/) {
my ($matches1) = ($piece =~ /(\d*)M/);
my ($matches2) = ($new_pieces[-1] =~ /(\d*)M/);
if (! defined $matches1 || $matches1 eq "") {
$matches1 = 1;
}
if (! defined $matches2 || $matches2 eq "") {
$matches2 = 1;
}
$new_pieces[-1] = $matches1 + $matches2 . "M";
} else {
push @new_pieces, $piece;
}
}
my $new_cigar_line = join("", @new_pieces);
#print(" '$new_cigar_line'\n");
return $new_cigar_line;
}
##########################
#
# getter/setter methods
#
##########################
sub query_member {
my ($self,$arg) = @_;
if (defined($arg)) {
throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
$self->{'_query_member'} = $arg;
}
return $self->{'_query_member'};
}
sub query_member_id {
my $self = shift;
$self->{'_query_member_id'} = shift if (@_);
if ($self->{'_query_member_id'}) {
return $self->{'_query_member_id'};
} elsif ($self->{'_query_member'} and $self->{'_query_member'}->dbID) {
return $self->{'_query_member'}->dbID;
}
return undef;
}
sub query_genome_db_id {
my $self = shift;
$self->{'_query_genome_db_id'} = shift if (@_);
if ($self->{'_query_genome_db_id'}) {
return $self->{'_query_genome_db_id'};
} elsif ($self->{'_query_member'} and $self->{'_query_member'}->genome_db
and $self->{'_query_member'}->genome_db->dbID) {
return $self->{'_query_member'}->genome_db->dbID;
}
return undef;
}
sub hit_member {
my ($self,$arg) = @_;
if (defined($arg)) {
throw("arg must be a [Bio::EnsEMBL::Compara::Member] not a [$arg]")
unless($arg->isa('Bio::EnsEMBL::Compara::Member'));
$self->{'_hit_member'} = $arg;
}
return $self->{'_hit_member'};
}
sub hit_member_id {
my $self = shift;
$self->{'_hit_member_id'} = shift if (@_);
if ($self->{'_hit_member_id'}) {
return $self->{'_hit_member_id'};
} elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->dbID) {
return $self->{'_hit_member'}->dbID;
}
return undef;
}
sub hit_genome_db_id {
my $self = shift;
$self->{'_hit_genome_db_id'} = shift if (@_);
if ($self->{'_hit_genome_db_id'}) {
return $self->{'_hit_genome_db_id'};
} elsif ($self->{'_hit_member'} and $self->{'_hit_member'}->genome_db
and $self->{'_hit_member'}->genome_db->dbID) {
return $self->{'_hit_member'}->genome_db->dbID;
}
return undef;
}
sub qstart {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_qstart} = $arg;
}
return $self->{_qstart};
}
sub hstart {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_hstart} = $arg;
}
return $self->{_hstart};
}
sub qend {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_qend} = $arg;
}
return $self->{_qend};
}
sub qlength {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_qlength} = $arg;
}
return $self->{_qlength};
}
sub hend {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_hend} = $arg;
}
return $self->{_hend};
}
sub hlength{
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_hlength} = $arg;
}
return $self->{_hlength};
}
sub score{
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_score} = $arg;
}
return $self->{_score};
}
sub evalue {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_evalue} = $arg;
}
return $self->{_evalue};
}
sub perc_ident {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_perc_ident} = $arg;
}
return $self->{_perc_ident};
}
sub perc_pos {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_perc_pos} = $arg;
}
return $self->{_perc_pos};
}
sub identical_matches {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_identical_matches} = $arg;
if(defined($self->alignment_length)) {
$self->perc_ident(int($arg*100/$self->alignment_length));
}
}
return $self->{_identical_matches};
}
sub positive_matches {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_positive_matches} = $arg;
if(defined($self->alignment_length)) {
$self->perc_pos(int($arg*100/$self->alignment_length));
}
}
return $self->{_positive_matches};
}
sub alignment_length {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_alignment_length} = $arg;
}
return $self->{_alignment_length};
}
sub cigar_line {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_cigar_line} = $arg;
}
return $self->{_cigar_line};
}
sub hit_rank {
my ($self,$arg) = @_;
if (defined($arg)) {
$self->{_hit_rank} = $arg;
}
return $self->{_hit_rank};
}
sub analysis
{
my ($self,$analysis) = @_;
if (defined($analysis)) {
unless($analysis->isa('Bio::EnsEMBL::Analysis')) {
throw("arg must be a [Bio::EnsEMBL::Analysis] not a [$analysis]");
}
$self->{_analysis} = $analysis;
}
return $self->{_analysis};
}
sub dbID {
my ( $self, $dbID ) = @_;
$self->{'_dbID'} = $dbID if defined $dbID;
return $self->{'_dbID'};
}
sub rhit_dbID {
my ( $self, $dbID ) = @_;
$self->{'_rhit_dbID'} = $dbID if defined $dbID;
return $self->{'_rhit_dbID'};
}
sub display_short {
my $self = shift;
print($self->get_description(), "\n");
}
sub get_description {
my($self) = @_;
unless(defined($self)) {
print("qy_stable_id\t\t\thit_stable_id\t\t\tscore\talen\t\%ident\t\%positive\thit_rank\n");
return;
}
my $qm = $self->query_member;
my $hm = $self->hit_member;
my $dbID = $self->dbID; $dbID = '' unless($dbID);
my $header = "PAF(".$dbID.")";
$header .= "(".$self->rhit_dbID.")" if($self->rhit_dbID);
while(length($header)<17) { $header .= ' '; }
my $qmem = sprintf("%s(%d,%d)(%s:%d)",
$qm->stable_id, $self->qstart, $self->qend, $qm->chr_name, $qm->chr_start);
my $hmem = sprintf("%s(%d,%d)(%s:%d)",
$hm->stable_id, $self->hstart, $self->hend, $hm->chr_name, $hm->chr_start);
while(length($qmem)<50) { $qmem .= ' '; }
while(length($hmem)<50) { $hmem .= ' '; }
my $desc_string = sprintf("%s%s%s%7.3f%7d%7d%7d%7d",
$header, $qmem, $hmem,
$self->score,
$self->alignment_length,
$self->perc_ident,
$self->perc_pos,
$self->hit_rank);
return $desc_string;
}
=head2 hash_key
Args : none
Example : $somehash->{$paf->hash_key} = $someValue;
Description: used for keeping track of known/stored gene/gene relationships
Returntype : string $key
Exceptions : none
Caller : general
=cut
sub hash_key
{
my $self = shift;
my $key = '1';
return $key unless($self->query_member);
return $key unless($self->hit_member);
my $gene1 = $self->query_member->gene_member;
my $gene2 = $self->hit_member->gene_member;
$gene1 = $self->query_member unless($gene1);
$gene2 = $self->hit_member unless($gene2);
if($gene1->genome_db_id > $gene2->genome_db_id) {
my $temp = $gene1;
$gene1 = $gene2;
$gene2 = $temp;
}
$key = $gene1->stable_id . '_' . $gene2->stable_id;
return $key;
}
1;