Raw content of Bio::EnsEMBL::Compara::ProteinTree
=head1 NAME
ProteinTree - DESCRIPTION of Object
=head1 SYNOPSIS
=head1 DESCRIPTION
Specific subclass of NestedSet to add functionality when the leaves of this tree
are AlignedMember objects and the tree is a representation of a Protein derived
Phylogenetic tree
=head1 CONTACT
Contact Jessica Severin on implemetation/design detail: jessica@ebi.ac.uk
Contact Abel Ureta-Vidal on EnsEMBL compara project: abel@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::ProteinTree;
use strict;
use Bio::EnsEMBL::Compara::BaseRelation;
use Bio::EnsEMBL::Compara::SitewiseOmega;
use Bio::EnsEMBL::Utils::Argument;
use Bio::EnsEMBL::Utils::Exception;
use Bio::SimpleAlign;
use IO::File;
use Bio::EnsEMBL::Compara::NestedSet;
our @ISA = qw(Bio::EnsEMBL::Compara::NestedSet);
=head2 description_score
Arg [1] :
Example :
Description:
Returntype :
Exceptions :
Caller :
=cut
sub description_score {
my $self = shift;
$self->{'_description_score'} = shift if(@_);
return $self->{'_description_score'};
}
sub get_leaf_by_Member {
my $self = shift;
my $member = shift;
if($member->isa('Bio::EnsEMBL::Compara::ProteinTree')) {
return $self->find_leaf_by_node_id($member->node_id);
} elsif ($member->isa('Bio::EnsEMBL::Compara::Member')) {
return $self->find_leaf_by_name($member->get_longest_peptide_Member->stable_id);
} else {
die "Need a Member object!";
}
}
sub get_SimpleAlign {
my ($self, @args) = @_;
my $id_type = 'STABLE';
my $unique_seqs = 0;
my $cdna = 0;
my $stop2x = 0;
my $append_taxon_id = 0;
my $append_sp_short_name = 0;
my $append_genomedb_id = 0;
my $exon_cased = 0;
if (scalar @args) {
($unique_seqs, $cdna, $id_type, $stop2x, $append_taxon_id, $append_sp_short_name, $append_genomedb_id, $exon_cased) =
rearrange([qw(UNIQ_SEQ CDNA ID_TYPE STOP2X APPEND_TAXON_ID APPEND_SP_SHORT_NAME APPEND_GENOMEDB_ID EXON_CASED)], @args);
}
$id_type = 'STABLE' unless(defined($id_type));
my $sa = Bio::SimpleAlign->new();
#Hack to try to work with both bioperl 0.7 and 1.2:
#Check to see if the method is called 'addSeq' or 'add_seq'
my $bio07 = 0;
$bio07=1 if(!$sa->can('add_seq'));
my $seq_id_hash = {};
foreach my $member (@{$self->get_all_leaves}) {
next unless($member->isa('Bio::EnsEMBL::Compara::AlignedMember'));
next if($unique_seqs and $seq_id_hash->{$member->sequence_id});
$seq_id_hash->{$member->sequence_id} = 1;
my $seqstr;
if ($cdna) {
$seqstr = $member->cdna_alignment_string;
$seqstr =~ s/\s+//g;
} else {
$seqstr = $member->alignment_string($exon_cased);
}
next if(!$seqstr);
my $seqID = $member->stable_id;
$seqID = $member->sequence_id if($id_type eq "SEQ");
$seqID = $member->member_id if($id_type eq "MEMBER");
$seqID .= "_" . $member->taxon_id if($append_taxon_id);
$seqID .= "_" . $member->genome_db_id if ($append_genomedb_id);
## Append $seqID with Speciae short name, if required
if ($append_sp_short_name) {
my $species = $member->genome_db->short_name;
$species =~ s/\s/_/g;
$seqID .= "_" . $species . "_";
}
# $seqID .= "_" . $member->genome_db->taxon_id if($append_taxon_id); # this may be needed if you have subspecies or things like that
$seqstr =~ s/\*/X/g if ($stop2x);
my $seq = Bio::LocatableSeq->new(-SEQ => $seqstr,
-START => 1,
-END => length($seqstr),
-ID => $seqID,
-STRAND => 0);
if($bio07) {
$sa->addSeq($seq);
} else {
$sa->add_seq($seq);
}
}
return $sa;
}
# Takes a protein tree and creates a consensus cigar line from the
# constituent leaf nodes.
sub consensus_cigar_line {
my $self = shift;
my @cigars;
# First get an 'expanded' cigar string for each leaf of the subtree
my @all_leaves = @{$self->get_all_leaves};
my $num_leaves = scalar(@all_leaves);
foreach my $leaf (@all_leaves) {
next unless( UNIVERSAL::can( $leaf, 'cigar_line' ) );
my $cigar = $leaf->cigar_line;
$cigar =~ s/(\d*)([A-Z])/$2 x ($1||1)/ge; #Expand
push @cigars, $cigar;
}
# Itterate through each character of the expanded cigars.
# If there is a 'D' at a given location in any cigar,
# set the consensus to 'D', otherwise assume an 'M'.
# TODO: Fix assumption that cigar strings are always the same length,
# and start at the same point.
my $cigar_len = length( $cigars[0] );
my $cons_cigar;
for( my $i=0; $i<$cigar_len; $i++ ){
my $char = 'M';
my $num_deletions = 0;
foreach my $cigar( @cigars ){
if ( substr($cigar,$i,1) eq 'D'){
$num_deletions++;
}
}
if ($num_deletions * 3 > 2 * $num_leaves) {
$char = "D";
} elsif ($num_deletions * 3 > $num_leaves) {
$char = "m";
}
$cons_cigar .= $char;
}
# Collapse the consensus cigar, e.g. 'DDDD' = 4D
$cons_cigar =~ s/(\w)(\1*)/($2?length($2)+1:"").$1/ge;
# Return the consensus
return $cons_cigar;
}
sub get_SitewiseOmega_values {
my $self = shift;
my @values = @{$self->adaptor->db->get_SitewiseOmegaAdaptor->fetch_all_by_ProteinTreeId($self->node_id)};
return \@values;
}
1;