Bio::EnsEMBL::Compara ProteinTree
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
ProteinTree - DESCRIPTION of Object
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::BaseRelation
Bio::EnsEMBL::Compara::NestedSet
Bio::EnsEMBL::Compara::SitewiseOmega
Bio::EnsEMBL::Utils::Argument
Bio::EnsEMBL::Utils::Exception
Bio::SimpleAlign
IO::File
Inherit
Bio::EnsEMBL::Compara::NestedSet
Synopsis
Description
Specific subclass of NestedSet to add functionality when the leaves of this tree
are AlignedMember objects and the tree is a representation of a Protein derived
Phylogenetic tree
Methods
consensus_cigar_line
No description
Code
description_scoreDescriptionCode
get_SimpleAlign
No description
Code
get_SitewiseOmega_values
No description
Code
get_leaf_by_Member
No description
Code
Methods description
description_scorecode    nextTop
  Arg [1]    : 
Example :
Description:
Returntype :
Exceptions :
Caller :
Methods code
consensus_cigar_linedescriptionprevnextTop
sub consensus_cigar_line {
   my $self = shift;
   my @cigars;

   # First get an 'expanded' cigar string for each leaf of the subtree
my @all_leaves = @{$self->get_all_leaves}; my $num_leaves = scalar(@all_leaves); foreach my $leaf (@all_leaves) { next unless( UNIVERSAL::can( $leaf, 'cigar_line' ) ); my $cigar = $leaf->cigar_line; $cigar =~ s/(\d*)([A-Z])/$2 x ($1||1)/ge; #Expand
push @cigars, $cigar; } # Itterate through each character of the expanded cigars.
# If there is a 'D' at a given location in any cigar,
# set the consensus to 'D', otherwise assume an 'M'.
# TODO: Fix assumption that cigar strings are always the same length,
# and start at the same point.
my $cigar_len = length( $cigars[0] ); my $cons_cigar; for( my $i=0; $i<$cigar_len; $i++ ){ my $char = 'M'; my $num_deletions = 0; foreach my $cigar( @cigars ){ if ( substr($cigar,$i,1) eq 'D'){ $num_deletions++; } } if ($num_deletions * 3 > 2 * $num_leaves) { $char = "D"; } elsif ($num_deletions * 3 > $num_leaves) { $char = "m"; } $cons_cigar .= $char; } # Collapse the consensus cigar, e.g. 'DDDD' = 4D
$cons_cigar =~ s/(\w)(\1*)/($2?length($2)+1:"").$1/ge; # Return the consensus
return $cons_cigar;
}
description_scoredescriptionprevnextTop
sub description_score {
  my $self = shift;
  $self->{'_description_score'} = shift if(@_);
  return $self->{'_description_score'};
}
get_SimpleAligndescriptionprevnextTop
sub get_SimpleAlign {
  my ($self, @args) = @_;

  my $id_type = 'STABLE';
  my $unique_seqs = 0;
  my $cdna = 0;
  my $stop2x = 0;
  my $append_taxon_id = 0;
  my $append_sp_short_name = 0;
  my $append_genomedb_id = 0;
  my $exon_cased = 0;
  if (scalar @args) {
    ($unique_seqs, $cdna, $id_type, $stop2x, $append_taxon_id, $append_sp_short_name, $append_genomedb_id, $exon_cased) =
       rearrange([qw(UNIQ_SEQ CDNA ID_TYPE STOP2X APPEND_TAXON_ID APPEND_SP_SHORT_NAME APPEND_GENOMEDB_ID EXON_CASED)], @args);
  }
  $id_type = 'STABLE' unless(defined($id_type));

  my $sa = Bio::SimpleAlign->new();

  #Hack to try to work with both bioperl 0.7 and 1.2:
#Check to see if the method is called 'addSeq' or 'add_seq'
my $bio07 = 0; $bio07=1 if(!$sa->can('add_seq')); my $seq_id_hash = {}; foreach my $member (@{$self->get_all_leaves}) { next unless($member->isa('Bio::EnsEMBL::Compara::AlignedMember')); next if($unique_seqs and $seq_id_hash->{$member->sequence_id}); $seq_id_hash->{$member->sequence_id} = 1; my $seqstr; if ($cdna) { $seqstr = $member->cdna_alignment_string; $seqstr =~ s/\s+//g; } else { $seqstr = $member->alignment_string($exon_cased); } next if(!$seqstr); my $seqID = $member->stable_id; $seqID = $member->sequence_id if($id_type eq "SEQ"); $seqID = $member->member_id if($id_type eq "MEMBER"); $seqID .= "_" . $member->taxon_id if($append_taxon_id); $seqID .= "_" . $member->genome_db_id if ($append_genomedb_id); ## Append $seqID with Speciae short name, if required
if ($append_sp_short_name) { my $species = $member->genome_db->short_name; $species =~ s/\s/_/g; $seqID .= "_" . $species . "_"; } # $seqID .= "_" . $member->genome_db->taxon_id if($append_taxon_id); # this may be needed if you have subspecies or things like that
$seqstr =~ s/\*/X/g if ($stop2x); my $seq = Bio::LocatableSeq->new(-SEQ => $seqstr, -START => 1, -END => length($seqstr), -ID => $seqID, -STRAND => 0); if($bio07) { $sa->addSeq($seq); } else { $sa->add_seq($seq); } } return $sa; } # Takes a protein tree and creates a consensus cigar line from the
# constituent leaf nodes.
}
get_SitewiseOmega_valuesdescriptionprevnextTop
sub get_SitewiseOmega_values {
  my $self = shift;

  my @values = @{$self->adaptor->db->get_SitewiseOmegaAdaptor->fetch_all_by_ProteinTreeId($self->node_id)};

  return\@ values;
}

1;
}
get_leaf_by_MemberdescriptionprevnextTop
sub get_leaf_by_Member {
  my $self = shift;
  my $member = shift;

  if($member->isa('Bio::EnsEMBL::Compara::ProteinTree')) {
    return $self->find_leaf_by_node_id($member->node_id);
  } elsif ($member->isa('Bio::EnsEMBL::Compara::Member')) {
    return $self->find_leaf_by_name($member->get_longest_peptide_Member->stable_id);
  } else {
    die "Need a Member object!";
  }
}
General documentation
CONTACTTop
  Contact Jessica Severin on implemetation/design detail: jessica@ebi.ac.uk
Contact Abel Ureta-Vidal on EnsEMBL compara project: abel@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _