Bio::EnsEMBL::Analysis::Tools::Algorithms
ExonCluster
Toolbar
Package variables
No package variables defined.
Included modules
Synopsis
No synopsis!
Description
No description!
Methods
_add_exon_biotype | No description | Code |
_add_new_exon | No description | Code |
_add_transcript_reference | No description | Code |
_by_stranded_start | No description | Code |
_determine_type | No description | Code |
_sort_by_forward_start | No description | Code |
add_exon | No description | Code |
add_exon_if_not_present | No description | Code |
check_if_ExonCluster_has_est_evidence | No description | Code |
contains_exon | No description | Code |
contains_exon_with_dbid_and_dbname | No description | Code |
contains_transcript | No description | Code |
each_transcripts_exons | No description | Code |
end | No description | Code |
get_all_Exons | No description | Code |
get_all_Exons_in_ExonCluster | No description | Code |
get_all_Exons_of_EvidenceSet | No description | Code |
get_biotype_of_Exon | No description | Code |
get_prediction_transcripts_which_have_exon_in_ExonCluster | No description | Code |
get_transcripts_having_this_Exon_in_ExonCluster | No description | Code |
length | No description | Code |
merge | No description | Code |
merge_new_exon | No description | Code |
new | No description | Code |
print_clust_summary | No description | Code |
start | No description | Code |
strand | No description | Code |
transcript_from_ref | No description | Code |
transcripts_containing_exon | No description | Code |
type | No description | Code |
unique_exon_combinations | No description | Code |
Methods description
None available.
Methods code
_add_exon_biotype | description | prev | next | Top |
# Remember which biotype an exon was last seen with.
#
# Arg [1]  : exon (its stringified form is used as the hash key)
# Arg [2]  : transcript object providing biotype()
# Function : Stores $transcript->biotype in _exon_2_biotype for the exon.
#            The incoming biotype always replaces the stored one; a warning
#            is issued only when it differs from a previously stored value.
sub _add_exon_biotype {
    my ($self, $exon, $transcript) = @_;

    my $incoming = $transcript->biotype;
    my $is_known = exists $self->{_exon_2_biotype}{$exon};

    if ($is_known && $self->{_exon_2_biotype}{$exon} ne $incoming) {
        my $previous = $self->{_exon_2_biotype}{$exon};
        $self->{_exon_2_biotype}{$exon} = $incoming;
        warning("Changing biotype of exon : $previous changed to $incoming\n");
    }
    else {
        # First sighting, or the same biotype again: just (re)store it.
        $self->{_exon_2_biotype}{$exon} = $incoming;
    }
}
} |
# Register an exon that is not yet part of this cluster.
#
# Arg [1]  : exon object; must provide start, end, strand, dbID and
#            adaptor->db->dbc->dbname
# Arg [2]  : $ignore_strand - when true, the cluster strand is neither set
#            nor compared against the exon strand
# Function : Widens the cluster start/end to include the exon, records the
#            cluster strand (carping on a mismatch) and indexes the exon in
#            _exonhash, _exonidhash and _all_exons_in_cluster.
# Throws   : via $self->throw when an exon with the same dbID and dbname is
#            already registered.
sub _add_new_exon {
    my ($self, $exon, $ignore_strand) = @_;

    # Validate before mutating anything, so that a rejected exon leaves the
    # cluster boundaries, strand and internal index untouched.
    my $dbname = $exon->adaptor->db->dbc->dbname;
    my $id_key = $exon->dbID . $dbname;
    if (exists $self->{_exonidhash}{$id_key}) {
        $self->throw("Error : there seem to be exons with the same dbID and dbname in the databases " . $exon->dbID . " " . $dbname);
    }

    if (!defined($self->start) || $exon->start < $self->start) {
        $self->start($exon->start);
    }
    if (!defined($self->end) || $exon->end > $self->end) {
        $self->end($exon->end);
    }

    if (!$ignore_strand) {
        if (!defined($self->strand)) {
            $self->strand($exon->strand);
        }
        elsif ($self->strand != $exon->strand) {
            carp("Trying to add exon with strand " . $exon->strand . " to cluster with strand " . $self->strand);
        }
    }

    # _exonhash maps the exon reference to its insertion index; this index
    # is what unique_exon_combinations() uses to build combination keys.
    $self->{_exonhash}{"$exon"} = $self->{_internal_index}++;
    $self->{_exonidhash}{$id_key} = $exon;
    $self->{_all_exons_in_cluster}{$exon} = $exon;
}
# Link an exon to the transcript it came from.
#
# Arg [1]  : exon
# Arg [2]  : transcript
# Function : Appends the exon to the transcript's exon list in
#            _transcripthash (keyed by the stringified transcript) and stores
#            the transcript object itself under the same key in
#            _transcriptrefhash so it can be recovered from the key later.
sub _add_transcript_reference {
    my ($self, $exon, $transcript) = @_;

    my $ref_key = "$transcript";

    $self->{_transcripthash}{$ref_key} = []
        unless $self->contains_transcript($transcript);

    push @{ $self->{_transcripthash}{$ref_key} }, $exon;
    $self->{_transcriptrefhash}{$ref_key} = $transcript;
}
# Sort comparator (uses the package variables $a and $b set by sort).
#
# Forward strand (strand == 1): ascending by start, ties broken by end.
# Any other strand:             descending by end, ties broken by
#                               descending start.
# Dies via confess when the two exons are on different strands.
sub _by_stranded_start {
    if ($a->strand != $b->strand) {
        confess("Mixed strand in sort comparison routine - Bad!");
    }

    if ($a->strand == 1) {
        return ($a->start <=> $b->start) || ($a->end <=> $b->end);
    }

    # Operands are swapped on purpose: this yields the descending order.
    return ($b->end <=> $a->end) || ($b->start <=> $a->start);
}
# Classify the cluster from the distinct exon combinations of its
# transcripts, cache the numeric code in $self->{'_type'} and return it.
#
# Arg [1]  : $ignore_strand - forwarded to unique_exon_combinations(); the
#            type-3 test throws when it is true.
# Returns  : 0  there is exactly one unique exon combination
#            1  every combination is a single exon and all of them share
#               the same start and end
#            2  single exons only; at least one combination is used solely
#               by single-exon transcripts, each such exon lies within the
#               exons of all other combinations, and those other exons all
#               share one start and one end
#            3  single exons only; terminal-exon test below succeeded
#            4  single exons only, none of the above
#            6  some combination has several exons and two neighbouring
#               ones (sorted by start) are more than 8 apart
#            5  some combination has several exons, no such gap
# Note     : The three tests for types 1-3 only run when no combination
#            holds more than one exon; each falls through to the next on
#            failure.
sub _determine_type
{ my ($self, $ignore_strand) = @_;
my @combs = $self->unique_exon_combinations($ignore_strand);
if (scalar(@combs) == 1) {
$self->{'_type'} = 0;
return 0;
}
# Largest number of exons found in any single combination.
my $maxexoncount = 0;
foreach my $comb (@combs) {
# NOTE(review): the comparison sits inside scalar(); it still compares the
# exon count with $maxexoncount because the array is evaluated numerically.
if (scalar(@{$comb->{exons}} > $maxexoncount)) {
$maxexoncount = scalar(@{$comb->{exons}});
}
}
# --- Type 1: all single exons with identical start and end ---------------
if ($maxexoncount == 1) {
my $failed = 0;
my $first_start;
my $first_end;
my $first_phase;
# $subtype collects "p" (phase differs) / "t" (always terminal in the
# translation) flags; it is not read anywhere in this sub.
my $subtype;
foreach my $comb (@combs) {
# The first combination provides the reference coordinates and phase.
if (!defined($first_start)) {
$first_start = $comb->{exons}[0]->start;
$first_end = $comb->{exons}[0]->end;
$first_phase = $comb->{exons}[0]->phase;
}
if ($comb->{exons}[0]->start != $first_start ||
$comb->{exons}[0]->end != $first_end) {
$failed = 1;
last;
} else {
if ($comb->{exons}[0]->phase != $first_phase) {
$subtype .= "p";
} else {
# Count transcripts whose translation starts or ends in this exon.
# NOTE(review): assumes every transcript has a translation - confirm.
my $countterm = 0;
foreach my $trans (@{$comb->{transcripts}}) {
if ($trans->translation->start_Exon == $comb->{exons}[0] ||
$trans->translation->end_Exon == $comb->{exons}[0]) {
$countterm++;
}
}
if ($countterm == scalar(@{$comb->{transcripts}})) {
$subtype .= "t";
} elsif ($countterm) {
print "WARNING: Exon which isn't always the terminal exon in a translation\n";
}
}
}
}
if (!$failed) {
$self->{'_type'} = 1;
return 1;
}
}
# --- Type 2: single-exon transcripts contained in the other exons --------
if ($maxexoncount == 1) {
# Combinations whose transcripts all have exactly one exon ...
my @allsingle;
# ... and combinations with at least one multi-exon transcript.
my @notallsingle;
foreach my $comb (@combs) {
my $nsingle = 0;
foreach my $trans (@{$comb->{transcripts}}) {
if (scalar(@{$trans->get_all_Exons}) == 1) {
$nsingle++;
}
}
if ($nsingle == scalar(@{$comb->{transcripts}})) {
push @allsingle,$comb;
} else {
push @notallsingle,$comb;
}
}
my $failed = 0;
if (@allsingle) {
# Coordinates of the first exon seen from @notallsingle; every other exon
# from @notallsingle must match them.
my $notsingle_start = undef;
my $notsingle_end = undef;
SINGLE:
foreach my $comb (@allsingle) {
foreach my $comp_comb (@notallsingle) {
# Fail when the single-exon transcript's exon sticks out on either side.
if ($comb->{exons}[0]->start < $comp_comb->{exons}[0]->start ||
$comb->{exons}[0]->end > $comp_comb->{exons}[0]->end) {
$failed = 1;
last SINGLE;
} elsif (!defined($notsingle_start)) {
$notsingle_start = $comp_comb->{exons}[0]->start;
$notsingle_end = $comp_comb->{exons}[0]->end;
} elsif ($notsingle_start != $comp_comb->{exons}[0]->start ||
$notsingle_end != $comp_comb->{exons}[0]->end) {
$failed = 1;
last SINGLE;
}
}
}
} else {
# Without any all-single-exon combination this type cannot apply.
$failed = 1;
}
if (!$failed) {
$self->{'_type'} = 2;
return 2;
}
}
# --- Type 3: terminal exons sharing one boundary --------------------------
if ($maxexoncount == 1) {
if ($ignore_strand) {
throw("This method is not designed to handle cases where we ignore strand");
}
my @allterminal;
# @notallterminal is filled below but not read afterwards.
my @notallterminal;
my $failed = 0;
# $first_end holds the first $end value (0 or 1) assigned anywhere; all
# later $end values must agree with it.
my $first_end = undef;
COMB:
foreach my $comb (@combs) {
my $nterminal = 0;
# $end encoding, as set below:
#   strand  1: exon is start_Exon -> 0, exon is end_Exon -> 1
#   otherwise: exon is start_Exon -> 1, exon is end_Exon -> 0
# Within one combination $end may not switch between 0 and 1.
my $end = undef;
foreach my $trans (@{$comb->{transcripts}}) {
if (scalar(@{$trans->get_all_Exons}) == 1) {
# Single-exon transcripts count as terminal without setting $end.
$nterminal++;
print "Single exon transcript\n";
} elsif ($comb->{exons}[0]->strand == 1) {
if ($comb->{exons}[0] == $trans->start_Exon &&
(!defined($end) || ($end == 0))) {
$end = 0;
if (!defined($first_end)) {
$first_end = $end;
}
$nterminal++;
} elsif ($comb->{exons}[0] == $trans->end_Exon &&
(!defined($end) || $end == 1)) {
$end = 1;
if (!defined($first_end)) {
$first_end = $end;
}
$nterminal++;
} else {
print "Failed forward strand condition\n";
}
} else {
if ($comb->{exons}[0] == $trans->start_Exon &&
(!defined($end) || ($end == 1))) {
$end = 1;
if (!defined($first_end)) {
$first_end = $end;
}
$nterminal++;
} elsif ($comb->{exons}[0] == $trans->end_Exon &&
(!defined($end) || $end == 0)) {
$end = 0;
if (!defined($first_end)) {
$first_end = $end;
}
$nterminal++;
} else {
print "Failed reverse strand condition\n";
}
}
# A combination terminal at the other side than the first one fails the test.
if (defined($end) && $end != $first_end) {
$failed = 1;
last COMB;
}
}
if ($nterminal == scalar(@{$comb->{transcripts}})) {
push @allterminal,$comb;
} else {
push @notallterminal,$comb;
}
}
# No multi-exon transcript had this exon as start_Exon or end_Exon.
if (!defined($first_end)) {
$failed = 1;
}
if (!$failed) {
if (scalar(@allterminal)) {
# $conspos is the coordinate every combination must share: the exon end
# when $first_end is 0, the exon start otherwise.
my $conspos;
if ($first_end == 0) {
$conspos = $allterminal[0]->{exons}[0]->end;
} else {
$conspos = $allterminal[0]->{exons}[0]->start;
}
# $nonconspos tracks the extreme of the opposite coordinate (smallest start
# or largest end) over @allterminal; it is not read afterwards.
my $nonconspos = undef;
foreach my $comb (@allterminal) {
if ($first_end == 0) {
if (!defined($nonconspos) || $comb->{exons}[0]->start < $nonconspos) {
$nonconspos = $comb->{exons}[0]->start;
}
} else {
if (!defined($nonconspos) || $comb->{exons}[0]->end > $nonconspos) {
$nonconspos = $comb->{exons}[0]->end;
}
}
}
TERMINAL:
foreach my $comb (@combs) {
if ($first_end == 0 && $comb->{exons}[0]->end != $conspos ) {
$failed = 1;
last TERMINAL;
} elsif ($first_end == 1 && $comb->{exons}[0]->start != $conspos) {
$failed = 1;
last TERMINAL;
}
}
} else {
$failed = 1;
}
}
if (!$failed) {
$self->{'_type'} = 3;
return 3;
}
}
# --- Fallbacks -------------------------------------------------------------
if ($maxexoncount == 1) {
$self->{'_type'} = 4;
return 4;
} else {
# At least one combination has several exons: look for a gap of more than
# 8 between neighbouring exons once sorted by start.
foreach my $comb (@combs) {
if (scalar(@{$comb->{exons}}) > 1) {
my $prev_exon = undef;
my @exons = sort {$a->start <=> $b->start} @{$comb->{exons}};
foreach my $exon (@exons) {
if (defined($prev_exon)) {
if ($exon->start - $prev_exon->end > 8) {
$self->{'_type'} = 6;
return 6;
}
}
$prev_exon = $exon;
}
}
}
$self->{'_type'} = 5;
return 5;
} } |
# Sort comparator (uses the package variables $a and $b set by sort).
# Orders exons ascending by start and, for equal starts, ascending by end,
# without looking at the strand.
sub _sort_by_forward_start {
    return ($a->start <=> $b->start) || ($a->end <=> $b->end);
}
# Add an exon, together with the transcript it belongs to, to the cluster.
#
# Arg [1]  : exon
# Arg [2]  : transcript containing the exon
# Arg [3]  : $ignore_strand - forwarded to _add_new_exon
# Function : Registers the exon if contains_exon() does not know it yet,
#            then records the exon/transcript link and the exon biotype.
#            The cached cluster type is cleared.
sub add_exon {
    my ($self, $exon, $transcript, $ignore_strand) = @_;

    $self->_add_new_exon($exon, $ignore_strand)
        unless $self->contains_exon($exon);

    $self->_add_transcript_reference($exon, $transcript);
    $self->_add_exon_biotype($exon, $transcript);

    # Membership changed, so a previously computed type no longer holds.
    $self->{'_type'} = undef;
}
} |
# Like add_exon(), but "already present" is decided by database identity
# (dbID plus dbname, see contains_exon_with_dbid_and_dbname) instead of by
# object reference.
#
# Arg [1]  : exon
# Arg [2]  : transcript containing the exon
# Arg [3]  : $ignore_strand - forwarded to _add_new_exon
# Function : Registers the exon when no exon with the same dbID/dbname is
#            known, then always records the exon/transcript link and the
#            exon biotype. The cached cluster type is cleared.
sub add_exon_if_not_present {
    my ($self, $exon, $transcript, $ignore_strand) = @_;

    $self->_add_new_exon($exon, $ignore_strand)
        unless $self->contains_exon_with_dbid_and_dbname($exon);

    $self->_add_transcript_reference($exon, $transcript);
    $self->_add_exon_biotype($exon, $transcript);

    # Membership changed, so a previously computed type no longer holds.
    $self->{'_type'} = undef;
}
check_if_ExonCluster_has_est_evidence | description | prev | next | Top |
# Tell whether any transcript in the cluster has an EST biotype.
#
# Arg [1]  : hashref of evidence sets; the 'est' entry is expected to be an
#            arrayref of biotype names
# Returns  : 1 if at least one transcript's biotype is listed under 'est',
#            0 otherwise (including when no 'est' set is supplied)
sub check_if_ExonCluster_has_est_evidence {
    my ($self, $ev_sets) = @_;

    # A missing or empty 'est' set cannot match anything; answer directly
    # instead of dereferencing an undefined value.
    my $est_biotypes = $ev_sets ? $ev_sets->{'est'} : undef;
    return 0 unless $est_biotypes && @{$est_biotypes};

    my %is_est_biotype = map { $_ => 1 } @{$est_biotypes};

    my %transhash = $self->each_transcripts_exons;
    for my $trans_refname (keys %transhash) {
        my $trans = $self->transcript_from_ref($trans_refname);
        return 1 if $is_est_biotype{ $trans->biotype };
    }

    return 0;
}
} |
# Returns true when this exact exon object (matched by its stringified
# reference) has already been registered in the cluster.
sub contains_exon {
    my $self = shift;
    my $exon = shift;

    return (exists $self->{_exonhash}{"$exon"});
}
} |
contains_exon_with_dbid_and_dbname | description | prev | next | Top |
# Tell whether an exon with the same database identity is already known.
#
# Arg [1]  : exon providing dbID and adaptor->db->dbc->dbname
# Returns  : 1 if _exonidhash holds an entry for dbID . dbname, else 0
sub contains_exon_with_dbid_and_dbname {
    my ($self, $exon) = @_;

    my $exons_by_id = $self->{_exonidhash};

    # Nothing registered yet: answer without touching the exon's adaptor,
    # exactly as a scan over zero keys would.
    return 0 unless $exons_by_id && %{$exons_by_id};

    # Direct hash lookup instead of comparing the key against every entry.
    my $id_key = "" . $exon->dbID . $exon->adaptor->db->dbc->dbname;
    return exists $exons_by_id->{$id_key} ? 1 : 0;
}
# Returns true when at least one exon of this transcript (matched by the
# transcript's stringified reference) has been added to the cluster.
sub contains_transcript {
    my $self       = shift;
    my $transcript = shift;

    return (exists $self->{_transcripthash}{"$transcript"});
}
} |
# Returns the transcript-to-exons map as a flat hash (list of key/value
# pairs): each key is a stringified transcript reference, each value the
# arrayref of that transcript's exons in this cluster.
# The arrayrefs are the cluster's own, not copies.
sub each_transcripts_exons {
    my $self = shift;

    # A cluster that has not received any exon yet may still have an
    # undefined map; treat that as "no transcripts" rather than
    # dereferencing undef.
    return %{ $self->{_transcripthash} || {} };
}
# Getter/setter for the cluster end coordinate.
#
# Arg [1]  : (optional) new end; only true values are stored, so undef, 0
#            and the empty string leave the current end unchanged
# Returns  : the current end
# Throws   : via $self->throw when a true value is not an integer string
sub end {
    my ($self, $end) = @_;

    if ($end) {
        if ($end !~ /^[-+]?\d+$/) {
            $self->throw( "$end is not an integer");
        }
        $self->{'_end'} = $end;
    }

    return $self->{'_end'};
}
# Returns the list of exon objects stored in _exonidhash, i.e. one exon per
# distinct dbID/dbname key, in hash order.
sub get_all_Exons {
    my ($self) = @_;

    return values %{ $self->{_exonidhash} };
}
# Returns an arrayref holding every exon stored in _all_exons_in_cluster
# (one entry per distinct exon object), in hash order. A fresh array is
# built on every call.
sub get_all_Exons_in_ExonCluster {
    my $self = shift;

    return [ values %{ $self->{_all_exons_in_cluster} } ];
}
# Select the cluster's exons that belong to a given evidence set.
#
# Arg [1]  : $setname - used as a regular expression against each exon's
#            ev_set value, so a partial name matches too
# Returns  : arrayref of the matching exons, ordered by seq_region_length,
#            largest first
sub get_all_Exons_of_EvidenceSet {
    my ($self, $setname) = @_;

    my @in_set = grep { $_->ev_set =~m/$setname/ }
                 @{ $self->get_all_Exons_in_ExonCluster };

    my @longest_first =
        sort { $b->seq_region_length <=> $a->seq_region_length } @in_set;

    return \@longest_first;
}
# Returns the biotype recorded for the exon by _add_exon_biotype, or undef
# when nothing has been recorded for it.
sub get_biotype_of_Exon {
    my $self = shift;
    my $exon = shift;

    return $self->{_exon_2_biotype}{$exon};
}
1; } |
get_prediction_transcripts_which_have_exon_in_ExonCluster | description | prev | next | Top |
# Collect the prediction transcripts that have an exon inside the cluster.
#
# Arg [1]  : exon (accepted but not used by the selection)
# Returns  : list of transcript objects whose reference key contains
#            "PredictionTranscript" and which have at least one exon lying
#            completely within the cluster's start..end range; each
#            transcript is reported once
sub get_prediction_transcripts_which_have_exon_in_ExonCluster {
    my ($self, $exon) = @_;

    my %exons_of = $self->each_transcripts_exons;
    my @found;

    for my $trans_ref (keys %exons_of) {
        next if $trans_ref !~ m/PredictionTranscript/;

        for my $candidate (@{ $exons_of{$trans_ref} }) {
            next if $candidate->start < $self->start;
            next if $candidate->end > $self->end;

            push @found, $self->transcript_from_ref($trans_ref);
            last;    # one hit per transcript is enough
        }
    }

    return @found;
}
get_transcripts_having_this_Exon_in_ExonCluster | description | prev | next | Top |
# Collect the non-prediction transcripts that contain a given exon.
#
# Arg [1]  : exon providing stable_id
# Returns  : list of transcript objects whose reference key does not
#            contain "PredictionTranscript" and which have an exon with the
#            same stable_id as Arg [1]; each transcript is reported once
sub get_transcripts_having_this_Exon_in_ExonCluster {
    my ($self, $exon) = @_;

    my %exons_of = $self->each_transcripts_exons;
    my @found;

    for my $trans_ref (keys %exons_of) {
        next if $trans_ref =~m/PredictionTranscript/;

        for my $candidate (@{ $exons_of{$trans_ref} }) {
            next if $candidate->stable_id ne $exon->stable_id;

            push @found, $self->transcript_from_ref($trans_ref);
            last;    # one hit per transcript is enough
        }
    }

    return @found;
}
# Read-only length of the cluster: end - start + 1.
#
# Returns  : the number of positions spanned by the cluster
# Throws   : dies with a stack trace (Carp's confess) when any argument is
#            passed, because the length cannot be set
sub length {
    my $self = shift @_;

    if (@_) {
        # confess is a plain function; calling it as a method would make
        # the object itself the first fragment of the message.
        confess(ref($self) . "->length() is read-only");
    }

    return ( $self->{'_end'} - $self->{'_start'} + 1 );
}
# Absorb another cluster into this one.
#
# Arg [1]  : the cluster to take exons from (left unmodified here)
# Arg [2]  : $ignore_strand - forwarded to add_exon
# Function : Re-adds every exon/transcript pair of the other cluster through
#            add_exon() and clears the cached cluster type.
sub merge {
    my ($self, $cluster, $ignore_strand) = @_;

    my %exons_of = $cluster->each_transcripts_exons;

    for my $trans_ref (keys %exons_of) {
        $self->add_exon($_, $cluster->transcript_from_ref($trans_ref), $ignore_strand)
            for @{ $exons_of{$trans_ref} };
    }

    $self->{'_type'} = undef;
}
# Absorb another cluster into this one, skipping exons already known by
# database identity.
#
# Arg [1]  : the cluster to take exons from (left unmodified here)
# Arg [2]  : $ignore_strand - forwarded to add_exon_if_not_present
# Function : Passes every exon/transcript pair of the other cluster to
#            add_exon_if_not_present() and clears the cached cluster type.
sub merge_new_exon {
    my ($self, $cluster, $ignore_strand) = @_;

    my %exons_of = $cluster->each_transcripts_exons;

    for my $trans_ref (keys %exons_of) {
        $self->add_exon_if_not_present($_, $cluster->transcript_from_ref($trans_ref), $ignore_strand)
            for @{ $exons_of{$trans_ref} };
    }

    $self->{'_type'} = undef;
}
# Constructor.
#
# Args     : none; passing any argument is an error
# Returns  : a new, empty ExonCluster
# Throws   : via $self->throw when arguments are supplied
sub new {
    my ($class, @args) = @_;

    my $self = bless {}, ref($class) || $class;

    # Coordinates, strand and cached type stay undefined until the first
    # exon is added or the type is requested.
    $self->{_start}  = undef;
    $self->{_end}    = undef;
    $self->{_strand} = undef;
    $self->{_type}   = undef;

    # Lookup tables start out as empty hashes so that the read accessors
    # can be called safely on a cluster that has no exons yet.
    $self->{_exonhash}             = {};    # "$exon"         -> insertion index
    $self->{_exonidhash}           = {};    # dbID . dbname   -> exon
    $self->{_transcripthash}       = {};    # "$transcript"   -> [ exons ]
    $self->{_transcriptrefhash}    = {};    # "$transcript"   -> transcript
    $self->{_exon_2_biotype}       = {};    # exon            -> biotype
    $self->{_all_exons_in_cluster} = {};    # exon            -> exon

    # Kept as initialised by earlier versions; no method shown here reads it.
    $self->{_transcriptidhash} = undef;

    $self->{_internal_index} = 0;

    if (@args) {
        $self->throw("Constructor does not expect any parameters");
    }

    return $self;
}
# Print a one-line summary of exon combinations to the currently selected
# output handle.
#
# Args     : list of combination hashrefs, each with an 'exons' arrayref
#            (the shape returned by unique_exon_combinations); note that no
#            $self is unpacked - every argument is treated as a combination
# Output   : for each combination ": ", then each exon's dbID followed by a
#            space, then ": "; a newline ends the line
sub print_clust_summary {
    my @combs = @_;

    for my $comb (@combs) {
        print ": ";
        for my $exon (@{ $comb->{exons} }) {
            print $exon->dbID. " ";
        }
        print ": ";
    }

    print "\n";
}
# Getter/setter for the cluster start coordinate.
#
# Arg [1]  : (optional) new start; only true values are stored, so undef, 0
#            and the empty string leave the current start unchanged
# Returns  : the current start
# Throws   : via $self->throw when a true value is not an integer string
sub start {
    my ($self, $start) = @_;

    if ($start) {
        if ($start !~ /^[-+]?\d+$/) {
            $self->throw( "$start is not an integer");
        }
        $self->{'_start'} = $start;
    }

    return $self->{'_start'};
}
# Getter/setter for the cluster strand.
#
# Arg [1]  : (optional) new strand; only true values are stored, so undef
#            and 0 leave the current strand unchanged
# Returns  : the current strand
sub strand {
    my ($self, $strand) = @_;

    $self->{'_strand'} = $strand if $strand;

    return $self->{'_strand'};
}
# Look up the transcript object stored under a stringified transcript
# reference (the keys returned by each_transcripts_exons).
# Returns undef when the key is unknown.
sub transcript_from_ref {
    my $self = shift;
    my $ref  = shift;

    return $self->{_transcriptrefhash}{$ref};
}
# Collect every transcript of the cluster that holds this exact exon.
#
# Arg [1]  : exon; compared with == against the stored exons, i.e. by
#            object identity for plain references
# Returns  : list of transcript objects, each reported once
sub transcripts_containing_exon {
    my ($self, $exon) = @_;

    my %exons_of = $self->each_transcripts_exons;
    my @owners;

    for my $trans_ref (keys %exons_of) {
        for my $member (@{ $exons_of{$trans_ref} }) {
            next if $member != $exon;

            push @owners, $self->transcript_from_ref($trans_ref);
            last;    # one hit per transcript is enough
        }
    }

    return @owners;
}
# Read-only accessor for the cluster type code.
#
# Arg [1]  : $ignore_strand - forwarded to _determine_type when the type
#            has to be computed
# Returns  : the cached type; it is computed by _determine_type first when
#            no type is cached
# Throws   : via $self->throw when more than one argument is passed
sub type {
    my ($self, $ignore_strand, @args) = @_;

    $self->throw("type is a get only method") if @args;

    $self->_determine_type($ignore_strand)
        unless defined $self->{'_type'};

    return $self->{'_type'};
}
unique_exon_combinations | description | prev | next | Top |
# Group the cluster's transcripts by the set of exons they use.
#
# Arg [1]  : $ignore_strand - when true, exons are ordered with
#            _sort_by_forward_start, otherwise with _by_stranded_start
# Returns  : list of hashrefs, one per distinct exon combination:
#              exons       => arrayref of the exons (sorted copy)
#              transcripts => arrayref of the transcripts sharing them
# Note     : each transcript's exon list inside the cluster is sorted in
#            place as a side effect.
sub unique_exon_combinations {
    my ($self, $ignore_strand) = @_;

    my %comb_for;
    my %exons_of = $self->each_transcripts_exons;

    for my $trans_ref (keys %exons_of) {
        if ($ignore_strand) {
            @{$exons_of{$trans_ref}} = sort _sort_by_forward_start @{$exons_of{$trans_ref}};
        }
        else {
            @{$exons_of{$trans_ref}} = sort _by_stranded_start @{$exons_of{$trans_ref}};
        }

        # The combination key is the sequence of the exons' internal
        # indices, so transcripts with the same exons get the same key.
        my $signature;
        $signature .= ":" . $self->{_exonhash}{$_} for @{$exons_of{$trans_ref}};

        if (!exists($comb_for{$signature})) {
            $comb_for{$signature}{exons} = [ @{$exons_of{$trans_ref}} ];
        }
        push @{$comb_for{$signature}{transcripts}}, $self->transcript_from_ref($trans_ref);
    }

    return values %comb_for;
}
General documentation
Name : transcript_from_ref
Arg : String "Bio::EnsEMBL::Transcript(HASHXXXXXX)" used as key in _transcriptrefhash
Function : Returns for a given STRING the reference to a Bio::EnsEMBL::Transcript-Object
Returnval: Bio::EnsEMBL::Transcript
Name : get_all_Exons_in_ExonCluster ()
Arg[0] : Bio::EnsEMBL::Analysis::Runnable::Condense_EST::ExonCluster
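Function : Returns all Exons (of all types) which belong to this ExonCluster.
This description also mentions an optional Arg[1]: if supplied, the list of returned
Bio::EnsEMBL::Exon objects would be filtered so that only exons >= the supplied rank
are returned. The method code shown above takes no such argument and applies no filter.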
Returnval : Arrayref to Array of Bio::EnsEMBL::Exon Objects
Name : get_all_Exons_of_EvidenceSet
Arg : String describing Name of evidence_set
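Function : Returns all Exons (of all types) which belong to this ExonCluster
and belong to the specified evidence set (the name is matched as a pattern against each exon's ev_set).
The rank filter mentioned in this description ("only exons >= the supplied rank will be returned")
is not applied by the method code shown above.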
Returnval : Arrayref to Array of Bio::EnsEMBL::Exon Objects