Bio::EnsEMBL::Utils
TranscriptAlleles
Toolbar
Summary
TranscriptAlleles - A utility class used to obtain information about the
relationships between a transcript and Alleles
Package variables
Globals (from "use vars" definitions)
@EXPORT_OK = qw(&get_all_ConsequenceType &type_variation)
Included modules
Inherit
Exporter
Synopsis
use Bio::EnsEMBL::Utils::TranscriptAlleles;
# get the peptide variations caused by a set of Alleles
%variations = %{
Bio::EnsEMBL::Utils::TranscriptAlleles::get_all_peptide_variations(
$transcript, $alleles ) };
Description
This is a utility class which can be used to find consequence type of an
AlleleFeature in a transcript, and to determine the amino acid changes
caused by the AlleleFeature in the Transcript
Methods
apply_aa_change | No description | Code |
calculate_same_codon | No description | Code |
empty_codon | No description | Code |
get_all_ConsequenceType | Description | Code |
type_variation | No description | Code |
Methods description
Arg [1] : $transcript the transcript to obtain the peptide variations for Arg [2] : $alleles listref of AlleleFeatures Example : $consequence_types = get_all_ConsequenceType($transcript, \@alleles); foreach my $ct (@{$consequence_types}){ print "Allele : ", $ct->allele_string, " has a consequence type of :",$ct->type; print " and is affecting the transcript with ",@{$ct->aa_alleles}, "in position ", $ct->aa_start,"-", $ct->aa_end if (defined $ct->aa_alleles); print "\n"; } Description: Takes a list of AlleleFeatures and a Transcritpt, and return a list of ConsequenceType of the alleles in the given Transcript Returntype : listref of Bio::EnsEMBL::Variation::ConsequenceType Exceptions : none Caller : general |
Methods code
apply_aa_change | description | prev | next | Top |
sub apply_aa_change
{ my $tr = shift;
my $var = shift;
my $mrna = $tr->translateable_seq();
my $mrna_seqobj = Bio::Seq->new( -seq => $mrna,
-moltype => "dna",
-alphabet => "dna");
my ($attrib) = @{$tr->slice()->get_all_Attributes('codon_table')};
my $codon_table;
$codon_table = $attrib->value() if($attrib);
$codon_table ||= 1;
my $peptide = $mrna_seqobj->translate(undef,undef,undef,$codon_table)->seq;
my $len = $var->aa_end - $var->aa_start + 1;
my $old_aa = substr($peptide, $var->aa_start -1 , $len);
my $codon_cds_start = $var->aa_start * 3 - 2;
my $codon_cds_end = $var->aa_end * 3;
my $codon_len = $codon_cds_end - $codon_cds_start + 1;
my @alleles = @{$var->alleles};
my $var_len = $var->cds_end - $var->cds_start + 1;
my @aa_alleles = ($old_aa);
foreach my $a (@alleles) {
$a =~ s/\-//;
my $cds = $tr->translateable_seq();
if($var_len != length($a)) {
if(abs(length($a) - $var_len) % 3) {
$var->type('FRAMESHIFT_CODING');
return [$var];
}
if($codon_len == 0) { $aa_alleles[0] = '-';
$old_aa = '-';
}
}
my $new_aa;
substr($cds, $var->cds_start-1, $var_len) = $a;
my $codon_str = substr($cds, $codon_cds_start-1, $codon_len + length($a)-$var_len);
$var->codon($codon_str); my $codon_seq = Bio::Seq->new(-seq => $codon_str,
-moltype => 'dna',
-alphabet => 'dna');
$new_aa = $codon_seq->translate(undef,undef,undef,$codon_table)->seq();
if(length($new_aa)<1){
$new_aa='-';
}
if(uc($new_aa) ne uc($old_aa)) {
push @aa_alleles, $new_aa;
if ($new_aa =~ /\*/) {
$var->type('STOP_GAINED');
}
elsif ($old_aa =~ /\*/) {
$var->type('STOP_LOST');
}
}
}
if(@aa_alleles > 1) {
if (!$var->type or (join ' ',@{$var->type}) !~ /STOP/) {
$var->type('NON_SYNONYMOUS_CODING');
}
}
else {
$var->type('SYNONYMOUS_CODING');
}
$var->aa_alleles(\@aa_alleles);
}
1; } |
sub calculate_same_codon
{ my $same_codon = shift;
my $new_codon;
my $old_aa;
my $codon_table = Bio::Tools::CodonTable->new;
if (@{$same_codon} == 3){
map {$new_codon .= @{$_->[0]->alleles};$old_aa = $_->[0]->aa_alleles()->[0]} @{$same_codon};
}
else{
my $first_pos = ($same_codon->[0]->[0]->cdna_start -1) % 3; my $second_pos = ($same_codon->[1]->[0]->cdna_start -1)% 3; if ($first_pos == 0){
$new_codon = $same_codon->[0]->[0]->alleles->[0]; if ($second_pos == 1){
$new_codon .= $same_codon->[1]->[0]->alleles->[0]; $new_codon .= substr($same_codon->[1]->[0]->codon,2,1); }
else{
$new_codon .= substr($same_codon->[1]->[0]->codon,1,1); $new_codon .= $same_codon->[1]->[0]->alleles->[0]; }
}
else{
$new_codon = substr($same_codon->[1]->[0]->codon,0,1); $new_codon .= $same_codon->[0]->[0]->alleles->[0]; $new_codon .= $same_codon->[1]->[0]->alleles->[0]; }
$old_aa = $same_codon->[0]->[0]->aa_alleles->[0];
}
my $new_aa = $codon_table->translate($new_codon);
foreach my $codon (@{$same_codon}){
map {$_->aa_alleles([$old_aa,$new_aa])} @{$codon};
}
}
} |
sub empty_codon
{ my $out = shift;
my $same_codon = shift;
if (@{$same_codon} == 1){
map {push @{$out}, @{$_}} @{$same_codon};
}
elsif (@{$same_codon} > 1){
calculate_same_codon($same_codon);
map {push @{$out}, @{$_}} @{$same_codon};
}
@{$same_codon} = ();
}
} |
sub get_all_ConsequenceType
{ my $transcript = shift;
my $alleles = shift;
if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
throw('Bio::EnsEMBL::Transcript argument is required.');
}
if(!ref($alleles) || (ref($alleles) ne 'ARRAY')) {
throw('Reference to a list of Bio::EnsEMBL::Variation::AlleleFeature objects is required');
}
my @alleles_ordered = sort { $a->start <=> $b->start} @$alleles; my @same_codon; my @out; foreach my $allele (@alleles_ordered) {
my $allele_string;
if ($allele->allele_string =~ /[\|\\\/]/){ my @alleles = split /[\|\\\/]/,$allele->allele_string;
if ($alleles[0] ne $allele->ref_allele_string){
$allele_string = $alleles[0];
}
else{
$allele_string = $alleles[1];
}
}
else{
$allele_string = $allele->allele_string;
}
my $opposite_strand = 0; my $transcript_allele = $allele_string;
if( $transcript->strand != $allele->strand ) {
$transcript_allele =~tr/ACGT/TGCA/; $opposite_strand = 1;
}
my $consequence_type = Bio::EnsEMBL::Variation::ConsequenceType->new($transcript->dbID(),'',$allele->start, $allele->end, $transcript->strand, [$transcript_allele]);
if ($allele->ref_allele_string eq $allele_string) { empty_codon(\@out,\@same_codon);
$consequence_type->type('SARA');
push @out, $consequence_type;
next;
}
my $ref_consequences = type_variation($transcript,"",$consequence_type);
if ($allele->start != $allele->end){
empty_codon(\@out,\@same_codon);
push @out, @{$ref_consequences};
next;
}
my $new_consequence = shift @{$ref_consequences};
if (! defined $new_consequence ) {
empty_codon(\@out,\@same_codon);
push @out, $consequence_type; next;
}
if ( !defined $new_consequence->aa_start){
empty_codon(\@out,\@same_codon);
push @out, $new_consequence;
next;
}
if (!defined $same_codon[0]){
push @{$same_codon[0]}, $new_consequence; next;
}
if ($same_codon[-1]->[0]->aa_start == $new_consequence->aa_start){
if ($same_codon[-1]->[0]->start == $new_consequence->start){
push @{$same_codon[-1]},$new_consequence; }
else{
push @{$same_codon[$#same_codon + 1]},$new_consequence; }
}
else{
if (@same_codon > 1){
calculate_same_codon(\@same_codon);
}
map {push @out, @{$_}} @same_codon;
@same_codon = ();
push @{$same_codon[0]}, $new_consequence; }
}
empty_codon(\@out,\@same_codon);
return\@ out; } |
type_variation | description | prev | next | Top |
sub type_variation
{ my $tr = shift;
my $g = shift;
my $var = shift;
my $UPSTREAM = 5000;
my $DOWNSTREAM = 5000;
$var->empty_type if defined $var->type;
if (!$var->isa('Bio::EnsEMBL::Variation::ConsequenceType')){
throw("Not possible to calculate the consequence type for ",ref($var)," : Bio::EnsEMBL::Variation::ConsequenceType object expected");
}
if (($var->start < $tr->start - $UPSTREAM) || ($var->end > $tr->end + $DOWNSTREAM)){
return [];
}
if (!$tr->translation()) {
if($var->end < $tr->start()) {
$var->type( ($tr->strand() == 1) ? 'UPSTREAM' : 'DOWNSTREAM' );
return [$var];
}
if($var->start > $tr->end()) {
$var->type( ($tr->strand() == 1) ? 'DOWNSTREAM' : 'UPSTREAM' );
return [$var];
}
if ($var->start >= $tr->start() and $var->end <= $tr->end()) { if ($tr->biotype() eq "miRNA") {
my ($attribute) = @{$tr->get_all_Attributes('miRNA')};
if ( $attribute->value =~ /(\d+)-(\d+)/ ) {
my @mapper_objs = $tr->cdna2genomic($1, $2, $tr->strand); foreach my $obj ( @mapper_objs ){ if( $obj->isa("Bio::EnsEMBL::Mapper::Coordinate")){
if ($var->start >= $obj->start() and $var->end <= $obj->end()) {
$var->type("WITHIN_MATURE_miRNA");
return [$var];
}
}
}
}
}
$var->type("WITHIN_NON_CODING_GENE");
return [$var];
}
}
my $tm = $tr->get_TranscriptMapper();
my @coords = $tm->genomic2cdna($var->start,
$var->end,
$var->strand);
if(@coords > 1) {
my @out;
$var->type('COMPLEX_INDEL');
return [$var];
}
my @coords_splice_2 = $tm->genomic2cdna($var->start -2, $var->end +2, $var->strand);
my @coords_splice_3 = $tm->genomic2cdna($var->start -3, $var->end +3, $var->strand);
my @coords_splice_8 = $tm->genomic2cdna($var->start -8, $var->end +8, $var->strand);
my ($splice_site_2, $splice_site_3, $splice_site_8);
if (scalar @coords_splice_2 >1) {
$splice_site_2=1;
}
elsif (scalar @coords_splice_3 >1) {
$splice_site_3=1;
}
elsif (scalar @coords_splice_8 >1) {
$splice_site_8=1;
}
my $c = $coords[0];
if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
if($var->end < $tr->start()) {
$var->type( ($tr->strand() == 1) ? 'UPSTREAM' : 'DOWNSTREAM' );
return [$var];
}
if($var->start > $tr->end()) {
$var->type( ($tr->strand() == 1) ? 'DOWNSTREAM' : 'UPSTREAM' );
return [$var];
}
foreach my $intron (@{$tr->get_all_Introns()}) {
if ($intron->length <=5) { if ($var->start>=$intron->start and $var->end<=$intron->end) {
$var->type('SYNONYMOUS_CODING');
return [$var];
}
}
}
$var->type('INTRONIC');
if ($splice_site_2) {
$var->type('ESSENTIAL_SPLICE_SITE');
}
elsif ($splice_site_3 or $splice_site_8) {
$var->type('SPLICE_SITE');
}
return [$var];
}
if ($splice_site_2 or $splice_site_3) {
$var->type('SPLICE_SITE');
}
$var->cdna_start( $c->start() );
$var->cdna_end( $c->end() );
@coords = $tm->genomic2cds($var->start, $var->end,$var->strand);
if(@coords > 1) {
$var->type('COMPLEX_INDEL');
return [$var];
}
$c = $coords[0];
if($c->isa('Bio::EnsEMBL::Mapper::Gap')) {
if($var->end < $tr->coding_region_start()) {
$var->type( ($tr->strand() == 1) ? '5PRIME_UTR' : '3PRIME_UTR' );
}
elsif($var->start > $tr->coding_region_end()) {
$var->type( ($tr->strand() == 1) ? '3PRIME_UTR' : '5PRIME_UTR');
}
else {
throw('Unexpected: CDNA variation which is not in CDS is not in UTR');
}
return [$var];
}
$var->cds_start( $c->start() );
$var->cds_end( $c->end() );
@coords = $tm->genomic2pep($var->start, $var->end, $var->strand);
if(@coords != 1 || $coords[0]->isa('Bio::EnsEMBL::Mapper::Gap')) {
throw("Unexpected: Could map to CDS but not to peptide coordinates.");
}
$c = $coords[0];
$var->aa_start( $c->start());
$var->aa_end( $c->end());
apply_aa_change($tr, $var);
return [$var];
}
} |
General documentation
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html