use Bio::EnsEMBL::Utils::TranscriptSNPs;
# get and type all snps in the region of the transcript
%snps = %{
Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs( $transcript,
$flanking ) };
# get all snps overlapping the transcript in cdna coordinates
%snps =
%{ Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs(
$transcript) };
# get the peptide variations caused by a set of SNPs
%variations = %{
Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations(
$transcript, $snps ) };
Arg [1] : $transcript the transcript to obtain the peptide variations for Arg [2] : $snps listref of coding snps in cdna coordinates Example : $pep_hash = get_all_peptide_variations($transcript, \@snps); Description: Takes a list of coding snps on this transcript in which are in cdna coordinates and returns a hash with peptide coordinate keys and listrefs of alternative amino acids as values. The SNPs must additionally have a strand of 1 for the sake of simplicity. Normally these could be generated using the get_all_cdna_SNPs method.
Note that the peptide encoded by the reference sequence is
also present in the results and that duplicate peptides
(e.g. resulting from synonomous mutations) are discarded.
It is possible to have greated than two peptides variations
at a given location given adjacent or overlapping snps.
Insertion/deletion variations are ignored by this method.
Example of a data structure that could be returned:
{ 1 => ['I', 'M'],
10 => ['I', 'T'],
37 => ['N', 'D'],
56 => ['G', 'E'],
118 => ['R', 'K'],
159 => ['D', 'E'],
167 => ['Q', 'R'],
173 => ['H', 'Q'] }
Returntype : hashref
Exceptions : none
Caller : general |
sub get_all_SNPs
{ my $transcript = shift;
my $flanking = shift || 0;
my $source = shift;
if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
throw('Bio::EnsEMBL::Transcript argument required.');
}
my $slice = $transcript->slice();
if(!$slice) {
warning("Cannot obtain SNPs for transcript without attached Slice.");
return {};
}
my $sa = $slice->adaptor();
if(!$sa) {
warning('Cannot obtain SNPs for transcript unless attached slice ' .
'has attached adaptor');
return {};
}
my %snp_hash;
$slice = $sa->fetch_by_Feature($transcript, $flanking );
$transcript = $transcript->transfer( $slice );
my $snps;
if ($source eq 'glovar') {
$snps = $slice->get_all_ExternalFeatures('GlovarSNP');
}
elsif ($source eq 'variation') {
$snps = $slice->get_all_VariationFeatures;
}
else {
$snps = $slice->get_all_SNPs; }
my $trans_start = $flanking + 1;
my $trans_end = $slice->length - $flanking;
my $trans_strand = $transcript->get_all_Exons->[0]->strand;
foreach my $snp (@$snps) {
my $key;
if(($trans_strand == 1 && $snp->end < $trans_start) ||
($trans_strand == -1 && $snp->start > $trans_end)) {
$key = 'five prime flanking';
}
elsif(($trans_strand == 1 && $snp->start > $trans_end) ||
($trans_strand == -1 && $snp->start < $trans_start)) {
$key = 'three prime flanking';
}
else {
foreach my $e (@{$transcript->get_all_Exons}) {
if($snp->end >= $e->start && $snp->start <= $e->end) {
if(($trans_strand == 1 &&
$snp->end < $transcript->coding_region_start) ||
($trans_strand == -1 &&
$snp->start > $transcript->coding_region_end)) {
$key = 'five prime UTR';
}
elsif(($trans_strand == 1 &&
$snp->start > $transcript->coding_region_end)||
($trans_strand == -1 &&
$snp->end < $transcript->coding_region_start)) {
$key = 'three prime UTR';
}
else {
$key = 'coding';
}
last;
}
}
unless($key) {
$key = 'intronic';
}
}
unless($key) {
next;
}
if(exists $snp_hash{$key}) {
push @{$snp_hash{$key}}, $snp;
}
else {
$snp_hash{$key} = [$snp];
}
}
return\% snp_hash; } |
sub get_all_cdna_SNPs
{ my ($transcript, $source) = @_;
my $all_snps = get_all_SNPs($transcript, 0, $source);
my %snp_hash;
my @cdna_types = ('three prime UTR', 'five prime UTR','coding');
my $slice = $transcript->slice();
my $sa = $slice->adaptor();
$slice = $sa->fetch_by_Feature($transcript);
$transcript = $transcript->transfer($slice);
foreach my $type (@cdna_types) {
$snp_hash{$type} = [];
foreach my $snp (@{$all_snps->{$type}}) {
my @coords = $transcript->genomic2cdna($snp->start, $snp->end,
$snp->strand);
if(scalar(@coords) != 1) {
next;
}
my ($coord) = @coords;
unless($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
next;
}
my $alleles;
my $ambicode;
$alleles = $snp->allele_string || $snp->{'alleles'};
$ambicode = $snp->ambig_code || $snp->{'_ambiguity_code'};
if($coord->strand == -1) {
$alleles =~
tr/acgthvmrdbkynwsACGTDBKYHVMRNWS\//tgcadbkyhvmrnwsTGCAHVMRDBKYNWS\//;
$ambicode =~
tr/acgthvmrdbkynwsACGTDBKYHVMRNWS\//tgcadbkyhvmrnwsTGCAHVMRDBKYNWS\//;
}
my $new_snp;
%$new_snp = %$snp;
bless $new_snp, ref $snp;
$new_snp->start($coord->start);
$new_snp->end($coord->end);
$new_snp->strand(1);
$new_snp->allele_string($alleles);
$new_snp->ambig_code($ambicode);
push @{$snp_hash{$type}}, $new_snp;
}
}
return\% snp_hash;
}
1; } |
sub get_all_peptide_variations
{ my $transcript = shift;
my $snps = shift;
if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
throw('Bio::EnsEMBL::Transcript argument is required.');
}
if(!ref($snps) eq 'ARRAY') {
throw('Reference to a list of Bio::EnsEMBL::SNP objects is required');
}
my $codon_table = Bio::Tools::CodonTable->new;
my $codon_length = 3;
my $cdna = $transcript->spliced_seq;
my $variant_alleles;
my $translation_start = $transcript->cdna_coding_start;
foreach my $snp (@$snps) {
next if ($snp->start != $snp->end);
my $start = $snp->start;
my $strand = $snp->strand;
my $codon_pos = ($start - $translation_start) % $codon_length;
my $peptide = ($start - $translation_start +
($codon_length - $codon_pos)) / $codon_length;
my $codon = substr($cdna, $start - $codon_pos-1, $codon_length);
my @alleles = split(/\/|\|/, lc($snp->allele_string));
foreach my $allele (@alleles) {
next if $allele eq '-'; next if CORE::length($allele) != 1;
$variant_alleles ||= {};
if(exists $variant_alleles->{$peptide}) {
my $alleles_arr = $variant_alleles->{$peptide}->[1];
push @{$alleles_arr->[$codon_pos]}, $allele;
} else {
my $alleles_arr = [[],[],[]];
push @{$alleles_arr->[$codon_pos]}, $allele;
$variant_alleles->{$peptide} = [$codon, $alleles_arr];
}
}
}
my %out;
foreach my $peptide (keys %$variant_alleles) {
my ($codon, $alleles) = @{$variant_alleles->{$peptide}};
push @{$alleles->[0]}, substr($codon,0,1);
push @{$alleles->[1]}, substr($codon,1,1);
push @{$alleles->[2]}, substr($codon,2,1);
my %alt_amino_acids;
foreach my $a1 (@{$alleles->[0]}) {
substr($codon, 0, 1) = $a1;
foreach my $a2 (@{$alleles->[1]}) {
substr($codon, 1, 1) = $a2;
foreach my $a3 (@{$alleles->[2]}) {
substr($codon, 2, 1) = $a3;
my $aa = $codon_table->translate($codon);
$alt_amino_acids{$aa} = 1;
}
}
}
my @aas = keys %alt_amino_acids;
$out{$peptide} =\@ aas;
}
return\% out; } |