TranscriptSNPs documentation.

  use Bio::EnsEMBL::Utils::TranscriptSNPs;

  # get and type all snps in the region of the transcript

  %snps = %{
    Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_SNPs( $transcript,
      $flanking ) };

  # get all snps overlapping the transcript in cdna coordinates

  %snps =
    %{ Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_cdna_SNPs(
      $transcript) };

  # get the peptide variations caused by a set of SNPs

  %variations = %{
    Bio::EnsEMBL::Utils::TranscriptSNPs::get_all_peptide_variations(
      $transcript, $snps ) };

sub get_all_SNPs {

  my $transcript = shift;
  my $flanking = shift || 0;
  my $source = shift;

  if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
    throw('Bio::EnsEMBL::Transcript argument required.');
  }

  my $slice = $transcript->slice();

  if(!$slice) {
    warning("Cannot obtain SNPs for transcript without attached Slice.");
    return {};
  }

  my $sa = $slice->adaptor();

  if(!$sa) {
    warning('Cannot obtain SNPs for transcript unless attached slice ' .
            'has attached adaptor');
    return {};
  }

  my %snp_hash;

  # retrieve slice in the region of the transcript
  $slice = $sa->fetch_by_Feature($transcript, $flanking );

  # copy transcript, to work in coord system we are interested in
  $transcript = $transcript->transfer( $slice );

  # get all snps in the transcript region
  my $snps;
  if ($source eq 'glovar') {
    $snps = $slice->get_all_ExternalFeatures('GlovarSNP');
  }
  elsif ($source eq 'variation') {
    $snps = $slice->get_all_VariationFeatures;
  }
  else {
    $snps = $slice->get_all_SNPs;   # dont need once use new snp api (i think)
  }

  my $trans_start  = $flanking + 1;
  my $trans_end    = $slice->length - $flanking;
  my $trans_strand = $transcript->get_all_Exons->[0]->strand;

  # classify each snp
  foreach my $snp (@$snps) {
    my $key;

    if(($trans_strand == 1 && $snp->end < $trans_start) ||
       ($trans_strand == -1 && $snp->start > $trans_end)) {
      # this snp is upstream from the transcript
      $key = 'five prime flanking';
    }

    elsif(($trans_strand == 1 && $snp->start > $trans_end) ||
	  ($trans_strand == -1 && $snp->start < $trans_start)) {
      # this snp is downstream from the transcript
      $key = 'three prime flanking';
    }

    else {
      #snp is inside transcript region check if it overlaps an exon
      foreach my $e (@{$transcript->get_all_Exons}) {
        if($snp->end >= $e->start && $snp->start <= $e->end) {
          # this snp is in an exon
	  
          if(($trans_strand == 1 && 
              $snp->end < $transcript->coding_region_start) ||
             ($trans_strand == -1 && 
              $snp->start > $transcript->coding_region_end)) {
            # this snp is in the 5' UTR
            $key = 'five prime UTR';
          }

          elsif(($trans_strand == 1 && 
                 $snp->start > $transcript->coding_region_end)||
                ($trans_strand == -1 && 
                 $snp->end < $transcript->coding_region_start)) {
            # this snp is in the 3' UTR
            $key = 'three prime UTR';
          }

          else {
            # snp is coding
            $key = 'coding';
          }
          last;
        }
      }
      unless($key) {
        # snp was not in an exon and is therefore intronic
        $key = 'intronic';
      }
    }

    unless($key) {
      #warning('SNP could not be mapped. In/Dels not supported yet...');
      next;
    }

    if(exists $snp_hash{$key}) {
      push @{$snp_hash{$key}}, $snp;
    } 
    else {
      $snp_hash{$key} = [$snp];
    }
  }

  return\% snp_hash;

}

sub get_all_cdna_SNPs {

  my ($transcript, $source) = @_;

  #retrieve all of the snps from this transcript
  my $all_snps = get_all_SNPs($transcript, 0, $source);
  my %snp_hash;

  my @cdna_types = ('three prime UTR', 'five prime UTR','coding');

  my $slice = $transcript->slice();
  my $sa    = $slice->adaptor();

  $slice = $sa->fetch_by_Feature($transcript);

  # copy transcript in order to work in coord system of interest
  $transcript = $transcript->transfer($slice);

  foreach my $type (@cdna_types) {
    $snp_hash{$type} = [];
    foreach my $snp (@{$all_snps->{$type}}) {
      my @coords = $transcript->genomic2cdna($snp->start, $snp->end,
                                             $snp->strand);

      #skip snps that don't map cleanly (possibly an indel...)
      if(scalar(@coords) != 1) {
        #warning("snp of type $type does not map cleanly\n");
        next;
      }

      my ($coord) = @coords;

      unless($coord->isa('Bio::EnsEMBL::Mapper::Coordinate')) {
        #warning("snp of type $type maps to gap\n");
        next;
      }

      my $alleles;
      my $ambicode;

      # get alleles and ambig_code (with fallback to old snp API)
      $alleles = $snp->allele_string || $snp->{'alleles'};
      $ambicode = $snp->ambig_code || $snp->{'_ambiguity_code'};

      #we arbitrarily put the SNP on the +ve strand because it is easier to
      #work with in the webcode
      if($coord->strand == -1) {
        $alleles =~
         tr/acgthvmrdbkynwsACGTDBKYHVMRNWS\//tgcadbkyhvmrnwsTGCAHVMRDBKYNWS\//;
        $ambicode =~
         tr/acgthvmrdbkynwsACGTDBKYHVMRNWS\//tgcadbkyhvmrnwsTGCAHVMRDBKYNWS\//;
      }
      #copy the snp and convert to cdna coords...
      my $new_snp;
      %$new_snp = %$snp;
      bless $new_snp, ref $snp;
      $new_snp->start($coord->start);
      $new_snp->end($coord->end);
      $new_snp->strand(1);
      $new_snp->allele_string($alleles);
      $new_snp->ambig_code($ambicode);
      push @{$snp_hash{$type}}, $new_snp;
    }
  }

  return\% snp_hash;
}




1;

}

sub get_all_peptide_variations {

  my $transcript = shift;
  my $snps = shift;

  if(!ref($transcript) || !$transcript->isa('Bio::EnsEMBL::Transcript')) {
    throw('Bio::EnsEMBL::Transcript argument is required.');
  }

  if(!ref($snps) eq 'ARRAY') {
    throw('Reference to a list of Bio::EnsEMBL::SNP objects is required');
  }

  my $codon_table = Bio::Tools::CodonTable->new;
  my $codon_length = 3;
  my $cdna = $transcript->spliced_seq;

  my $variant_alleles;
  my $translation_start = $transcript->cdna_coding_start;
  foreach my $snp (@$snps) {
    #skip variations not on a single base
    next if ($snp->start != $snp->end);

    my $start = $snp->start;
    my $strand = $snp->strand;

    #calculate offset of the nucleotide from codon start (0|1|2)
    my $codon_pos = ($start - $translation_start) % $codon_length;

    #calculate the peptide coordinate of the snp
    my $peptide = ($start - $translation_start +
		   ($codon_length - $codon_pos)) / $codon_length;

    #retrieve the codon
    my $codon = substr($cdna, $start - $codon_pos-1, $codon_length);

    #store each alternative allele by its location in the peptide
    my @alleles = split(/\/|\|/, lc($snp->allele_string));
    #my @alleles = split(/\/|\|/, lc($snp->alleles));

    foreach my $allele (@alleles) {
      next if $allele eq '-';       #skip deletions
      next if CORE::length($allele) != 1; #skip insertions

      #get_all_cdna_SNPs always gives strand of 1 now
      #if($strand == -1) {
      #  #complement the allele if the snp is on the reverse strand
      #  $allele =~ 
      #  tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
      #}

      #create a data structure of variant alleles sorted by both their
      #peptide position and their position within the peptides codon
      $variant_alleles ||= {};
      if(exists $variant_alleles->{$peptide}) {
        my $alleles_arr = $variant_alleles->{$peptide}->[1];
        push @{$alleles_arr->[$codon_pos]}, $allele;
      } else {
        #create a list of 3 lists (one list for each codon position)
        my $alleles_arr = [[],[],[]];
        push @{$alleles_arr->[$codon_pos]}, $allele;
        $variant_alleles->{$peptide} = [$codon, $alleles_arr];
      }
    }
  }

  my %out;
  #now generate all possible codons for each peptide and translate them
  foreach my $peptide (keys %$variant_alleles) {
    my ($codon, $alleles) = @{$variant_alleles->{$peptide}};

    #need to push original nucleotides onto each position
    #so that all possible combinations can be generated
    push @{$alleles->[0]}, substr($codon,0,1);
    push @{$alleles->[1]}, substr($codon,1,1);
    push @{$alleles->[2]}, substr($codon,2,1);

    my %alt_amino_acids;
    foreach my $a1 (@{$alleles->[0]}) {
      substr($codon, 0, 1) = $a1;
      foreach my $a2 (@{$alleles->[1]}) {
        substr($codon, 1, 1) = $a2;
        foreach my $a3 (@{$alleles->[2]}) {
          substr($codon, 2, 1) = $a3;
          my $aa = $codon_table->translate($codon);
          #print "$codon translation is $aa\n";
          $alt_amino_acids{$aa} = 1;
        }
      }
    }

    my @aas = keys %alt_amino_acids;
    $out{$peptide} =\@ aas;
  }

  return\% out;

}

get_all_SNPs	Description	Code
get_all_cdna_SNPs	Description	Code
get_all_peptide_variations	Description	Code