Bio::EnsEMBL::ExternalData
GeneSNP
Toolbar
Summary
Bio::EnsEMBL::ExternalData::GeneSNP - class to expand genomic SNP
into full gene variation description
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
$genesnp = new Bio::EnsEMBL::ExternalData::GeneSNP
(-gene => $gene,
-contig => $contig
);
# $snp is a Bio::EnsEMBL::ExternalData::Variation object
$var = $genesnp->snp2gene($snp);
# $var is a Bio::Variation::SeqDiff object
# or
@var = $genesnp->snps2gene(@snps);
Description
Bio::EnsEMBL::ExternalData::GeneSNP takes in
Bio::EnsEMBL::ExternalData::Variation objects in genomic (clone,
contig or virtual contig) coordinates and calculates for DNA, RNA and
protein changes in gene coordinates based on information in a
Bio::EnsEMBL::Gene object. The returned object for each SNP
description is
Bio::Variation::SeqDiff which links to
Bio::Variation::VariantI compliant objects.
A SNP passed into a GeneSNP object is taken to be a gene SNP (gSNP) if
it is within 5kb of coding region. A RNA and protein level description
objct is created only for coding SNPs (cSNP).
$is_cSNP = 1 if $var->AAChange;
The IDs are kept as follows:
- All the DBLinks are
Bio::Annotation::DBLink objects which are
passed from Variant into DNAMutation. Primary ID of a SNP is
$var->DNAMutation->id
- The contig ID is kept in SeqDiff ID : $var->id
- The ID of the transcript ID used to calculate SeqDiff is:
$var->rna_id
- The ID of the exon where SNP is located is kept in
DNAMutation->region_value. The string looks like 'ENSE00000012499
(1)'. The order number is there to keep track of introns which do
not have IDs, only an order number. To get exon ID only:
$var->DNAMutation->region_value =~ /\w+/
Methods
Methods description
Title : contig Usage : $contigobj = $obj->contig; Function: Returns or sets the link-reference to a Contig object. If there is no link, it will return undef Returns : an obj_ref or undef |
Title : gene Usage : $geneobj = $obj->gene; Function: Returns or sets the link-reference to a Gene object. If there is no link, it will return undef Returns : an obj_ref or undef |
Title : transcript Usage : $transcriptobj = $obj->transcript; Function: Returns or sets the link-reference to a Transcript object. If there is no link, it will return undef Returns : an obj_ref or undef |
Methods code
_calculate_gene_coordinates | description | prev | next | Top |
sub _calculate_gene_coordinates
{ my ($self, $snp, $rna, $aa, $aaseq) = @_;
my $seqDiff = undef;
$snp || $self->throw("Give a Bio::EnsEMBL::ExternalData::Variation object as an argument");
$snp->isa('Bio::EnsEMBL::ExternalData::Variation') ||
$self->throw("Is not a Bio::EnsEMBL::ExternalData::Variation object but a [$snp]");
$rna->isa('Bio::EnsEMBL::Transcript') ||
$self->throw("Is not a Bio::EnsEMBL::Transcript object but a [$rna]");
$aa->isa('Bio::EnsEMBL::Translation') ||
$self->throw("Is not a Bio::EnsEMBL::Translation object but a [$aaseq]");
$aaseq->isa('Bio::PrimarySeqI') ||
$self->throw("Is not a Bio::PrimarySeqI object but a [$aaseq]");
return undef if $snp->start < ($aa->start - 5000);
return undef if $snp->end > ($aa->end + 5000);
return undef if $snp->start != $snp->start;
$seqDiff = Bio::Variation::SeqDiff->new();
$seqDiff->moltype('dna');
$seqDiff->numbering('entry');
$seqDiff->rna_ori($rna->dna_seq->seq);
$seqDiff->aa_ori($rna->translate); $seqDiff->id($self->contig->id);
$seqDiff->rna_id($rna->id);
$seqDiff->offset($aa->start -1);
$seqDiff->cds_end($aa->end - $aa->start + 1);
my $dna_start = $snp->start - $seqDiff->offset;
$dna_start < 1 && $dna_start--;
my $dnamut = Bio::Variation::DNAMutation->new
(-start => ($dna_start),
-end => ($dna_start),
);
$dnamut->mut_number(1);
$dnamut->proof('experimental');
my (@alleles) = split (/\|/, $snp->alleles);
my $dnaA1 = Bio::Variation::Allele->new;
my $a1 = shift @alleles;
$dnamut->length(CORE::length $a1);
$dnaA1->seq($a1);
$dnamut->allele_ori($dnaA1);
foreach my $alleleseq (@alleles) {
my $A2 = Bio::Variation::Allele->new;
$A2->seq($alleleseq);
$dnamut->add_Allele($A2);
}
$seqDiff->add_Variant($dnamut);
foreach my $link ($snp->each_DBLink) {
$dnamut->add_DBLink;
}
my $start_pos = $snp->start;
$dnamut->upStreamSeq
(lc substr($self->contig->primary_seq->seq, $snp->start -25, 25));
$dnamut->dnStreamSeq
(lc substr($self->contig->primary_seq->seq, $snp->start +1, 25));
my $ref_allele = lc substr($self->contig->primary_seq->seq, $snp->start, 1 );
$dnaA1->seq ne $ref_allele &&
$self->warn("Found DNA ref allele: $ref_allele!");
if ($snp->start < $rna->start_exon->start) {
$dnamut->region('5\'upstream');
}
elsif ($snp->start > $rna->end_exon->end) {
$dnamut->region("3'downstream");
}
elsif ($snp->start < $aa->start) {
$dnamut->region("5'UTR");
}
elsif ($snp->start > $aa->end) {
$dnamut->region("3'UTR");
} else {
my $last_exon_end = 0;
my $exoncount = 1;
my $transcript_loc;
foreach my $exon ($rna->each_Exon) {
if ($snp->start < $exon->start) {
$dnamut->region('intron');
$dnamut->region_value($exoncount);
last;
}
elsif ($snp->start >= $exon->start and $snp->start <= $exon->end) {
$dnamut->region('exon');
$dnamut->region_value($exon->id. " ($exoncount)");
last;
}
$exoncount++;
}
}
my $rnachange = undef;
if ($dnamut->region eq 'exon' ) { $seqDiff->rna_offset($rna->start_exon->phase -1);
my $rna_pos = $rna->rna_pos($snp->start);
$rnachange = Bio::Variation::RNAChange->new
(-start => $rna_pos - $seqDiff->rna_offset,
-end => $rna_pos - $seqDiff->rna_offset,
);
$rnachange->length(1);
$rnachange->mut_number(1);
$seqDiff->add_Variant($rnachange);
$dnamut->RNAChange($rnachange);
$rnachange->DNAMutation($dnamut);
$rnachange->proof('computed');
$rnachange->allele_ori($dnaA1);
foreach my $alleleseq (@alleles) {
my $A2 = Bio::Variation::Allele->new;
$A2->seq($alleleseq);
$rnachange->add_Allele($A2);
}
$rnachange->codon_pos(($rnachange->start -1 )% 3 +1);
if ($rna_pos < 25 ) {
$rnachange->upStreamSeq($dnamut->upStreamSeq);
} else {
$rnachange->upStreamSeq
(lc substr($seqDiff->rna_ori, $rna_pos -25, 25));
}
if ($rna_pos > $seqDiff->cds_end - 25 ) {
$rnachange->dnStreamSeq($dnamut->dnStreamSeq);
} else {
$rnachange->dnStreamSeq
(lc substr($seqDiff->rna_ori, $rna_pos + 1, 25));
}
my $ref_allele = lc substr($seqDiff->rna_ori, $rna_pos,1);
$dnaA1->seq ne $ref_allele &&
$self->warn("Found RNA ref allele: $ref_allele!");
}
if ($dnamut->region eq 'exon' ) {
my $aachange = Bio::Variation::AAChange->new
(-start => (int($rnachange->start / 3 + 1)) ); $aachange->end($aachange->start);
$aachange->proof('computed');
$seqDiff->add_Variant($aachange);
$rnachange->AAChange($aachange);
$aachange->RNAChange($rnachange);
$aachange->mut_number(1);
my $ct = new Bio::Tools::CodonTable;
my $aa_allele_ori = $ct->translate($rnachange->codon_ori);
my $aa_o = Bio::Variation::Allele->new;
$aa_o->seq($aa_allele_ori) if $aa_allele_ori;
$aachange->allele_ori($aa_o);
my $aa_allele_mut = $ct->translate($rnachange->codon_mut);
my $aa_m = Bio::Variation::Allele->new;
$aa_m->seq($aa_allele_mut) if $aa_allele_mut;
$aachange->add_Allele($aa_m);
my $aa_length_ori = CORE::length($aa_allele_ori);
$aachange->length($aa_length_ori);
$aachange->end($aachange->start + $aa_length_ori - 1 );
my $ref_allele = substr($aaseq->seq, $aachange->start -1 ,1);
$aa_allele_ori ne $ref_allele &&
$self->warn ("Found AA ref allele '$ref_allele' when expected to see $aa_allele_ori");
}
return $seqDiff;
}
1; } |
sub contig
{ my ($self,$value) = @_;
if (defined $value) {
$self->{'contig'} = $value;
}
unless (exists $self->{'contig'}) {
return (undef);
} else {
return $self->{'contig'};
} } |
sub gene
{ my ($self,$value) = @_;
if (defined $value) {
if( ! $value->isa('Bio::EnsEMBL::Gene') ) {
$self->throw("Is not a Bio::EnsEMBL::Gene object but a [$value]");
return (undef);
}
else {
$self->{'gene'} = $value;
}
}
unless (exists $self->{'gene'}) {
return (undef);
} else {
return $self->{'gene'};
} } |
sub new
{ my($class,@args) = @_;
my $self;
$self = {};
bless $self, $class;
my ($gene, $contig, $transcript) =
$self->_rearrange([qw(GENE
CONTIG
TRANSCRIPT
)],@args);
$gene && $self->gene($gene);
$contig && $self->contig($contig);
$transcript && $self->transcript($transcript);
return $self;
} |
sub snp2gene
{ my ($self, $snp) = @_;
my $seqDiff = undef;
$self->gene || $self->throw("Set gene to a Bio::EnsEMBL::Gene object");
$self->contig || $self->throw("Set contig to a Bio::EnsEMBL::RawContig compliant object");
my @seqDiffs = ();
foreach my $rna ($self->gene->each_Transcript) {
my $aa = $rna->translation; my $aaseq = $rna->translate;
$seqDiff = $self->_calculate_gene_coordinates($snp, $rna, $aa, $aaseq);
$seqDiff && push @seqDiffs, $seqDiff;
}
return @seqDiffs; } |
sub snps2gene
{ my ($self, @snps) = @_;
my $seqDiff = undef;
$self->gene || $self->throw("Set gene to a Bio::EnsEMBL::Gene object");
$self->contig || $self->throw("Set contig to a Bio::EnsEMBL::RawContig compliant object");
my @seqDiffs = ();
foreach my $rna ($self->gene->each_Transcript) {
my $aa = $rna->translation; my $aaseq = $rna->translate;
foreach my $snp (@snps) {
my $seqDiff = $self->_calculate_gene_coordinates($snp, $rna, $aa, $aaseq);
$seqDiff && push @seqDiffs, $seqDiff;
}
}
return @seqDiffs; } |
sub snps2transcript
{ my ($self, @snps) = @_;
my $seqDiff = undef;
$self->transcript || $self->throw("Set transcript to a Bio::EnsEMBL::Transcript object");
$self->contig || $self->throw("Set contig to a Bio::EnsEMBL::RawContig compliant object");
my @seqDiffs = ();
my $rna = $self->transcript;
my $aa = $rna->translation; my $aaseq = $rna->translate;
foreach my $snp (@snps) {
my $seqDiff = $self->_calculate_gene_coordinates($snp, $rna, $aa, $aaseq);
$seqDiff && push @seqDiffs, $seqDiff;
}
return @seqDiffs; } |
sub transcript
{ my ($self,$value) = @_;
if (defined $value) {
if( ! $value->isa('Bio::EnsEMBL::Transcript') ) {
$self->throw("Is not a Bio::EnsEMBL::Transcript object but a [$value]");
return (undef);
}
else {
$self->{'transcript'} = $value;
}
}
unless (exists $self->{'transcript'}) {
return (undef);
} else {
return $self->{'transcript'};
} } |
General documentation
Heikki Lehvaslaiho <
heikki@ebi.ac.uk>
Address:
EMBL Outstation, European Bioinformatics Institute
Wellcome Trust Genome Campus, Hinxton
Cambs. CB10 1SD, United Kingdom
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _