# Raw content of Bio::EnsEMBL::ExternalData::SangerSNP::TranscriptVariationAdaptor
# EnsEMBL Sanger SNP adaptor
#
# Copyright EnsEMBL
#
# Author: Steve Searle
#
=head1 NAME
Bio::EnsEMBL::ExternalData::SangerSNP::TranscriptVariationAdaptor
=head1 SYNOPSIS
A transcript variation adaptor which sits over the Sanger SNP database.
Provides a means of getting the transcript consequences of SNPs out of the
Sanger SNP database as Bio::EnsEMBL::Variation::TranscriptVariation objects.
=head1 CONTACT
Post questions to the EnsEMBL developer list.
=head1 APPENDIX
=cut
use strict;
package Bio::EnsEMBL::ExternalData::SangerSNP::TranscriptVariationAdaptor;
use Bio::EnsEMBL::ExternalData::Variation;
use Bio::EnsEMBL::Variation::TranscriptVariation;
use Bio::EnsEMBL::Analysis;
use Bio::EnsEMBL::External::ExternalFeatureAdaptor;
use Bio::EnsEMBL::DBSQL::BaseAdaptor;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::External::ExternalFeatureAdaptor );
# Translation table from the database's wording to consequence-type names.
# The key is the position-type description and the consequence description
# of a row joined by a single space (see how $conskey is built in the fetch
# methods); the value is the name handed to add_consequence_type() and to
# TranscriptVariation->new_fast(). A combination that is not listed here
# looks up as undef.
our %CONSEQUENCE_TYPE_MAP = (
'Coding Synonymous' => 'SYNONYMOUS_CODING',
'Coding Non-synonymous' => 'NON_SYNONYMOUS_CODING',
'Coding Stop gained' => 'STOP_GAINED',
'Coding Stop lost' => 'STOP_LOST',
# Coding Indel might NOT be frameshift
# (so mapping it to FRAMESHIFT_CODING is an approximation)
'Coding Indel' => 'FRAMESHIFT_CODING',
'Non-coding exonic Non-coding' => 'UTR',
'Intronic Non-coding' => 'INTRONIC',
'Upstream Non-coding' => 'UPSTREAM',
);
# Names the coordinate system(s) this adaptor works in: the single string
# "ASSEMBLY". A literal is returned (rather than an array) so that the value
# is the string itself in scalar context as well as in list context.
sub coordinate_systems {
    return 'ASSEMBLY';
}
=head2 fetch_all_by_VariationFeatures

  Arg[1]      : arrayref of variation feature objects; each one is asked for
                dbID(), start() and slice()->start(), and receives
                add_consequence_type() / add_TranscriptVariation() calls
  Example     : my $tvs = $adaptor->fetch_all_by_VariationFeatures(\@vfs);
  Description : Queries the consequences recorded for the given features
                (restricted to the experiment returned by consequence_exp())
                and, for every row whose SNP id and genomic position match
                one of the features, builds a TranscriptVariation and
                attaches it to that feature. Once the query has succeeded
                the transcriptVariations list of every feature passed in is
                reset to an empty list before rows are processed.
  Return type : arrayref of Bio::EnsEMBL::Variation::TranscriptVariation;
                an empty arrayref if Arg[1] is empty; nothing (after a
                warning) if the SQL could not be prepared or executed
  Exceptions  : croaks if Arg[1] is not an array reference
  Caller      : general

=cut

sub fetch_all_by_VariationFeatures {
    my ($self, $vf_ref) = @_;

    if (ref($vf_ref) ne 'ARRAY') {
        # throw() is neither defined nor explicitly imported in this file, so
        # calling it would abort with "Undefined subroutine" rather than the
        # intended message; raise the error through Carp instead.
        require Carp;
        Carp::croak('ArrayRef of Bio::EnsEMBL::Variation::VariationFeature expected');
    }

    # With no features the id list below would be empty and the query would
    # contain the invalid clause "in ()"; there is nothing to look up anyway.
    return [] unless @$vf_ref;

    # Features keyed by "<dbID>:<start + slice start - 1>" so that rows can be
    # matched on both SNP id and genomic position. %v_by_id is only used to
    # collect the distinct ids for the IN list.
    my %vf_by_id;
    my %v_by_id;
    foreach my $vf (@$vf_ref) {
        $vf_by_id{ $vf->dbID() . ":" . ($vf->start + $vf->slice->start - 1) } = $vf;
        $v_by_id{ $vf->dbID() } = $vf;
    }
    my $instr = join(",", keys %v_by_id);

    # The SQL text is kept flush left so the statement sent to the database
    # is unchanged.
    my $q = qq(
SELECT
sgc.id_snp as idsnp,
ptd.description as pos_type,
sgc_dict.description as consequence,
cs.name as transcript_stable_id,
sgc.transcript_position as cdna_start,
sgc.genomic_position as gen_pos
FROM
coding_sequence cs,
snp_gene_consequence sgc,
position_type_dict ptd,
sgc_dict
WHERE sgc.id_snp in ($instr)
AND sgc.consequence = sgc_dict.id_dict
AND sgc.position_description = ptd.id_dict
AND sgc.id_codingseq = cs.id_codingseq
AND cs.design_entry = ?
);

    my $sth;
    eval {
        $sth = $self->prepare($q);
        $sth->execute($self->consequence_exp);
    };
    if ($@) {
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    # Transcripts already fetched, keyed by stable id (one lookup per id).
    my %trans_hash;

    # Mark all variation features as having had tvs added
    foreach my $vf (@$vf_ref) {
        $vf->{transcriptVariations} = [];
    }

    my @tvs;
    while (my $row = $sth->fetchrow_hashref) {
        # Row keys are read in upper case. Combinations missing from
        # %CONSEQUENCE_TYPE_MAP leave $consequence_type undefined.
        my $conskey          = $row->{'POS_TYPE'} . " " . $row->{'CONSEQUENCE'};
        my $consequence_type = $CONSEQUENCE_TYPE_MAP{$conskey};

        # Only rows whose id AND position match a supplied feature are used.
        my $vfkey = $row->{IDSNP} . ":" . $row->{GEN_POS};
        next unless exists $vf_by_id{$vfkey};

        my $vf = $vf_by_id{$vfkey};
        $vf->add_consequence_type($consequence_type);

        my $tsid = $row->{'TRANSCRIPT_STABLE_ID'};
        if (!exists $trans_hash{$tsid}) {
            $trans_hash{$tsid} = $self->ensembl_db->get_TranscriptAdaptor->fetch_by_stable_id($tsid);
        }

        # Only a single transcript position is selected, so it serves as both
        # cdna_start and cdna_end.
        my $tvar = Bio::EnsEMBL::Variation::TranscriptVariation->new_fast({
            transcript       => $trans_hash{$tsid},
            cdna_start       => $row->{'CDNA_START'},
            cdna_end         => $row->{'CDNA_START'},
            consequence_type => $consequence_type,
        });
        $vf->add_TranscriptVariation($tvar);
        push @tvs, $tvar;
    }

    return \@tvs;
}
=head2 fetch_all_by_postype_consequence

  Arg[1]      : string - position type, compared with
                position_type_dict.description
  Arg[2]      : string - consequence, compared with sgc_dict.description
  Example     : my $tvs = $adaptor->fetch_all_by_postype_consequence(
                    'Coding', 'Stop gained');
  Description : Fetches every consequence row with the given position type
                and consequence (restricted to the experiment returned by
                consequence_exp()). For each row the transcript and the
                variation feature are fetched (each cached for the duration
                of the call) and a TranscriptVariation linking the two is
                built and attached to the feature. Rows whose transcript or
                variation feature cannot be fetched are reported with warn
                and skipped.
  Return type : arrayref of Bio::EnsEMBL::Variation::TranscriptVariation;
                nothing (after a warning) if the SQL could not be prepared
                or executed
  Exceptions  : none
  Caller      : general

=cut

sub fetch_all_by_postype_consequence {
    my ($self, $postype, $consequence) = @_;

    # The SQL text is kept flush left so the statement sent to the database
    # is unchanged.
    my $q = qq(
SELECT
sgc.id_snp as idsnp,
ptd.description as pos_type,
sgc_dict.description as consequence,
cs.name as transcript_stable_id,
sgc.transcript_position as cdna_start,
sgc.genomic_position as gen_pos
FROM
coding_sequence cs,
snp_gene_consequence sgc,
position_type_dict ptd,
sgc_dict
WHERE sgc_dict.description = ?
AND ptd.description = ?
AND sgc.consequence = sgc_dict.id_dict
AND sgc.position_description = ptd.id_dict
AND sgc.id_codingseq = cs.id_codingseq
AND cs.design_entry = ?
);

    my $sth;
    eval {
        $sth = $self->prepare($q);
        # Bind order follows the placeholders: consequence first, then
        # position type, then the experiment id.
        $sth->execute($consequence, $postype, $self->consequence_exp);
    };
    if ($@) {
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    my %trans_hash;    # transcripts by stable id (undef when not found)
    my %vf_hash;       # variation features by "<snp id>:<genomic position>"
    my @tvs;

    while (my $row = $sth->fetchrow_hashref) {
        # Combinations missing from %CONSEQUENCE_TYPE_MAP leave
        # $consequence_type undefined.
        my $conskey          = $row->{'POS_TYPE'} . " " . $row->{'CONSEQUENCE'};
        my $consequence_type = $CONSEQUENCE_TYPE_MAP{$conskey};

        my $tsid = $row->{TRANSCRIPT_STABLE_ID};
        if (!exists $trans_hash{$tsid}) {
            $trans_hash{$tsid} = $self->ensembl_db->get_TranscriptAdaptor->fetch_by_stable_id($tsid);
        }
        my $transcript = $trans_hash{$tsid};

        # Without a transcript object the seq_region_name call below would
        # die and abort the whole fetch; skip just this row instead.
        if (!defined $transcript) {
            warn("Missing transcript " . $tsid . " for variation feature " .
                 $row->{IDSNP} . " at " . $row->{GEN_POS} . "\n");
            next;
        }

        my $vfid  = $row->{IDSNP};
        my $vfkey = $vfid . ":" . $row->{GEN_POS};
        if (!exists $vf_hash{$vfkey}) {
            $vf_hash{$vfkey} = $self->db->get_VariationAdaptor->fetch_by_dbID_position_range(
                $vfid, $transcript->seq_region_name,
                $row->{GEN_POS}, $row->{GEN_POS});
        }
        my $vf = $vf_hash{$vfkey};

        if (defined $vf) {
            # Only a single transcript position is selected, so it serves as
            # both cdna_start and cdna_end.
            my $tvar = Bio::EnsEMBL::Variation::TranscriptVariation->new_fast({
                transcript        => $transcript,
                cdna_start        => $row->{CDNA_START},
                cdna_end          => $row->{CDNA_START},
                consequence_type  => $consequence_type,
                variation_feature => $vf,
            });
            $vf->add_TranscriptVariation($tvar);
            $vf->add_consequence_type($consequence_type);
            push @tvs, $tvar;
        }
        else {
            # Diagnostics go to STDERR via warn (like the SQL failure above)
            # so that they never end up in a caller's regular output.
            warn("Missing variation feature " . $row->{IDSNP} . " for transcript " . $tsid .
                 " " . $transcript->seq_region_name . " " . $transcript->start . " " .
                 $transcript->end . " at " . $row->{GEN_POS} . "\n");
        }
    }

    return \@tvs;
}
=head2 fetch_all_by_transcript_stable_id

  Arg[1]      : string - transcript stable id, compared with
                coding_sequence.name
  Example     : my $tvs = $adaptor->fetch_all_by_transcript_stable_id($id);
  Description : Fetches every consequence row recorded for the given
                transcript (restricted to the experiment returned by
                consequence_exp()). For each row the variation feature is
                fetched (cached per SNP id and genomic position for the
                duration of the call) and a TranscriptVariation linking it
                to the transcript is built and attached to the feature.
                Rows whose transcript or variation feature cannot be
                fetched are reported with warn and skipped.
  Return type : arrayref of Bio::EnsEMBL::Variation::TranscriptVariation;
                nothing (after a warning) if the SQL could not be prepared
                or executed
  Exceptions  : none
  Caller      : general

=cut

sub fetch_all_by_transcript_stable_id {
    my ($self, $trans_stable_id) = @_;

    # The SQL text is kept flush left so the statement sent to the database
    # is unchanged.
    my $q = qq(
SELECT
sgc.id_snp as idsnp,
ptd.description as pos_type,
sgc_dict.description as consequence,
cs.name as transcript_stable_id,
sgc.transcript_position as cdna_start,
sgc.genomic_position as gen_pos
FROM
coding_sequence cs,
snp_gene_consequence sgc,
position_type_dict ptd,
sgc_dict
WHERE cs.name = ?
AND sgc.consequence = sgc_dict.id_dict
AND sgc.position_description = ptd.id_dict
AND sgc.id_codingseq = cs.id_codingseq
AND cs.design_entry = ?
);

    my $sth;
    eval {
        $sth = $self->prepare($q);
        $sth->execute($trans_stable_id, $self->consequence_exp);
    };
    if ($@) {
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    my %trans_hash;    # transcripts by stable id (undef when not found)
    my %vf_hash;       # variation features by "<snp id>:<genomic position>"
    my @tvs;

    while (my $row = $sth->fetchrow_hashref) {
        # Combinations missing from %CONSEQUENCE_TYPE_MAP leave
        # $consequence_type undefined.
        my $conskey          = $row->{'POS_TYPE'} . " " . $row->{'CONSEQUENCE'};
        my $consequence_type = $CONSEQUENCE_TYPE_MAP{$conskey};

        my $tsid = $row->{TRANSCRIPT_STABLE_ID};
        if (!exists $trans_hash{$tsid}) {
            $trans_hash{$tsid} = $self->ensembl_db->get_TranscriptAdaptor->fetch_by_stable_id($tsid);
        }
        my $transcript = $trans_hash{$tsid};

        # Without a transcript object the seq_region_name call below would
        # die and abort the whole fetch; skip just this row instead.
        if (!defined $transcript) {
            warn("Missing transcript " . $tsid . " for variation feature " .
                 $row->{IDSNP} . " at " . $row->{GEN_POS} . "\n");
            next;
        }

        # The cache must be read with the same key it is written with.
        # Testing a bare-id key while storing under "<id>:<position>" meant
        # the lookups never hit, so every row was treated as missing.
        my $vfid  = $row->{IDSNP};
        my $vfkey = $vfid . ":" . $row->{GEN_POS};
        if (!exists $vf_hash{$vfkey}) {
            $vf_hash{$vfkey} = $self->db->get_VariationAdaptor->fetch_by_dbID_position_range(
                $vfid, $transcript->seq_region_name,
                $row->{GEN_POS}, $row->{GEN_POS});
        }
        my $vf = $vf_hash{$vfkey};

        if (defined $vf) {
            # Only a single transcript position is selected, so it serves as
            # both cdna_start and cdna_end.
            my $tvar = Bio::EnsEMBL::Variation::TranscriptVariation->new_fast({
                transcript        => $transcript,
                cdna_start        => $row->{CDNA_START},
                cdna_end          => $row->{CDNA_START},
                consequence_type  => $consequence_type,
                variation_feature => $vf,
            });
            $vf->add_TranscriptVariation($tvar);
            $vf->add_consequence_type($consequence_type);
            push @tvs, $tvar;
        }
        else {
            # Diagnostics go to STDERR via warn (like the SQL failure above)
            # so that they never end up in a caller's regular output.
            warn("Missing variation feature " . $row->{IDSNP} . " for transcript " . $tsid .
                 " " . $transcript->seq_region_name . " " . $transcript->start . " " .
                 $transcript->end . " at " . $row->{GEN_POS} . "\n");
        }
    }

    return \@tvs;
}
=head2 consequence_exp
Arg[1] : (optional) consequence experiment id
Example : $glovar_adaptor->consequence_exp(2046);
Description : getter/setter for the consequence experiment
(coding_sequence.design_entry in the glovar db)
Return type : String - consequence experiment id
Exceptions : none
Caller : general
=cut
# Combined getter/setter for the consequence experiment id kept in
# $self->{'consequence_exp'}. The stored value is replaced only when a true
# argument is supplied, so undef, 0 and '' leave it untouched. Always returns
# the currently stored value.
sub consequence_exp {
    my $self = shift;
    my $exp  = shift;

    $self->{'consequence_exp'} = $exp if $exp;

    return $self->{'consequence_exp'};
}
1;