Raw content of SeqStoreConverter::AnophelesGambiae
use strict;
use warnings;
use SeqStoreConverter::BasicConverter;
package SeqStoreConverter::AnophelesGambiae;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::BasicConverter);
sub create_coord_systems {
my $self = shift;
$self->debug("AnophelesGambiae Specific: creating scaffold, chunk and, " .
"chromosome coord systems");
my $target = $self->target();
my $dbh = $self->dbh();
my $ass_def = $self->get_default_assembly();
my @coords =
(['chunk', undef, 'default_version,sequence_level', 3],
['chromosome', $ass_def, 'default_version', 1],
["scaffold" , undef, "default_version", 2]);
my @assembly_mappings = ("chromosome:$ass_def|chunk",
"chromosome:$ass_def|scaffold",
"scaffold|chromosome:$ass_def|chunk");
$self->debug("Building coord_system table");
my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
"(name, version, attrib,rank) VALUES (?,?,?,?)");
my %coord_system_ids;
foreach my $cs (@coords) {
$sth->execute(@$cs);
$coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
}
$sth->finish();
$sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
"VALUES ('assembly.mapping', ?)");
foreach my $mapping (@assembly_mappings) {
$sth->execute($mapping);
}
$sth->finish();
return;
}
sub create_seq_regions {
my $self = shift;
$self->debug("AnophelesGambiae Specific: creating seq_regions");
$self->contig_to_seq_region('chunk');
$self->supercontig_to_seq_region('scaffold');
$self->chromosome_to_seq_region();
}
sub create_assembly {
my $self = shift;
$self->debug("AnophelesGambiae Specific: loading assembly table");
$self->assembly_contig_chromosome();
$self->assembly_supercontig_chromosome();
return;
}
sub transfer_prediction_transcripts {
my $self = shift;
my $source = $self->source();
my $target = $self->target();
my $dbh = $self->dbh();
$self->debug("AnophelesGambiae Specific: building prediction_exon table");
#
# In Anopheles the predicion transcripts were computed in chromosomal
# coords, so convert them to chromosomal coords and merge any adjacent
# exons
#
my $sql =
"SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
" IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
" (a.chr_start+a.contig_end-pt.contig_end)) as start, " .
" IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
" (a.chr_start+a.contig_end-pt.contig_start)) as end, " .
" a.contig_ori * pt.contig_strand as strand, " .
" pt.start_phase, pt.score, pt.p_value " .
"FROM $source.assembly a, $target.tmp_chr_map tcm, " .
" $source.prediction_transcript pt " .
"WHERE pt.contig_id = a.contig_id " .
"AND a.chromosome_id = tcm.old_id " .
"ORDER BY pt.prediction_transcript_id, exon_rank";
my $sth = $dbh->prepare($sql);
$sth->execute();
my $prev_end = undef;
my $prev_start = undef;
my $prev_id = undef;
my $rank = undef;
my %prev_exon = ();
while(my $row = $sth->fetchrow_arrayref()) {
my ($pt_id, $sr_id, $sr_start, $sr_end, $sr_strand, $start_phase,
$score, $p_value) = @$row;
if(defined($prev_id) && ($prev_id == $pt_id)) {
#still in the same transcript
if($sr_strand == 1 &&
defined($prev_end) && $prev_end == $sr_start-1) {
$self->debug("merged exon $rank in prediction_transcript $pt_id\n");
#adjacent exons forward strand - merge them
$prev_exon{'seq_region_end'} = $sr_end;
$prev_end = $sr_end;
} elsif($sr_strand == -1 &&
defined($prev_start) && $prev_start == $sr_end+1) {
$self->debug("merged exon $rank in prediction_transcript $pt_id\n");
#adjacent exons negative strand - merge them
$prev_exon{'seq_region_start'} = $sr_start;
$prev_start = $sr_start;
} else {
#non-adjacent exons in the same transcript - no merge
$rank++;
#store the previous exon
$self->store_pexon(\%prev_exon);
#make current exon the previous exon
%prev_exon = ('prediction_transcript_id' => $pt_id,
'seq_region_id' => $sr_id,
'seq_region_start' => $sr_start,
'seq_region_end' => $sr_end,
'seq_region_strand' => $sr_strand,
'start_phase' => $start_phase,
'score' => $score,
'p_value' => $p_value,
'rank' => $rank);
}
} else {
#store previous exon
$self->store_pexon(\%prev_exon) if(%prev_exon);
#new ptranscript
$rank = 1;
$prev_id = $pt_id;
$prev_end = $sr_end;
$prev_start = $sr_start;
%prev_exon = ('prediction_transcript_id' => $pt_id,
'seq_region_id' => $sr_id,
'seq_region_start' => $sr_start,
'seq_region_end' => $sr_end,
'seq_region_strand' => $sr_strand,
'start_phase' => $start_phase,
'score' => $score,
'p_value' => $p_value,
'rank' => $rank);
}
}
#store the very last exon in the table
$self->store_pexon(\%prev_exon) if(%prev_exon);
$sth->finish();
$self->debug("AnophelesGambiae Specific: building prediction_transcript " .
"table");
$dbh->do
("INSERT INTO $target.prediction_transcript (prediction_transcript_id, " .
" seq_region_id, seq_region_start, seq_region_end, " .
" seq_region_strand, analysis_id ) " .
"SELECT pt.prediction_transcript_id, tcm.new_id as seq_region_id, " .
" MIN(IF(a.contig_ori=1,(pt.contig_start+a.chr_start-a.contig_start),".
" (a.chr_start+a.contig_end-pt.contig_end))) as start, " .
" MAX(IF(a.contig_ori=1,(pt.contig_end+a.chr_start-a.contig_start)," .
" (a.chr_start+a.contig_end-pt.contig_start))) as end, " .
" a.contig_ori * pt.contig_strand as strand, " .
" pt.analysis_id " .
"FROM $source.assembly a, $target.tmp_chr_map tcm, " .
" $source.prediction_transcript pt " .
"WHERE pt.contig_id = a.contig_id " .
"AND a.chromosome_id = tcm.old_id " .
"GROUP BY prediction_transcript_id");
return;
}
#
# helper function to store prediction exon
#
sub store_pexon {
my $self = shift;
my $pexon = shift;
my $target = $self->target();
my $source = $self->source();
my $dbh = $self->dbh();
my $store_sth = $dbh->prepare
("INSERT INTO $target.prediction_exon (prediction_transcript_id, " .
" exon_rank, seq_region_id, seq_region_start, seq_region_end, " .
" seq_region_strand, start_phase, score, p_value) " .
"VALUES (?,?,?,?,?,?,?,?,?)");
$store_sth->execute($pexon->{'prediction_transcript_id'},
$pexon->{'rank'},
$pexon->{'seq_region_id'},
$pexon->{'seq_region_start'},
$pexon->{'seq_region_end'},
$pexon->{'seq_region_strand'},
$pexon->{'start_phase'},
$pexon->{'score'},
$pexon->{'p_value'});
$store_sth->finish();
return;
}
1;