# Raw content of XrefMapper::ProcessPaired
package XrefMapper::ProcessPaired;
use vars '@ISA';
@ISA = qw{ XrefMapper::BasicMapper };
use strict;
use warnings;
use XrefMapper::BasicMapper;
use Cwd;
use DBI;
use File::Basename;
use IPC::Open3;
# Constructor.
#
#   $class  - class name to bless the new object into
#   $mapper - object providing an xref() accessor; its xref handle is copied
#             onto the new object through the inherited xref() accessor
#
# Returns the newly blessed object.
sub new {
  my $class  = shift;
  my $mapper = shift;

  my $self = bless {}, $class;

  # Share the mapper's xref handle with this processing step.
  $self->xref($mapper->xref);

  return $self;
}
# Infer missing object_xrefs from the accession pairs stored in the xref
# database, then record the 'processed_pairs' step in process_status.
#
# Each row of the `pairs` table holds two accessions.  For every pair the
# number of DUMP_OUT object_xrefs is counted for each accession:
#
#   * both matched, or neither matched -> nothing to do;
#   * only accession1 matched -> its object_xrefs are joined to
#     gene_transcript_translation on translation_id, and accession2 is
#     attached to each resulting transcript ("Transcript");
#   * only accession2 matched -> its object_xrefs are joined on
#     transcript_id, and accession1 is attached to each resulting
#     translation ("Translation").
#
# Attaching means inserting an object_xref with linkage_type INFERRED_PAIR
# and ox_status DUMP_OUT, copying the query/target identity of the matched
# partner into identity_xref, and flagging the xref's info_type as
# INFERRED_PAIR.  If the insert reports a duplicate, the existing
# object_xref row is switched to DUMP_OUT / INFERRED_PAIR instead.
#
# Arguments: none besides the invocant.  Uses $self->xref->dbc for all
# database access and $self->verbose to decide whether to print progress.
#
# Dies if the pairs query cannot be executed, if an insert fails for a
# reason other than a duplicate key, if a reported duplicate cannot be found
# again, or if one of the follow-up updates fails.
sub process{
  my ($self) = @_;

  print "Process Pairs\n" if($self->verbose);

  my $dbc = $self->xref->dbc;

  # Work out the next free object_xref_id.
  my $object_xref_id;
  my $sth = $dbc->prepare("select MAX(object_xref_id) from object_xref");
  $sth->execute;
  $sth->bind_columns(\$object_xref_id);
  $sth->fetch;
  $object_xref_id++;
  $sth->finish;

  # Note: the id is incremented again before every insert, so the first id
  # actually used is one above the value printed here.
  print "Starting at object_xref of $object_xref_id\n" if($self->verbose);

  my $psth = $dbc->prepare("select p.accession1, p.accession2 from pairs p");

  my $ox_count_sth = $dbc->prepare('select count(1) from object_xref ox, xref x where ox.xref_id = x.xref_id and ox.ox_status = "DUMP_OUT" and x.accession = ?');

  # Accession matched on a translation -> transcript ids plus identities.
  my $ox_transcript_sth = $dbc->prepare('select gtt.transcript_id, ix.query_identity, ix.target_identity from identity_xref ix, object_xref ox, xref x, gene_transcript_translation gtt where ox.object_xref_id = ix.object_xref_id and ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and gtt.translation_id = ox.ensembl_id and x.accession = ?');

  # Accession matched on a transcript -> translation ids plus identities.
  my $ox_translation_sth = $dbc->prepare('select gtt.translation_id, ix.query_identity, ix.target_identity from identity_xref ix, object_xref ox, xref x, gene_transcript_translation gtt where ox.object_xref_id = ix.object_xref_id and ox.ox_status = "DUMP_OUT" and ox.xref_id = x.xref_id and gtt.transcript_id = ox.ensembl_id and x.accession = ?');

  my $xref_sth = $dbc->prepare("select xref_id from xref where accession = ?");

  my $ox_insert_sth = $dbc->prepare("insert into object_xref (object_xref_id, xref_id, ensembl_id, ensembl_object_type, linkage_type, ox_status) values(?, ?, ?, ?, 'INFERRED_PAIR', 'DUMP_OUT')");
  # Duplicate inserts are expected and handled by hand below, so the insert
  # handle must neither die nor print on error while this sub is running.
  local $ox_insert_sth->{RaiseError}; #catch duplicates
  local $ox_insert_sth->{PrintError}; # cut down on error messages

  my $ox_get_id_sth = $dbc->prepare("select object_xref_id,ox_status from object_xref where xref_id = ? and ensembl_id = ? and ensembl_object_type = ?");
  my $ox_update_sth = $dbc->prepare('update object_xref set ox_status = "DUMP_OUT", linkage_type = "INFERRED_PAIR" where object_xref_id = ?');
  my $xref_update_sth = $dbc->prepare('update xref set info_type = "INFERRED_PAIR" where xref_id = ?');
  my $ins_dep_ix_sth = $dbc->prepare("insert into identity_xref (object_xref_id, query_identity, target_identity) values(?, ?, ?)");

  my $refseq_count = 0;
  my %change;

  # Number of DUMP_OUT object_xrefs that exist for one accession.
  my $count_dump_out = sub {
    my ($accession) = @_;

    my $count;
    $ox_count_sth->execute($accession);
    $ox_count_sth->bind_columns(\$count);
    $ox_count_sth->fetch;
    return $count;
  };

  # Attach $partner_acc to every Ensembl object found for $matched_acc.
  #   $lookup_sth  - statement yielding (ensembl_id, query_identity,
  #                  target_identity) rows for $matched_acc
  #   $object_type - "Transcript" or "Translation"; stored as
  #                  ensembl_object_type of the new object_xref
  # Kept as a closure (not a package sub) so it cannot shadow any method
  # inherited from the parent class.
  my $attach_partner = sub {
    my ($lookup_sth, $matched_acc, $partner_acc, $object_type) = @_;

    # The partner needs an xref row of its own, otherwise there is nothing
    # to attach; only the first xref_id returned is used.
    my $xref_id;
    $xref_sth->execute($partner_acc);
    $xref_sth->bind_columns(\$xref_id);
    return if(!$xref_sth->fetch);
    return if(!defined($xref_id));

    my $id_label = lc($object_type)."_id";

    my ($ensembl_id, $q_id, $t_id);
    $lookup_sth->execute($matched_acc);
    $lookup_sth->bind_columns(\$ensembl_id, \$q_id, \$t_id);

    while($lookup_sth->fetch){
      # remember not all transcripts have translations.
      next if(!defined($ensembl_id));

      $object_xref_id++;
      $ox_insert_sth->execute($object_xref_id, $xref_id, $ensembl_id, $object_type);

      if($ox_insert_sth->err){
        my $err = $ox_insert_sth->errstr;
        if($err !~ /Duplicate/){
          die "Problem loading error is $err\n";
        }

        # Duplicate: an object_xref for this combination already exists
        # (it might have failed the cutoff earlier).  Find the old row and
        # promote it instead of inserting a new one.
        $change{"UPDATE"}++;

        my ($old_object_xref_id, $status);
        $ox_get_id_sth->execute($xref_id, $ensembl_id, $object_type);
        $ox_get_id_sth->bind_columns(\$old_object_xref_id, \$status);
        $ox_get_id_sth->fetch();

        # Check that the row was found before looking at its status, so a
        # missing row dies cleanly instead of first warning about undef.
        if(!defined($old_object_xref_id)){
          die "Duplicate but can't find the original?? xref_id = $xref_id, ensembl_id = $ensembl_id, type = $object_type\n";
        }
        if(defined($status) and $status eq "DUMP_OUT"){
          print STDERR "Problem status for object_xref_id is DUMP_OUT but this should never happen as it was not found earlier??? ($id_label = $ensembl_id, xref_id = $xref_id)\n";
        }

        $ox_update_sth->execute($old_object_xref_id)|| die "Could not set update for object_xref_id = $old_object_xref_id";
        $xref_update_sth->execute($xref_id)|| die "Could not set update for xref_id = $xref_id";
      }
      else{
        # Fresh object_xref: carry over the identities of the matched partner.
        $ins_dep_ix_sth->execute($object_xref_id, $q_id, $t_id);
        $xref_update_sth->execute($xref_id)|| die "Could not set update for xref_id = $xref_id";
        $change{"NEW"}++;
      }

      $refseq_count++;
    }
    return;
  };

  # Walk over all pairs (the query is executed exactly once).
  my ($acc1, $acc2);
  $psth->execute() || die "execute failed";
  $psth->bind_columns(\$acc1, \$acc2);

  while($psth->fetch()){
    my $count1 = $count_dump_out->($acc1); # translation alignment
    my $count2 = $count_dump_out->($acc2); # transcript alignment

    # Only pairs where exactly one member is matched can be completed.
    next unless($count1 xor $count2);

    if($count1){
      $attach_partner->($ox_transcript_sth, $acc1, $acc2, "Transcript");
    }
    else{
      $attach_partner->($ox_translation_sth, $acc2, $acc1, "Translation");
    }
  }

  foreach my $handle ($psth, $ox_count_sth, $ox_transcript_sth,
                      $ox_translation_sth, $ox_insert_sth, $ox_get_id_sth,
                      $ox_update_sth, $xref_update_sth, $xref_sth,
                      $ins_dep_ix_sth){
    $handle->finish;
  }

  foreach my $key (keys %change){
    print "\t$key\t".$change{$key}."\n" if($self->verbose);
  }
  print "$refseq_count new relationships added\n" if($self->verbose);

  # Record that this step of the pipeline has been completed.
  my $sth_stat = $dbc->prepare("insert into process_status (status, date) values('processed_pairs',now())");
  $sth_stat->execute();
  $sth_stat->finish;
}
1;