# Raw content of XrefParser::RefSeq_CCDSParser
package XrefParser::RefSeq_CCDSParser;
use strict;
use Carp;
use DBI;
use base qw( XrefParser::BaseParser );
# Read RefSeq accessions paired with CCDS ids from the CCDS database and assign direct xrefs
# Create direct xrefs between RefSeq nucleotide accessions and Ensembl
# transcripts.  The CCDS database pairs each RefSeq accession with a CCDS id,
# and the CCDS direct xrefs already present in the xref database supply the
# transcript stable id for that CCDS id.
#
# Arguments (after the invocant):
#   $file       - connection details for the CCDS database; the host, port,
#                 dbname and pass values are extracted from "key=>value,"
#                 pairs (each value has to be followed by a comma).  The user
#                 is always "ensro".
#   $source_id  - source id given to the new xrefs (accessions starting with
#                 XM get the RefSeq_dna_predicted source id instead); it is
#                 also stored as the fourth value of copied dependent_xref rows.
#   $species_id - species id given to the new xrefs.
#   $verbose    - when true, a one line summary is printed at the end.
#
# Returns 0 on success and 1 when dbi2() gives back no CCDS database handle.
# Croaks when executing the CCDS query returns false.
sub run_script {
  # The invocant is only taken off the argument list when there is a calling
  # subroutine frame.  It is declared on its own line because
  # "my $x = ... if ..." is documented in perlsyn as undefined behaviour.
  my $self;
  $self = shift if ( defined( caller(1) ) );
  my $file       = shift;
  my $source_id  = shift;
  my $species_id = shift;
  my $verbose    = shift;

  my $user = "ensro";
  my $host;
  my $port;
  my $dbname;
  my $pass;
  if ( $file =~ /host[=][>](\S+?)[,]/ ) {
    $host = $1;
  }
  if ( $file =~ /port[=][>](\S+?)[,]/ ) {
    $port = $1;
  }
  if ( $file =~ /dbname[=][>](\S+?)[,]/ ) {
    $dbname = $1;
  }
  if ( $file =~ /pass[=][>](\S+?)[,]/ ) {
    $pass = $1;
  }

  my $dna_pred = XrefParser::BaseParser->get_source_id_for_source_name("RefSeq_dna_predicted");

  # Because the direct mappings have no descriptions etc. we have to steal
  # these from the xrefs stored for the RefSeq_dna source.  The xref_id of
  # each of those xrefs is remembered too, keyed by RefSeq accession, so that
  # their dependent xrefs can be copied onto the new direct xrefs below.
  my %label;
  my %version;
  my %description;
  my %old_xref;

  my $dbi = $self->dbi();
  my $sql = "select xref.accession, xref.label, xref.version, xref.description, xref.xref_id from xref, source where xref.source_id = source.source_id and source.name = 'RefSeq_dna'";
  my $sth = $dbi->prepare($sql);
  $sth->execute();
  while ( my ( $acc, $lab, $ver, $desc, $refseq_xref_id ) = $sth->fetchrow_array() ) {
    $label{$acc}       = $lab;
    $version{$acc}     = $ver;
    $description{$acc} = $desc;
    $old_xref{$acc}    = $refseq_xref_id;
  }
  $sth->finish;

  # Transcript stable id (and object type) for every CCDS accession that
  # already has a transcript direct xref.
  $sql = 'select x.accession, x.xref_id, d.ensembl_stable_id, "Transcript"
            from xref x, transcript_direct_xref d, source s
              where s.source_id = x.source_id and
                 x.xref_id = d.general_xref_id and s.name like "CCDS"';
  $sth = $dbi->prepare($sql);
  $sth->execute();
  my %ensembl_stable_id;
  my %ensembl_type;
  # The second column (the xref_id of the CCDS xref) is not needed here.
  while ( my ( $access, undef, $stable_id, $type ) = $sth->fetchrow_array() ) {
    $ensembl_stable_id{$access} = $stable_id;
    $ensembl_type{$access}      = $type;
  }
  $sth->finish;

  my $line_count = 0;
  my $xref_count = 0;
  my %seen;
  my %old_to_new;

  my $dbi2 = $self->dbi2( $host, $port, $user, $dbname, $pass );
  if ( !defined($dbi2) ) {
    return 1;
  }

  # $sql and $sth are reused rather than declared again, which would mask the
  # variables declared above.
  $sql = "select cu.ccds_uid, a.nuc_acc from Accessions a, Accessions_GroupVersions agv, GroupVersions gv, CcdsUids cu where a.accession_uid = agv.accession_uid and a.organization_uid=1 and agv.group_version_uid=gv.group_version_uid and gv.ccds_status_val_uid in (3) and cu.group_uid=gv.group_uid order by gv.ccds_status_val_uid, cu.ccds_uid";
  $sth = $dbi2->prepare($sql);
  $sth->execute() or croak( $dbi2->errstr() );

  while ( my ( $ccds, $refseq ) = $sth->fetchrow_array() ) {
    $line_count++;

    # Only the first row seen for a RefSeq accession is used.
    next if ( defined( $seen{$refseq} ) );
    $seen{$refseq} = 1;

    # Nothing to attach to unless this CCDS id has a transcript direct xref.
    my $key = "CCDS" . $ccds;
    next if ( !defined( $ensembl_stable_id{$key} ) );

    my $new_source_id = $source_id;
    if ( $refseq =~ /^XM/ ) {
      $new_source_id = $dna_pred;
    }

    my $xref_id = $self->add_xref( $refseq, $version{$refseq}, $label{$refseq} || $refseq,
                                   $description{$refseq}, $new_source_id, $species_id, "DIRECT" );
    $self->add_direct_xref( $xref_id, $ensembl_stable_id{$key}, $ensembl_type{$key}, "" );

    # Map the existing RefSeq_dna xref for this accession to the new xref.
    # An accession without an existing xref has no dependent xrefs to copy.
    if ( defined( $old_xref{$refseq} ) ) {
      $old_to_new{ $old_xref{$refseq} } = $xref_id;
    }
    $xref_count++;
  }

  # For each one seen get all its dependent xrefs and load them for the new
  # one too.
  my $add_dependent_xref_sth = $dbi->prepare("INSERT INTO dependent_xref VALUES(?,?,?,?)");
  my $get_dependent_xref_sth = $dbi->prepare("SELECT dependent_xref_id, linkage_annotation "
                                           . "FROM dependent_xref where master_xref_id = ?");
  foreach my $old_id ( keys %old_to_new ) {
    $get_dependent_xref_sth->execute($old_id);
    while ( my ( $dependent_id, $linkage ) = $get_dependent_xref_sth->fetchrow_array() ) {
      $add_dependent_xref_sth->execute( $old_to_new{$old_id}, $dependent_id, $linkage, $source_id );
    }
  }

  print "Parsed $line_count RefSeq_dna identifiers from $file, added $xref_count xrefs and $xref_count direct_xrefs from $line_count lines.\n" if ($verbose);
  return 0;
}
1;