None available.
# Look up description, length, taxonomy id and source database for a list
# of accessions.
#
# Arguments:
#   $id_list    - array ref of accession strings.
#   $chunk_size - number of ids passed to fetch_mm_data per query
#                 (defaults to 1000).
#
# Returns a two-element list:
#   $descriptions - hash ref keyed by accession; the per-accession hashes
#                   are filled in here (description, length) and by
#                   fetch_mm_data.
#   $failed       - array ref of accessions that could not be resolved,
#                   each listed once, in the order they were first missed.
#
# When $self->analysis is true, description and length are read through
# $self->get_Seq_by_acc and the object is switched to light-fetch mode, so
# the database queries below only ask for data class and taxonomy id.
#
# Relies on the file-level variables $verbose and $uniprot_archive.
sub fetch_descriptions {
    my ( $self, $id_list, $chunk_size ) = @_;

    my $descriptions = {};
    my $failed       = [];
    $chunk_size ||= 1000;

    if ( $self->analysis ) {
        $self->is_light_fetch(1);
        print STDOUT "Fetching ".@$id_list." sequences from XDF database\n" if $verbose;

        # A named loop variable is used because $_ is global and could be
        # changed by the method calls made inside the loop.
        for my $acc (@$id_list) {
            my $seq = $self->get_Seq_by_acc($acc);
            if ( !$seq ) {
                push @$failed, $acc;
                next;
            }
            $descriptions->{$acc}{description} = $seq->description();
            $descriptions->{$acc}{length}      = $seq->length();
        }
    }

    # Every query deliberately ends with "IN ('": fetch_mm_data appends the
    # id list and the closing parenthesis.
    my $full_query = qq{
SELECT e.accession_version, e.sequence_length,e.data_class, d.description, t.ncbi_tax_id
FROM entry e, description d, taxonomy t
WHERE e.entry_id = d.entry_id
AND e.entry_id = t.entry_id
AND e.accession_version IN ('};
    my $full_arch_query = qq{
SELECT e.accession_version, e.sequence_length,MAX(r.data_class) AS data_class, d.description, t.ncbi_tax_id
FROM entry e, db_release r,description d, taxonomy t
WHERE e.entry_id = d.entry_id
AND e.entry_id = t.entry_id
AND e.entry_id = r.entry_id
AND e.accession_version IN ('};
    my $iso_full_query = qq{
SELECT i_e.accession_version, i_e.sequence_length,MAX(r.data_class) AS data_class, d.description, t.ncbi_tax_id
FROM entry i_e, entry p_e, db_release r, description d, taxonomy t, isoform i
WHERE i_e.entry_id = d.entry_id
AND i_e.entry_id = i.isoform_entry_id
AND p_e.entry_id = i.parent_entry_id
AND p_e.entry_id = t.entry_id
AND p_e.entry_id = r.entry_id
AND i_e.accession_version IN ('};
    my $light_query = qq{
SELECT e.accession_version, e.data_class, t.ncbi_tax_id
FROM entry e, taxonomy t
WHERE e.entry_id = t.entry_id
AND e.accession_version IN ('};
    my $light_arch_query = qq{
SELECT e.accession_version, MAX(r.data_class) AS data_class, t.ncbi_tax_id
FROM entry e, db_release r, taxonomy t
WHERE e.entry_id = t.entry_id
AND e.entry_id = r.entry_id
AND e.accession_version IN ('};
    my $iso_light_query = qq{
SELECT i_e.accession_version, MAX(r.data_class) AS data_class, t.ncbi_tax_id
FROM entry i_e, entry p_e, db_release r, taxonomy t, isoform i
WHERE i_e.entry_id = i.isoform_entry_id
AND p_e.entry_id = i.parent_entry_id
AND p_e.entry_id = t.entry_id
AND p_e.entry_id = r.entry_id
AND i_e.accession_version IN ('};

    # Read the mode after the analysis branch, which may have forced it on.
    my $light = $self->is_light_fetch();
    my ( $live_query, $arch_query, $iso_query ) =
        $light
        ? ( $light_query, $light_arch_query, $iso_light_query )
        : ( $full_query,  $full_arch_query,  $iso_full_query );

    # Try each database matching $pattern in turn, carrying forward only
    # the ids that are still unresolved.  Iterates without shifting so the
    # list returned by get_dbnames_like is left intact, and stops before
    # connecting to a database when nothing is left to look up.
    my $fetch_live = sub {
        my ( $pattern, $pending ) = @_;
        my $dbs = $self->get_dbnames_like($pattern) || [];
        for my $db (@$dbs) {
            last unless @$pending;
            $pending = $self->fetch_mm_data( $db, $live_query, 0, $pending,
                $chunk_size, $descriptions );
        }
        return $pending;
    };

    if ( $self->type eq 'protein' ) {

        # Ids containing "-<digits>" are treated as isoforms and are only
        # looked up through the isoform query.
        my ( @plain_ids, @isoform_ids );
        for my $acc (@$id_list) {
            if ( $acc =~ /\-\d+/ ) {
                push @isoform_ids, $acc;
            }
            else {
                push @plain_ids, $acc;
            }
        }

        my $pending = $fetch_live->( "uniprot%", \@plain_ids );

        # Whatever the current releases did not contain is retried
        # against the archive database.
        if (@$pending) {
            $pending = $self->fetch_mm_data( $uniprot_archive, $arch_query, 1,
                $pending, $chunk_size, $descriptions );
        }
        push @$failed, @$pending;

        if (@isoform_ids) {
            my $missing_iso = $self->fetch_mm_data( $uniprot_archive, $iso_query,
                1, \@isoform_ids, $chunk_size, $descriptions );
            push @$failed, @$missing_iso;
        }
    }
    else {
        my $pending = $fetch_live->( "embl%", $id_list );
        push @$failed, @$pending;
    }

    # An id can be missed by both the sequence lookup and the database
    # lookup; report it only once.
    my %seen;
    @$failed = grep { !$seen{$_}++ } @$failed;

    # A failed id must not leave a partially filled entry behind.
    delete @{$descriptions}{@$failed};

    print STDOUT "Failed to fetch ".@$failed." ids\n" if @$failed && $verbose;
    return ( $descriptions, $failed );
}
# Run one description query against a single M&M MySQL database, in chunks,
# and record the results in $descriptions.
#
# Arguments:
#   $dbname       - name of the database to connect to.
#   $sql          - query text ending in "IN ('" (the form built by
#                   fetch_descriptions); the id list is appended here.
#   $group        - when true, " GROUP BY accession_version" is appended.
#   $ids          - array ref of accessions to look up.
#   $chunk_size   - maximum number of ids per query (defaults to 1000).
#   $descriptions - hash ref, keyed by accession, updated in place with
#                   database and taxonid, plus description and length
#                   unless $self->is_light_fetch is true.
#
# Returns an array ref of the ids for which no row came back, in input
# order with duplicates removed.
#
# Dies on any database error (the handle is opened with RaiseError); the
# connection is closed before the error is re-thrown.
#
# Relies on the file-level variable $verbose.
sub fetch_mm_data {
    my ( $self, $dbname, $sql, $group, $ids, $chunk_size, $descriptions ) = @_;

    my $failed = [];

    # Nothing to look up: do not open a connection for it.
    return $failed unless $ids && @$ids;

    # A missing or zero chunk size would stop the loop below advancing.
    $chunk_size ||= 1000;

    # Callers hand over SQL that ends in an opening quote.  Drop that
    # quote so the ids can be bound as placeholders instead of being
    # spliced into the statement text, where an id containing a quote
    # character would break the query.
    ( my $sql_prefix = $sql ) =~ s/'\s*\z//;

    my %uniprot_db_for = ( STD => 'Swissprot', PRE => 'TrEMBL' );
    my $is_uniprot     = $dbname =~ /uniprot/i;
    my $light          = $self->is_light_fetch();

    my $dsn_mole = "DBI:mysql:host=".$self->mm_host.":port=".$self->mm_port.":".$dbname;
    my $dbh = DBI->connect( $dsn_mole, $self->mm_user, '', { 'RaiseError' => 1 } );

    my $completed = eval {
        for ( my $first = 0; $first < @$ids; $first += $chunk_size ) {
            my $last = $first + $chunk_size - 1;
            $last = $#$ids if $last > $#$ids;
            my @chunk = @{$ids}[ $first .. $last ];

            my $placeholders = join ',', ('?') x @chunk;
            my $query        = $sql_prefix . $placeholders . ')';
            $query .= " GROUP BY accession_version" if $group;

            # Every id starts out as missing and is struck off when a
            # row for it is returned.
            my %missing = map { ( $_ => 1 ) } @chunk;

            print STDOUT "Fetching data from M&M database $dbname for ".@chunk." ids\n" if $verbose;

            my $sth = $dbh->prepare($query);
            $sth->execute(@chunk) or die "Couldn't execute statement: " . $sth->errstr;

            while ( my $row = $sth->fetchrow_hashref() ) {
                my $id = $row->{'accession_version'};
                delete $missing{$id};

                # UniProt data classes are mapped to database names;
                # any other class is stored unchanged.  Hits from
                # non-UniProt databases are always labelled EMBL.
                my $hit_db = 'EMBL';
                if ($is_uniprot) {
                    $hit_db = $row->{'data_class'};
                    $hit_db = $uniprot_db_for{$hit_db}
                        if defined $hit_db && exists $uniprot_db_for{$hit_db};
                }

                $descriptions->{$id}{database} = $hit_db;
                $descriptions->{$id}{taxonid}  = $row->{'ncbi_tax_id'};
                if ( !$light ) {
                    $descriptions->{$id}{description} = $row->{'description'};
                    $descriptions->{$id}{length}      = $row->{'sequence_length'};
                }
            }
            $sth->finish();

            # delete() returns true only the first time an id is seen,
            # which keeps input order and drops duplicates.
            push @$failed, grep { delete $missing{$_} } @chunk;
        }
        1;
    };
    my $error = $@;

    # Close the connection whether or not the queries succeeded.
    $dbh->disconnect;
    die $error unless $completed;

    return $failed;
}
# Constructor.
#
# Named arguments (parsed by rearrange):
#   ANALYSIS - optional; stored through $self->analysis when true.
#   TYPE     - required; must be 'protein' or 'dna'.
#   LIGHT    - optional; stored through $self->is_light_fetch when true.
#   MM_HOST  - database host, defaults to 'cbi3'.
#   MM_PORT  - database port, defaults to 3306.
#   MM_USER  - database user, defaults to 'genero'.
#
# Throws if TYPE is missing or is neither 'protein' nor 'dna'.
sub new {
    my ( $class, @args ) = @_;
    my $self = bless {}, $class;

    my ( $analysis, $type, $light, $mm_host, $mm_port, $mm_user ) = rearrange(
        [ 'ANALYSIS', 'TYPE', 'LIGHT', 'MM_HOST', 'MM_PORT', 'MM_USER' ], @args );

    $self->analysis($analysis)    if $analysis;
    $self->is_light_fetch($light) if $light;

    # Check definedness first so that an omitted TYPE produces the
    # exception without uninitialized-value warnings along the way.
    my $type_is_valid = defined $type && ( $type eq 'protein' || $type eq 'dna' );
    if ( !$type_is_valid ) {
        my $shown = defined $type ? $type : '';
        throw("Must pass a molecule type, either protein or dna ".
              "not a ".$shown);
    }
    $self->type($type);

    $self->mm_host( $mm_host || 'cbi3' );
    $self->mm_port( $mm_port || 3306 );
    $self->mm_user( $mm_user || 'genero' );

    return $self;
}
# Prepare the statement that inserts or replaces one row of the
# hit_description table, remember it on the object under the
# '_hit_desc_sth' key, and return it.
#
# Arguments:
#   $dbobj - object whose prepare method is called with the SQL text.
sub prepare_hit_desc_sth {
    my ( $self, $dbobj ) = @_;

    my $replace_sql = qq{
REPLACE INTO hit_description (hit_name
, hit_description
, hit_length
, hit_taxon
, hit_db)
VALUES (?,TRIM(?),?,?,?)
};

    my $statement = $dbobj->prepare($replace_sql);
    $self->{'_hit_desc_sth'} = $statement;
    return $statement;
}
# Fetch the descriptions for a list of accessions and store each one
# through the statement returned by prepare_hit_desc_sth.
#
# Arguments:
#   $dbobj      - passed to prepare_hit_desc_sth to prepare the statement.
#   $id_list    - array ref of accessions, passed to fetch_descriptions.
#   $chunk_size - passed to fetch_descriptions (defaults to 1000).
#
# Returns the array ref of failed ids reported by fetch_descriptions.
# A row that cannot be stored is reported on STDERR and skipped; it does
# not abort the remaining rows and is not added to the returned list.
sub write_descriptions {
    my ( $self, $dbobj, $id_list, $chunk_size ) = @_;
    $chunk_size ||= 1000;

    my ( $descriptions, $failed ) = $self->fetch_descriptions( $id_list, $chunk_size );
    my $sth = $self->prepare_hit_desc_sth($dbobj);

    # Order matches the placeholders that follow the accession in the
    # prepared statement.
    my @fields = ( 'description', 'length', 'taxonid', 'database' );

    for my $accession ( keys %$descriptions ) {
        my @desc = @{ $descriptions->{$accession} }{@fields};

        # Judge success by the eval's own return value; $@ alone can be
        # reset by code that runs while the eval unwinds.
        my $stored = eval { $sth->execute( $accession, @desc ); 1 };
        if ( !$stored ) {
            print STDERR "Unable to store description for $accession [$@]\n";
        }
    }

    return $failed;
}