None available.
# Parse a FlyBase feature file and register xrefs for the features in it.
#
# The input is read line by line as tab-separated text (the layout looks
# like GFF3 -- TODO confirm against the data source).  A line is skipped
# when it starts with '#', when column 2 is not 'FlyBase', when column 3
# is not a key of the file-level %object_types, or when column 9 carries
# no 'ID' attribute.
#
# Arguments (positional, after the invocant):
#   $source_id    - accepted for interface compatibility; not used here,
#                   source IDs are looked up per source name instead.
#   $species_id   - passed to get_valid_codes() and add_xref().
#   $files        - array reference; only the first element is read.
#   $release_file - accepted for interface compatibility; not used here.
#   $verbose      - stored in the file-level $verbose; enables the
#                   progress and summary output.
#
# NOTE(review): there is no explicit return statement.  perlsub says the
# value of a sub whose last statement is a loop is unspecified -- confirm
# what callers expect before adding one.
sub run {
    # Declare first, assign conditionally: "my $x = ... if COND" has
    # undefined behaviour according to perlsyn.
    my $self;
    $self = shift if ( defined( caller(1) ) );

    my $source_id    = shift;    # unused, see header comment
    my $species_id   = shift;
    my $files        = shift;
    my $release_file = shift;    # unused, see header comment
    $verbose = shift;

    my $data_file = $files->[0];

    # Xref IDs that already exist for these two sources, keyed by
    # accession; features are linked to them instead of creating new xrefs.
    my %pre_xref_ids = (
        'Uniprot'  => $self->get_valid_codes( 'uniprot',  $species_id ),
        'Interpro' => $self->get_valid_codes( 'interpro', $species_id )
    );

    # $xref_ids{label}{accession} = xref ID.  Serves both as a cache so
    # each xref is created only once and as the data for the summary.
    my %xref_ids;

    # Feature type implied by the first four characters of the feature ID.
    my %type_for_prefix = (
        'FBgn' => 'gene',
        'FBtr' => 'transcript',
        'FBpp' => 'translation'
    );

    # Fetch the xref stored under ($source_name, $label), creating it with
    # accession $xref_accession if it is not cached yet, then attach it to
    # the feature $id of type $type.
    my $link_xref = sub {
        my ( $source_name, $xref_source_id, $xref_accession, $label, $id,
             $type )
          = @_;

        my $xref_id;
        if ( exists( $xref_ids{$source_name}{$label} ) ) {
            $xref_id = $xref_ids{$source_name}{$label};
        }
        else {
            $xref_id =
              $self->add_xref( $xref_accession, undef, $label, '',
                               $xref_source_id, $species_id );
            $xref_ids{$source_name}{$label} = $xref_id;
        }

        $self->add_direct_xref( $xref_id, $id, $type, '' );
        return;
    };

    my $data_io = $self->get_filehandle($data_file);

    my ( $count_read, $count_skipped, $last_count_read ) = ( 0, 0, 0 );
    my $status_interval = 30;    # seconds between progress reports

    # Progress report; the handler re-arms itself after every report.
    local $SIG{ALRM} = sub {
        printf( "%d lines read, %d skipped, %d parsed; %d lines/s\n",
                $count_read, $count_skipped,
                $count_read - $count_skipped,
                ( $count_read - $last_count_read )/$status_interval )
          if ($verbose);
        $last_count_read = $count_read;
        alarm($status_interval);
    };
    alarm($status_interval);

    while ( defined( my $line = $data_io->getline() ) ) {
        ++$count_read;

        if ( substr( $line, 0, 1 ) eq '#' ) {
            ++$count_skipped;
            next;
        }

        chomp($line);
        my @fields = split( /\t/, $line );

        if ( !(    defined( $fields[1] )
                && $fields[1] eq 'FlyBase'
                && defined( $fields[2] )
                && exists( $object_types{ $fields[2] } ) ) )
        {
            ++$count_skipped;
            next;
        }

        # Column 9: ';'-separated key=value pairs.  A short line simply
        # yields no attributes instead of an "uninitialized" warning.
        my %attributes;
        my $attribute_column = defined( $fields[8] ) ? $fields[8] : '';
        foreach my $attribute ( split( /;/, $attribute_column ) ) {
            # Limit of 2 so that only the first '=' separates key from
            # value, matching the ':' split used for sub-attributes below.
            my ( $key, $value ) = split( /=/, $attribute, 2 );
            if (    defined($key)
                 && $key ne ''
                 && defined($value)
                 && $value ne '' )
            {
                $attributes{$key} = $value;
            }
        }

        # Everything below is keyed on the feature ID; without one there
        # is nothing to attach xrefs to.
        my $id = $attributes{'ID'};
        if ( !defined($id) ) {
            ++$count_skipped;
            next;
        }

        my $id_prefix = substr( $id, 0, 4 );
        my $type =
          exists( $type_for_prefix{$id_prefix} )
          ? $type_for_prefix{$id_prefix}
          : 'unknown';

        # Turn "db:acc,db:acc,..." strings into { db => [ acc, ... ] }.
        foreach my $attribute_key ( 'Dbxref', 'Ontology_term' ) {
            next if ( !exists( $attributes{$attribute_key} ) );

            my %accessions_for;
            foreach
              my $subattribute ( split( /,/, $attributes{$attribute_key} ) )
            {
                my ( $db, $accession ) = split( /:/, $subattribute, 2 );

                # An entry without a ':' has no accession to record.
                next if ( !defined($db) || !defined($accession) );

                push( @{ $accessions_for{$db} }, $accession );
            }
            $attributes{$attribute_key} = \%accessions_for;
        }

        my $dbxref = $attributes{'Dbxref'} || {};

        # Dbxref entries whose database name is in %source_name_map.
        foreach my $dbxref_name ( keys( %{$dbxref} ) ) {
            next if ( !exists( $source_name_map{$dbxref_name} ) );

            my $source_name = $source_name_map{$dbxref_name};
            my $dbxref_source_id =
              $self->get_source_id_for_source_name($source_name);

            my ($pre_source) = ( $source_name =~ /^(Uniprot|Interpro)/ );

            if ( defined($pre_source) ) {
                # Link to an existing xref only; accessions that are not
                # in %pre_xref_ids are tallied under a "(missed)" label.
                foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
                    if ( exists( $pre_xref_ids{$pre_source}{$accession} ) ) {
                        $self->add_direct_xref(
                                    $pre_xref_ids{$pre_source}{$accession},
                                    $id, $type, '' );
                        $xref_ids{$pre_source}{$accession} =
                          $pre_xref_ids{$pre_source}{$accession};
                    }
                    else {
                        $xref_ids{ $pre_source . ' (missed)' }{$accession} =
                          -1;
                    }
                }
            }
            else {
                foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
                    # 'bdgpinsituexpr' xrefs use the feature ID as their
                    # accession and the Dbxref value only as the label.
                    my $xref_accession =
                      ( $dbxref_name eq 'bdgpinsituexpr' )
                      ? $id
                      : $accession;

                    $link_xref->( $source_name, $dbxref_source_id,
                                  $xref_accession, $accession, $id, $type );
                }
            }
        }

        # GO terms from the Ontology_term attribute.
        my $ontology_term = $attributes{'Ontology_term'};
        if ( defined($ontology_term) && exists( $ontology_term->{'GO'} ) ) {
            my $source_name = 'GO';
            my $go_source_id =
              $self->get_source_id_for_source_name($source_name);

            foreach my $accession ( @{ $ontology_term->{'GO'} } ) {
                $link_xref->( $source_name, $go_source_id, $accession,
                              $accession, $id, $type );
            }
        }

        # FlyBase annotation IDs; the source name depends on the feature
        # type.
        if ( exists( $dbxref->{'FlyBase_Annotation_IDs'} ) ) {
            my $source_name = $special_source_name_map{$type}{'Dbxref'};
            my $annotation_source_id =
              $self->get_source_id_for_source_name($source_name);

            foreach my $accession ( @{ $dbxref->{'FlyBase_Annotation_IDs'} } )
            {
                $link_xref->( $source_name, $annotation_source_id,
                              $accession, $accession, $id, $type );
            }
        }

        # Name attribute: the xref accession is the feature ID and the
        # label is the name.
        {
            my $source_name = $special_source_name_map{$type}{'Name'};
            my $name_source_id =
              $self->get_source_id_for_source_name($source_name);

            $link_xref->( $source_name, $name_source_id, $id,
                          $attributes{'Name'}, $id, $type );
        }

        # The feature ID itself.
        {
            my $source_name = $special_source_name_map{$type}{'ID'};
            my $id_source_id =
              $self->get_source_id_for_source_name($source_name);

            $link_xref->( $source_name, $id_source_id, $id, $id, $id,
                          $type );
        }
    }

    # Cancel the pending progress timer.  The local handler disappears
    # when this sub returns, and a SIGALRM delivered after that would be
    # handled by whatever the caller had installed (by default it
    # terminates the process).
    alarm(0);

    $data_io->close();

    print("FlybaseParser Summary:\n") if ($verbose);
    foreach my $label ( sort( keys(%xref_ids) ) ) {
        my $accessions = $xref_ids{$label};
        printf( "\t%-32s %6d\n", $label, scalar( keys( %{$accessions} ) ) )
          if ($verbose);
    }
}
1; } |