# Raw content of BioMart::Formatter::CSV_36
# $Id: CSV_36.pm,v 1.5 2007/10/15 15:52:26 ds5 Exp $
#
# BioMart module for BioMart::Formatter::CSV_36
#
# You may distribute this module under the same terms as perl
# itself.
# POD documentation - main docs before the code.
=head1 NAME
BioMart::Formatter::CSV_36
=head1 SYNOPSIS
The CSV_36 Formatter returns comma separated tabular data
for a BioMart query's ResultTable, with mouse coordinates converted
from the NCBIM37 assembly to the NCBIM36 assembly
=head1 DESCRIPTION
When given a BioMart::ResultTable containing the results of
a BioMart::Query the CSV_36 Formatter will return tabular output
with one line for each row of data in the ResultTable and commas
separating the individual entries in each row. Coordinate attributes
are projected from the NCBIM37 assembly onto NCBIM36; a position that
cannot be projected cleanly is reported as '-'. The getDisplayNames
and getFooterText methods can be used to return appropriately formatted
headers and footers respectively.
=head1 AUTHORS
=over
=item *
Damian Smedley
=back
=head1 CONTACT
This module is part of the BioMart project
http://www.biomart.org
Questions can be posted to the mart-dev mailing list:
mart-dev@ebi.ac.uk
=head1 METHODS
=cut
package BioMart::Formatter::CSV_36;
use strict;
use warnings;
# Extends BioMart::FormatterI
use base qw(BioMart::FormatterI);
use Readonly;
use Getopt::Long;
# The Ensembl API classes are loaded at run time inside an eval and $@ is
# not inspected afterwards, so a failed require does not abort loading of
# this module. _new and nextRow call into these classes.
eval{
require Bio::EnsEMBL::SimpleFeature;
require Bio::EnsEMBL::Analysis;
require Bio::EnsEMBL::DBSQL::DBAdaptor;
};
# Constants
# CSV layout: separator between fields, terminator of each record, and the
# quote character wrapped around fields that contain the separator.
Readonly my $FIELD_DELIMITER => q{,};
Readonly my $RECORD_DELIMITER => qq{\n};
Readonly my $FIELD_ENCLOSER => qq{\"};
# Assembly names used by nextRow: slices are fetched on $current_assembly
# and projected onto $new_assembly. Note that "new" here is the projection
# target (NCBIM36), not the more recent assembly.
Readonly my $current_assembly => 'NCBIM37';
Readonly my $new_assembly => 'NCBIM36';
# Connection settings handed to Bio::EnsEMBL::DBSQL::DBAdaptor in _new.
# NOTE(review): $user and $pass look like placeholders to be filled in per
# installation - confirm before deploying.
Readonly my $host => '127.0.0.1';
Readonly my $port => '3309';
Readonly my $user => '????';
Readonly my $pass => '????';
Readonly my $dbname => 'mus_musculus_core_47_37';
# _new
#
# Initialises the formatter: runs the parent class initialiser, opens an
# Ensembl core database connection using the module-level connection
# constants, and registers it as the 'db_adaptor' attribute that nextRow
# reads back when converting coordinates.
#
# Returns whatever $self->attr() returns (unchanged from the original).
sub _new {
    my ($self) = @_;

    $self->SUPER::_new();

    # Connect to the database. The class is only require'd at run time
    # (inside an eval), so call the constructor with explicit method syntax
    # rather than the indirect-object form "new Class(...)", whose parsing
    # depends on what perl knows about the names at compile time.
    my $db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
        -HOST   => $host,
        -PORT   => $port,
        -USER   => $user,
        -PASS   => $pass,
        -DBNAME => $dbname,
    );

    $self->attr('db_adaptor', $db);
}
# getFormatterDisplayName
#
# Returns the fixed, human-readable label for this formatter.
sub getFormatterDisplayName {
    my $display_name = 'Mouse 36 assembly (CSV)';
    return $display_name;
}
# processQuery
#
# Prepares a query for this formatter before it is executed.
#
#   $query - query object providing getAllAttributes() and addAttribute()
#
# A copy of the attribute list the query currently has is stored under
# 'original_attributes' (only when getAllAttributes() returns a true
# value). Then 'strand' and 'chromosome_name' are appended, in that order;
# nextRow reads them back from the last two columns of each row. The query
# is stored under 'query' and returned.
sub processQuery {
    my ($self, $query) = @_;

    if ($query->getAllAttributes()) {
        # Take a shallow copy so the attributes added below do not show up
        # in the remembered list.
        my @requested = @{ $query->getAllAttributes() };
        $self->set('original_attributes', \@requested);
    }

    for my $helper_attribute ('strand', 'chromosome_name') {
        $query->addAttribute($helper_attribute);
    }

    $self->set('query', $query);

    return $query;
}
# nextRow
#
# Fetches the next row from the 'result_table' attribute, converts its
# coordinate columns from $current_assembly to $new_assembly, drops the two
# helper columns appended by processQuery, and returns the row as one CSV
# record terminated by $RECORD_DELIMITER.
#
# Returns nothing (bare return) when the result table is exhausted.
#
# Coordinate cells that cannot be projected cleanly are replaced by '-'.
# Empty/false coordinate cells are left untouched.
sub nextRow {
    my $self = shift;

    my $rtable = $self->get('result_table');
    my $row    = $rtable->nextRow;
    return if !$row;

    # processQuery appended 'strand' and then 'chromosome_name', so they are
    # the second-to-last and last columns of every row.
    my $chromosome = $row->[-1];
    my $strand     = $row->[-2];

    # Attributes whose values are coordinates on the current assembly.
    my %is_coordinate = map { $_ => 1 } qw(
        start_position
        end_position
        transcript_start
        transcript_end
        feat_chr_start
        feat_chr_end
        gene_chrom_start
        gene_chrom_end
        transcript_chrom_start
        transcript_chrom_end
        exon_chrom_start
        exon_coding_end
        exon_chrom_end
        exon_coding_start
        chromosome_location
    );

    my $attribute_order = $self->get('original_attributes') || [];

    # The chromosome slice and the analysis are the same for every cell of
    # this row, so they are built once, and only if a coordinate cell
    # actually needs converting.
    my ($slice_oldasm, $analysis);

    my $column = 0;
    foreach my $attribute (@{$attribute_order}) {
        my $index = $column++;
        next if !$is_coordinate{ $attribute->name };

        # convert coordinates
        my $position = $row->[$index];
        next if !$position;

        if (!defined $slice_oldasm) {
            my $db = $self->get('db_adaptor');
            my $sa = $db->get_SliceAdaptor();

            # create an analysis for the type of feature you wish to store
            $analysis = Bio::EnsEMBL::Analysis->new(
                -LOGIC_NAME => 'your_analysis',
            );
            $slice_oldasm = $sa->fetch_by_region(
                'chromosome', $chromosome, undef, undef,
                undef, $current_assembly
            );
        }

        $row->[$index] = $self->_project_position_to_new_assembly(
            $position, $strand, $slice_oldasm, $analysis
        );
    }

    # Drop the trailing 'strand' and 'chromosome_name' helper columns.
    my $last_kept = $#{$row} - 2;
    $row = [ @{$row}[ 0 .. $last_kept ] ];

    # Enclose non-numeric values containing the delimiter in double quotes
    # and escape the quotes already in them.
    foreach my $field (@{$row}) {
        $field = q{} unless defined $field;
        if ($field !~ /\A[\d\.]+\z/ && $field =~ /\Q$FIELD_DELIMITER\E/) {
            # The replacement must be "\\" followed by the interpolated
            # encloser. Writing "\$FIELD_ENCLOSER" escapes the dollar sign
            # and inserts the literal text '$FIELD_ENCLOSER' instead.
            $field =~ s/\Q$FIELD_ENCLOSER\E/\\$FIELD_ENCLOSER/g;
            $field = $FIELD_ENCLOSER . $field . $FIELD_ENCLOSER;
        }
    }

    # Create the final record-string
    return join($FIELD_DELIMITER, @{$row}) . $RECORD_DELIMITER;
}

# _project_position_to_new_assembly
#
# Projects a single base position from the old-assembly slice onto
# $new_assembly by wrapping it in a one-base SimpleFeature.
#
#   $position     - coordinate on $slice_oldasm
#   $strand       - strand passed to the temporary feature
#   $slice_oldasm - chromosome slice on $current_assembly
#   $analysis     - Bio::EnsEMBL::Analysis attached to the feature
#
# Returns the start of the projected slice, or '-' when the projection is
# unusable (see the checks below).
sub _project_position_to_new_assembly {
    my ($self, $position, $strand, $slice_oldasm, $analysis) = @_;

    # create a new feature on the old assembly
    my $feat = Bio::EnsEMBL::SimpleFeature->new(
        -DISPLAY_LABEL => '',
        -START         => $position,
        -END           => $position,
        -STRAND        => $strand,
        -SLICE         => $slice_oldasm,
        -ANALYSIS      => $analysis,
    );

    # project feature to new assembly
    my $feat_slice = $feat->feature_Slice;
    my @segments;
    if ($feat_slice) {
        @segments = @{ $feat_slice->project('chromosome', $new_assembly) };
    }

    # Sanity checks on the projection results; discard the projection if
    # 1. it doesn't project at all, or is fragmented (not exactly one segment)
    # 2. the projection doesn't have the same length as the original feature
    # 3. the projection lands on a different sequence region
    return '-' if scalar(@segments) != 1;

    my $projected = $segments[0]->to_Slice();
    return '-' if $projected->length != $feat->length;
    return '-' if $projected->seq_region_name ne $feat->slice->seq_region_name;

    return $projected->start;
}
# getDisplayNames
#
# Builds the CSV header record from the names returned by
# getTextDisplayNames(): names are joined with $FIELD_DELIMITER and the
# record is terminated by $RECORD_DELIMITER.
#
# A non-numeric name that contains the delimiter is wrapped in
# $FIELD_ENCLOSER, with any encloser characters inside it backslash-escaped.
sub getDisplayNames {
    my $self = shift;

    my @displayNames = $self->getTextDisplayNames();

    # Enclose non-numeric values in double quotes & escape the quotes
    # already in them
    foreach my $name (@displayNames) {
        $name = q{} unless defined $name;
        if ($name !~ /\A[\d\.]+\z/ && $name =~ /\Q$FIELD_DELIMITER\E/) {
            # The replacement must be "\\" followed by the interpolated
            # encloser. Writing "\$FIELD_ENCLOSER" escapes the dollar sign
            # and inserts the literal text '$FIELD_ENCLOSER' instead.
            $name =~ s/\Q$FIELD_ENCLOSER\E/\\$FIELD_ENCLOSER/g;
            $name = $FIELD_ENCLOSER . $name . $FIELD_ENCLOSER;
        }
    }

    # Create the final header string
    return join($FIELD_DELIMITER, @displayNames) . $RECORD_DELIMITER;
}
1;