package EnsEMBL::Web::Component::LDtable;
# Puts together chunks of XHTML for LD-based displays
use EnsEMBL::Web::Component;
our @ISA = qw( EnsEMBL::Web::Component);
use POSIX qw(floor ceil);
use strict;
use warnings;
no warnings "uninitialized";
sub ld_values {
### Arg1 : The data object
### Example : my $return = ld_values($object);
### Description : Array of pairwise values which can then be formatted to be a table
### in text, html, excel format etc. Only the bottom left half of the
### in text, html, excel format etc. Only the bottom left half of the
### table''s values are filled in to avoid duplicating the data
### displayed.
### The last SNP column is not rendered as all pairwise data
### for this SNP is already displayed elsewhere in the table
### The first SNP row is not displayed as this would just
### duplicated data
### Returns hashref
### Return info :
### Keys are the type of LD data (r2, dprime)
### For each $return{"r2"} there are two keys: data, and text
### The text string is the title for the table
### The data is an array ref with three array refs: start postitions,
### snpnames and the rest of the table data
### Each value in the array is an arrayrefs with
### arrayref of SNP start positions in basepair (start order)
### Arrayref of SNP names in start order
### Arrayref 2 dimensional array of LD values
my $object = shift;
my %pops = %{ _get_pops_from_param($object) };
unless (keys %pops) {
warn "****** ERROR: No population defined";
return;
}
# Header info -----------------------------------------------------------
# Check there is data to display
my $zoom = $object->param('w')|| 50000;
my %return;
my $display_zoom = $object->round_bp($zoom);
foreach my $pop_name (sort keys %pops ) {
my $pop_obj = $object->pop_obj_from_name($pop_name);
next unless $pop_obj;
my $pop_id = $pop_obj->{$pop_name}{dbID};
my $data = $object->ld_for_slice($pop_obj->{$pop_name}{'PopObject'}, $zoom);
foreach my $ldtype ( "r2", "d_prime" ) {
my $display = $ldtype eq 'r2' ? "r2" : "D'";
my $nodata = "No $display linkage data in $display_zoom window for population $pop_name";
unless (%$data && keys %$data) {
$return{$ldtype}{$pop_name}{"text"} = $nodata;
next;
}
my @snp_list =
sort { $a->[1]->start <=> $b->[1]->start }
map { [ $_ => $data->{'variationFeatures'}{$_} ] }
keys %{ $data->{'variationFeatures'} };
unless (scalar @snp_list) {
$return{$ldtype}{$pop_name}{"text"} = $nodata;
next;
}
# Do each column starting from 1 because first col is empty---------
my @table;
my $flag = 0 ;
for (my $xcounter=0; $xcounter < scalar @snp_list; $xcounter++) {
# Do from left side of table row across to current snp
for (my $ycounter= 0; $ycounter < $xcounter; $ycounter++) {
my $ld_pair1 ="$snp_list[$xcounter]->[0]".-$snp_list[$ycounter]->[0];
my $ld_pair2 ="$snp_list[$ycounter]->[0]".-$snp_list[$xcounter]->[0];
my $cell;
if ( $data->{'ldContainer'}{$ld_pair1}) {
$cell = $data->{'ldContainer'}{$ld_pair1}{$pop_id}{$ldtype};
}
elsif ( $data->{'ldContainer'}{$ld_pair2}) {
$cell = $data->{'ldContainer'}{$ld_pair2}{$pop_id}{$ldtype};
}
$flag = $cell ? 1 : 0 unless $flag;
$table[$xcounter][$ycounter] = $cell;
}
}
unless ($flag) {
$return{$ldtype}{$pop_name}{"text"} = $nodata;
next;
}
# Turn snp_list from an array of variation_feature IDs to SNP 'rs' names
# Make current SNP bold
my @snp_names;
my @starts_list;
my $snp = $object->param('snp') || "";
foreach (@snp_list) {
my $name = $_->[1]->variation_name; #name
if ($name eq $snp or $name eq "rs$snp") {
push (@snp_names, "*$name*");
} else {
push (@snp_names, $name); }
my( $start, $end ) = ($_->[1]->start, $_->[1]->end ); #position
my $pos = $start;
if($start > $end ) {
$pos = "between $start & $end";
} elsif($start < $end ) {
$pos = "$start-"."$end";
}
push (@starts_list, $pos);
}
my $location = $object->seq_region_name .":".$object->seq_region_start.
"-".$object->seq_region_end;
$return{$ldtype}{$pop_name}{"text"} = "Pairwise $display values for $location. Population: $pop_name";
$return{$ldtype}{$pop_name}{"data"} = [\@starts_list, \@snp_names, \@table];
} # end foreach
}
return \%return;
}
###############################################################################
sub html_lddata { return output_lddata( @_ ); }
sub text_lddata { return output_lddata( @_ ); }
sub excel_lddata { return output_lddata( @_ ); }
sub output_lddata {
### The arguments are either a string with "No data" message or Arrayref
### Each value in the array is an arrayrefs with arrayref of
### SNP start positions in basepair (start order),
### arrayref of SNP names in start order,
### Arrayref 2 dimensional array of LD values,
### Example : $self->excel_ldtable({$title, \@starts, \@snps, \@table});
### Description : prints excel formatted table for LD data
my ($panel, $object ) = @_;
my $return = ld_values($object);
return 1 unless %$return;
# Formatting
my $renderer = $panel->renderer;
my $table_renderer = $renderer->new_table_renderer();
my @colour_gradient = @{ _get_colour_gradient($object) };
my %populations = ();
foreach my $ldtype (keys %$return) {
foreach my $pop_name (keys %{$return->{$ldtype}}) {
$populations{$pop_name}=1;
}
}
foreach my $pop_name ( sort {$a cmp $b } keys %populations ) {
my $flag = 1;
foreach my $ldtype (keys %$return) {
my $C = 0;
unless ( $return->{$ldtype}{$pop_name}{"data"} ) {
next;
}
$C++;
my ( $starts, $snps, $table_data ) = ( @{$return->{$ldtype}{$pop_name}{"data"} });
(my $T = $pop_name ) =~ s/[^\w\s]/_/g;
warn "SHEET NAME: $ldtype $T";
if( $flag ) {
$table_renderer->new_sheet( "$T" ); # Start a new sheet (and new table)
$flag = 0;
} else {
$table_renderer->new_table(); # Start a new table!
}
$table_renderer->set_width( 2 + @$snps );
$table_renderer->heading( $return->{$ldtype}{$pop_name}{"text"} );
$table_renderer->new_row();
$table_renderer->write_header_cell( "bp position" );
$table_renderer->write_header_cell( "SNP" );
foreach( @$snps ) {
$table_renderer->write_header_cell( $_ );
}
$table_renderer->new_row();
unshift (@$table_data, []);
foreach my $table_row (@$table_data) {
next unless ref $table_row eq 'ARRAY';
my $snp = shift @$snps;
my $pos = shift @$starts;
$table_renderer->write_header_cell( $pos );
$table_renderer->write_header_cell( $snp );
my $col =2;
my @ld_values = ( map { $_ ? sprintf("%.3f", $_ ): '-' } @$table_row );
foreach my $value (@ld_values) {
my $format = $table_renderer->new_format({
'align' => 'center',
'bgcolor' => $value eq '-' ? 'ffffff' : $colour_gradient[floor($value*40)]
});
$table_renderer->write_cell( $value, $format );
}
$table_renderer->write_header_cell( $snp );
$table_renderer->new_row;
}
}
}
$table_renderer->clean_up;
}
sub text_haploview {
### Format: olumns of family, individual, father, mother, gender, affected status and genotypes
my ($panel, $object) = @_;
my %pops = _get_pops_from_param($object);
return unless keys %pops;
my $snps = $object->get_variation_features;
my %ind_data = %{ $object->individual_table };
unless (%ind_data) {
$panel->print("No individual genotypes for this SNP");
return 1;
}
}
sub _get_pops_from_param {
### Arg1 : Proxy object
### Gets population names from 'bottom' CGI parameter and puts them
### into a hash with population names as keys
### Returns hashref
my ($object) = @_;
my %pops = ();
my @bottom = $object->param('bottom');
foreach my $tmp ( @bottom ) {
foreach (split /\|/, $tmp) {
next unless $_ =~ /opt_pop_(.*):(\w*)/;
$pops{$1} = 1 if $2 eq 'on';
}
}
return \%pops;
}
sub _get_colour_gradient {
my ($object) = @_;
my $image_config = $object->image_config_hash( 'ldview' );
my @colour_gradient = ('ffffff', $image_config->colourmap->build_linear_gradient( 41,'mistyrose', 'pink', 'indianred2', 'red' ));
return \@colour_gradient || [];
}
#--------------------------------------------------------------------
sub haploview_dump {
my( $panel, $object ) = @_;
my $FN = $object->temp_file_name( undef, 'XXX/X/X/XXXXXXXXXXXXXXX' );
my ($PATH,$FILE) = $object->make_directory( $object->species_defs->ENSEMBL_TMP_DIR_IMG."/$FN" );
my $ped_file = $object->species_defs->ENSEMBL_TMP_DIR_IMG."/$FN.ped";
my $info_file = $object->species_defs->ENSEMBL_TMP_DIR_IMG."/$FN.txt";
my $ped_url = $object->species_defs->ENSEMBL_TMP_URL_IMG."/$FN.ped";
my $info_url = $object->species_defs->ENSEMBL_TMP_URL_IMG."/$FN.txt";
my $both_url = $object->species_defs->ENSEMBL_TMP_URL_IMG."/$FN.tar.gz";
haploview_files( $ped_file, $info_file, $object );
system( "cd $PATH; tar cf - $FILE.ped $FILE.txt | gzip -9 > $FILE.tar.gz" );
$panel->print(qq (<p>
Your export has been processed successfully. You can download
the exported data by following the links below.
</p>
<ul>
<li><strong>Genotype file:</strong> <a target="_blank" href="$ped_url">genotypes.ped</a> [Genotypes in linkage format]</li>
<li><strong>Locus information:</strong> <a target="_blank" href="$info_url">marker_info.txt</a> [Locus information file]</li>
</ul>
<p><strong>OR</strong>
<ul>
<li><strong>Combined file:</strong> <a href="$both_url">haploview_files.tar.gz</a></li>
</ul>
<p>These files can be uploaded into the <a href="http://www.broad.mit.edu/mpg/haploview/index.php">Haploview</a> software for further haplotype analysis. The linkage file name must end in ".ped" and
the marker one in ".txt".</p>
)
);
return 1;
}
sub haploview_files {
my( $PED, $INFO, $object ) = @_;
open PED, ">$PED";
open INFO, ">$INFO";
my %ind_genotypes;
my %individuals;
my @snps;
#gets all genotypes in the Slice as a hash. where key is region_name-region_start
my $slice_genotypes = $object->get_all_genotypes();
foreach my $vf ( @{ $object->get_variation_features } ) {
my ($genotypes, $ind_data) = $object->individual_genotypes($vf,$slice_genotypes);
next unless %$genotypes;
my $name = $vf->variation_name;
print INFO join " ", $name, $vf->start."\n";
push @snps, $name;
#ind_genotypes{individual}{snp} = genotype
map { $ind_genotypes{$_}{$name} = $genotypes->{$_} } (keys %$genotypes);
map { $individuals{$_} = $ind_data->{$_} } (keys %$ind_data);
}
my $family;
foreach my $individual (keys %ind_genotypes) {
my $output = join "\t", ("FAM".$family++,
$individual,
$individuals{$individual}{father},
$individuals{$individual}{mother},
$individuals{$individual}{gender},
"0\t");
foreach my $snp (@snps) {
my $genotype = $ind_genotypes{$individual}{$snp} || "00";
$genotype =~ tr/ACGTN/12340/;
$output .= join " ", (split //, $genotype);
$output .= "\t";
}
print PED "$output\n";
}
close PED;
close INFO;
}
1;