Bio::EnsEMBL::Analysis::RunnableDB
CollapseAffyProbes
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::CollapseAffyProbes
Inherit
Synopsis
my $affy =
Bio::EnsEMBL::Analysis::RunnableDB::CollapseAffyProbes->new(
-db => $refdb,
-analysis => $analysis_obj,
-database => $EST_GENOMIC,
-query_seqs => \@sequences,
);
$affy->fetch_input();
$affy->run();
$affy->output();
$affy->write_output(); #writes to DB and a big fasta file
Description
This object runs the first step in the process for mapping affymetrix probes to a genome.
The probes supplied are redundant - a single probe (characterised by a unique sequence)
may occur in different positions of different arrays. So we will first collect all
redundant sequences, find a non-redundant subset, and then simultaneously
-- write the probes to our db, and
-- write an output file of non-redundant sequences to a flat file, so that
exonerate can map them against the genome in the next step.
Note that probes are defined as redundant when they share the same
- sequence and
- probeset
Methods
NON_REDUNDANT_PROBE_SEQS | No description | Code |
OUTDB | No description | Code |
QUERYSEQS | No description | Code |
add_array_to_existing_probe | No description | Code |
affy_arrays | No description | Code |
affy_probes | No description | Code |
clean_affy_features | No description | Code |
create_new_array | No description | Code |
create_new_probe | No description | Code |
fetch_input | No description | Code |
get_output_db | No description | Code |
new | No description | Code |
non_redundant_probe_seqs | No description | Code |
populate_affy_arrays_and_probes | No description | Code |
query_file | No description | Code |
read_and_check_config | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
NON_REDUNDANT_PROBE_SEQS | description | prev | next | Top |
# Getter/setter for the NON_REDUNDANT_PROBE_SEQS setting (one of the variables
# that read_and_check_config requires to be defined).
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub NON_REDUNDANT_PROBE_SEQS {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_NON_REDUNDANT_PROBE_SEQS'} = $new_value if defined $new_value;

    return ( exists $self->{'_NON_REDUNDANT_PROBE_SEQS'} )
        ? $self->{'_NON_REDUNDANT_PROBE_SEQS'}
        : undef;
}
# Getter/setter for the OUTDB setting. read_and_check_config insists that a
# true value is a hash ref, and get_output_db expands it into DBAdaptor
# constructor arguments.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub OUTDB {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_CONFIG_OUTDB'} = $new_value if defined $new_value;

    return ( exists $self->{'_CONFIG_OUTDB'} )
        ? $self->{'_CONFIG_OUTDB'}
        : undef;
}
1; } |
# Getter/setter for the QUERYSEQS setting; fetch_input reads it as the path of
# the probe input file.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub QUERYSEQS {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_CONFIG_QUERYSEQS'} = $new_value if defined $new_value;

    return ( exists $self->{'_CONFIG_QUERYSEQS'} )
        ? $self->{'_CONFIG_QUERYSEQS'}
        : undef;
}
# Records that an already-registered probe also occurs on another array.
#   Arg [1] : probe object previously registered for this probeset/sequence
#   Arg [2] : array object on which the duplicate was found
#   Arg [3] : probeset name of the duplicate
#   Arg [4] : probe name of the duplicate
#   Returns : whatever $probe->add_Array_probename returns
#   Throws  : when the duplicate's probeset differs from the probeset of the
#             probe it is being merged into
sub add_array_to_existing_probe {
    my ( $self, $probe, $array, $probeset, $probename ) = @_;

    # Identical sequence but a different probeset means the input is
    # inconsistent; refuse to merge.
    if ( $probe->probeset ne $probeset ) {
        throw(
            "Inconsistency: have found a probe ".$array->name.":$probeset:$probename with ".
            "identical sequence but different probeset to another one: ".$probe->probeset."\n"
        );
    }

    return $probe->add_Array_probename( $array, $probename );
}
# Getter/setter for the collection of arrays seen so far; run() stores a hash
# ref keyed by array name here.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub affy_arrays {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_affy_arrays'} = $new_value if defined $new_value;

    return ( exists $self->{'_affy_arrays'} )
        ? $self->{'_affy_arrays'}
        : undef;
}
} |
# Getter/setter for the collection of probes; run() stores a hash ref keyed by
# "<probeset>:---:<sequence>" here.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub affy_probes {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_affy_probes'} = $new_value if defined $new_value;

    return ( exists $self->{'_affy_probes'} )
        ? $self->{'_affy_probes'}
        : undef;
}
} |
# Completes each feature in place: sets this runnable's analysis on it,
# attaches the slice named by the feature's seqname, and swaps the feature's
# probe for the one registered in affy_probes under
# "<array name>:<probeset>:<probe name>".
#   Arg [1..n] : feature objects to complete
#   Throws     : when no registered probe matches a feature
sub clean_affy_features {
    my ( $self, @affy_features ) = @_;

    my $slice_adaptor = $self->db->get_SliceAdaptor;

    # slice name => slice object, so every slice is fetched only once
    my %slice_for;

    for my $feature (@affy_features) {
        $feature->analysis( $self->analysis );

        my $slice_name = $feature->seqname;
        unless ( exists $slice_for{$slice_name} ) {
            $slice_for{$slice_name} = $slice_adaptor->fetch_by_name($slice_name);
        }
        $feature->slice( $slice_for{$slice_name} );

        # Only the first array and first probe name of the feature's current
        # probe are used to build the lookup key.
        my $first_array = $feature->probe->get_all_AffyArrays->[0];
        my $probe_name  = $first_array->name.":".$feature->probeset.":".$feature->probe->get_all_probenames->[0];

        my $registered_probe = $self->affy_probes->{$probe_name};
        unless ($registered_probe) {
            throw("Inconsistency! I can't find an affy probe corresponding to $probe_name\n");
        }
        $feature->probe($registered_probe);
    }
}
} |
# Builds a new Bio::EnsEMBL::AffyArray with the given name and a set size of 0.
#   Arg [1] : name of the array
#   Returns : the newly constructed array object
sub create_new_array {
    my ( $self, $array_name ) = @_;

    # Arrow syntax rather than "new Class(...)": this package defines its own
    # new(), which makes the indirect-object form ambiguous to the parser.
    my $affy_array = Bio::EnsEMBL::AffyArray->new(
        -name    => $array_name,
        -setsize => 0,
    );

    return $affy_array;
}
} |
# Builds a new Bio::EnsEMBL::AffyProbe for one probe on one array.
#   Arg [1] : array object the probe belongs to
#   Arg [2] : probeset name
#   Arg [3] : probe name
#   Arg [4] : probe sequence (accepted but not passed to the constructor)
#   Returns : the newly constructed probe object
sub create_new_probe {
    my ( $self, $affy_array, $probeset, $probe_name, $current_sequence ) = @_;

    # Arrow syntax rather than "new Class(...)": this package defines its own
    # new(), which makes the indirect-object form ambiguous to the parser.
    my $affy_probe = Bio::EnsEMBL::AffyProbe->new(
        -probeset => $probeset,
        -name     => $probe_name,
        -array    => $affy_array,
    );

    return $affy_probe;
}
# Validates the configured input and copies the configured paths onto the
# runnable: QUERYSEQS becomes query_file and NON_REDUNDANT_PROBE_SEQS becomes
# non_redundant_probe_seqs.
#   Returns : the value returned by the non_redundant_probe_seqs setter
#   Throws  : when QUERYSEQS is a directory, or is not an existing file of
#             non-zero size
sub fetch_input {
    my ($self) = @_;

    my $query = $self->QUERYSEQS;

    if ( -e $query and -d $query ) {
        throw("I need to have all affy probes input in one big file\n");
    }
    elsif ( -e $query and -s $query ) {
        $self->query_file($query);
    }
    else {
        throw("'$query' refers to something that could not be made sense of\n");
    }

    return $self->non_redundant_probe_seqs( $self->NON_REDUNDANT_PROBE_SEQS );
}
# Returns the database adaptor that results are written to.
# When OUTDB is set, a new Bio::EnsEMBL::DBSQL::DBAdaptor is built from its
# key/value pairs with this runnable's db attached as -dnadb; otherwise the
# runnable's own db is returned.
sub get_output_db {
    my ($self) = @_;

    my $outdb;
    my $outdb_params = $self->OUTDB;

    if ($outdb_params) {
        # Arrow syntax rather than "new Class(...)": this package defines its
        # own new(), which makes the indirect-object form ambiguous.
        $outdb = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
            %{$outdb_params},
            -dnadb => $self->db,
        );
    }
    else {
        $outdb = $self->db;
    }

    return $outdb;
}
} |
# Constructor: delegates construction to the parent class, then loads and
# validates the configuration held in $AFFY_CONFIG.
#   Args    : passed through unchanged to the parent constructor
#   Returns : the new object
sub new {
    my $class = shift;

    my $self = $class->SUPER::new(@_);
    $self->read_and_check_config($AFFY_CONFIG);

    return $self;
}
# Getter/setter for the path of the non-redundant sequence file; fetch_input
# fills it from NON_REDUNDANT_PROBE_SEQS and write_output opens it for writing.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub non_redundant_probe_seqs {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_non_redudant_probe_seqs'} = $new_value if defined $new_value;

    return ( exists $self->{'_non_redudant_probe_seqs'} )
        ? $self->{'_non_redudant_probe_seqs'}
        : undef;
}
} |
# Reads the probe headers of the query file and fills affy_arrays (keyed by
# array name) and affy_probes (keyed by "<array>:<probeset>:<probe name>").
# Objects already present in the output database are reused; anything not
# found there is constructed fresh.
#   Throws : when the query file cannot be opened, or when a header line
#            does not yield an array name, probeset and probe name
sub populate_affy_arrays_and_probes {
    my ($self) = @_;

    my $query_file         = $self->query_file;
    my $outdb              = $self->get_output_db;
    my $affy_array_adaptor = $outdb->get_AffyArrayAdaptor;
    my $affy_probe_adaptor = $outdb->get_AffyProbeAdaptor;

    # The accessors return undef until something has been stored, and undef
    # cannot be dereferenced as a hash; make sure both hashes exist.
    $self->affy_arrays( {} ) if !defined $self->affy_arrays;
    $self->affy_probes( {} ) if !defined $self->affy_probes;
    my $arrays_by_name = $self->affy_arrays;
    my $probes_by_key  = $self->affy_probes;

    open( my $probes_fh, '<', $query_file )
        or throw("Could not open '$query_file' for reading: $!\n");

    while ( my $line = <$probes_fh> ) {
        chomp $line;

        # Only header lines describe a probe; sequence lines are skipped.
        next if $line !~ /^>/;

        # A header that fails to match leaves all three fields undefined and
        # is reported by the checks below.
        my ( $array_name, $probeset, $probe_name ) =
            $line =~ /^>probe:(\S+):(\S+):(\S+:\S+;).*$/;

        if ( !$array_name ) {
            throw("array name could not be deduced from: ".$line."\n");
        }
        if ( !$probeset ) {
            throw("probeset could not be deduced from: ".$line."\n");
        }
        if ( !$probe_name ) {
            throw("probename could not be deduced from: ".$line."\n");
        }

        my $affy_array = $affy_array_adaptor->fetch_by_name($array_name);
        if ( !$affy_array ) {
            $affy_array = Bio::EnsEMBL::AffyArray->new( -name => $array_name );
        }
        $arrays_by_name->{$array_name} = $affy_array;

        my $affy_probe = $affy_probe_adaptor->fetch_by_array_probeset_probe(
            $array_name,
            $probeset,
            $probe_name
        );
        if ( !$affy_probe ) {
            $affy_probe = Bio::EnsEMBL::AffyProbe->new(
                -probeset => $probeset,
                -name     => $probe_name,
                -array    => $affy_array,
            );
        }
        $probes_by_key->{ $array_name.":".$probeset.":".$probe_name } = $affy_probe;
    }

    close($probes_fh);

    return;
}
} |
# Getter/setter for the path of the probe input file; fetch_input sets it and
# run() reads the probes from it.
#   Arg [1] : (optional) new value; it is stored only when defined
#   Returns : the stored value, or undef when nothing has been stored yet
sub query_file {
    my $self      = shift;
    my $new_value = shift;

    $self->{'_query_file'} = $new_value if defined $new_value;

    return ( exists $self->{'_query_file'} )
        ? $self->{'_query_file'}
        : undef;
}
} |
# Loads the configuration through the parent class (always from $AFFY_CONFIG)
# and then verifies it.
#   Throws : when QUERYSEQS, NON_REDUNDANT_PROBE_SEQS or OUTDB is undefined,
#            or when OUTDB is a true value that is not a hash ref
sub read_and_check_config {
    my ($self) = @_;

    $self->SUPER::read_and_check_config($AFFY_CONFIG);

    my $logic = $self->analysis->logic_name;

    # Each name is both the config variable and the accessor that exposes it.
    my @required_vars = qw(QUERYSEQS NON_REDUNDANT_PROBE_SEQS OUTDB);
    for my $config_var (@required_vars) {
        next if defined $self->$config_var;
        throw("You must define $config_var in config for logic '$logic'");
    }

    if ( $self->OUTDB and ref( $self->OUTDB ) ne "HASH" ) {
        throw("OUTDB in config for '$logic' must be a hash ref of db connection pars.");
    }
}
# Collapses the redundant probes of the query file into a non-redundant set.
#
# The file is read as FASTA-like records: a header line of the form
# ">probe:<array>:<probeset>:<probe name>" followed by one or more sequence
# lines. Probes sharing probeset and sequence become a single probe object
# that is linked to every array it was seen on. The results are stored in
# affy_probes (keyed by "<probeset>:---:<sequence>") and affy_arrays (keyed by
# array name).
#
#   Returns : the value returned by the affy_arrays setter
#   Throws  : when the query file cannot be opened, or when sequence data
#             appears before any probe header
sub run {
    my ($self) = @_;

    my %probes_by_sequence;
    my %affy_arrays;

    my $query_file = $self->query_file;
    open( my $probes_fh, '<', $query_file )
        or throw("Could not open '$query_file' for reading: $!\n");

    my $current_sequence   = undef;
    my $probeset           = undef;
    my $probe_name         = undef;
    my $current_affy_array = undef;

    while ( my $line = <$probes_fh> ) {
        chomp $line;

        if ( $line =~ /^>probe:(\S+):(\S+):(\S+:\S+;).*$/ ) {
            # Copy the captures before any other code can run a regex.
            my ( $array_name, $next_probeset, $next_probe_name ) = ( $1, $2, $3 );

            # A new header finishes the record collected so far.
            if ($current_sequence) {
                if ( !$current_affy_array ) {
                    throw("Have sequence $current_sequence but no current array !\n");
                }
                $self->_add_probe_to_nonredundant_set(
                    \%probes_by_sequence, $current_affy_array,
                    $probeset,            $probe_name,
                    $current_sequence
                );
                $current_sequence = undef;
            }

            $probeset   = $next_probeset;
            $probe_name = $next_probe_name;

            $current_affy_array = $affy_arrays{$array_name};
            if ( !$current_affy_array ) {
                $current_affy_array = $self->create_new_array($array_name);
                $affy_arrays{$array_name} = $current_affy_array;
            }
        }
        else {
            # Sequence data may span several lines; glue the pieces together.
            $current_sequence =
                $current_sequence ? $current_sequence . $line : $line;
        }
    }

    close($probes_fh);

    # The last record has no following header to trigger its registration.
    # Skip this when nothing is pending (empty file, or a trailing header
    # without sequence), exactly as the loop does for such headers.
    if ($current_sequence) {
        if ( !$current_affy_array ) {
            throw("Have sequence $current_sequence but no current array !\n");
        }
        $self->_add_probe_to_nonredundant_set(
            \%probes_by_sequence, $current_affy_array,
            $probeset,            $probe_name,
            $current_sequence
        );
    }

    $self->affy_probes( \%probes_by_sequence );
    return $self->affy_arrays( \%affy_arrays );
}

# Registers one parsed probe record: creates a new probe when this
# probeset/sequence combination is seen for the first time, otherwise adds the
# array and probe name to the probe that already exists.
sub _add_probe_to_nonredundant_set {
    my ( $self, $probes_by_sequence, $affy_array, $probeset, $probe_name, $sequence ) = @_;

    # write_output recovers the sequence from this key, so the ":---:"
    # separator must stay exactly as it is.
    my $key = $probeset.":---:".$sequence;

    my $existing_probe = $probes_by_sequence->{$key};
    if ( !$existing_probe ) {
        $probes_by_sequence->{$key} =
            $self->create_new_probe( $affy_array, $probeset, $probe_name, $sequence );
    }
    else {
        $self->add_array_to_existing_probe(
            $existing_probe, $affy_array, $probeset, $probe_name
        );
    }

    return;
}
# Stores the collected arrays and probes in the output database and writes
# the non-redundant sequences to the file named by non_redundant_probe_seqs,
# one FASTA record per probe with the probe's dbID as the header.
#   Returns : 1 on success
#   Throws  : when the output file cannot be opened or closed, when an array
#             or probe cannot be stored, or when a probe key does not contain
#             the ":---:" separator followed by the sequence
sub write_output {
    my ($self) = @_;

    my $outdb   = $self->get_output_db;
    my $outfile = $self->non_redundant_probe_seqs;

    # Fail before touching the database if the file cannot be written.
    open( my $out_fh, '>', $outfile )
        or throw("Failed to open ".$outfile." for writing: $!\n");

    my $affy_array_adaptor = $outdb->get_AffyArrayAdaptor;
    my $affy_probe_adaptor = $outdb->get_AffyProbeAdaptor;

    # Arrays that already carry a dbID are not stored again.
    foreach my $affy_array ( values %{ $self->affy_arrays } ) {
        if ( !$affy_array->dbID ) {
            eval { $affy_array_adaptor->store($affy_array) };
            if ($@) {
                $self->throw("Unable to store affy array!\n $@");
            }
        }
    }

    foreach my $probeset_sequence_key ( keys %{ $self->affy_probes } ) {
        my $affy_probe = $self->affy_probes->{$probeset_sequence_key};

        # The sequence is the part of the key after the ":---:" separator.
        my ($sequence) = $probeset_sequence_key =~ /.*:---:(.*)/;
        if ( !defined $sequence ) {
            throw("Cannot extract a sequence from probe key '$probeset_sequence_key'\n");
        }

        if ( !$affy_probe->dbID ) {
            eval { $affy_probe_adaptor->store($affy_probe) };
            if ($@) {
                $self->throw("Unable to store affy probe!\n $@");
            }
        }

        print {$out_fh} ">".$affy_probe->dbID."\n";
        print {$out_fh} $sequence."\n";
    }

    # Buffered write errors only surface when the handle is closed.
    close($out_fh) or throw("Failed top close ".$outfile);

    return 1;
}
} |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _