# Reads the tab-delimited design notes file and registers the Array and
# ArrayChip(s) it describes.
#
# The first line of the file is treated as the header and is mapped to column
# positions via set_header_hash() using the 'notes_fields' definition. The
# first data row creates and stores the Array (type 'OLIGO') together with its
# ArrayChip. Every later row whose DESIGN_ID is not yet attached to the Array
# either adds a further ArrayChip (when array_set() is true) or is an error.
#
# Args:
#   $design_notes - optional path; when true it replaces
#                   $self->{'design_notes'} before the file is opened.
# Returns: nothing.
# Throws:
#   - if a second design is found while array_set() is false;
#   - if the file contains no data rows at all (no Array could be built).
sub read_array_data {
    my ($self, $design_notes) = @_;

    $self->log("Reading and importing array data");
    $self->{'design_notes'} = $design_notes if $design_notes;

    my ($array, %hpos);
    my $oa_adaptor = $self->db->get_ArrayAdaptor();
    my $ac_adaptor = $self->db->get_ArrayChipAdaptor();

    # Builds an ArrayChip from one data row, stores it and attaches it to the
    # current Array. Closes over $array and %hpos, so it always sees their
    # current values.
    my $store_array_chip = sub {
        my ($row) = @_;

        my $chip = Bio::EnsEMBL::Funcgen::ArrayChip->new(
            -ARRAY_ID  => $array->dbID(),
            -NAME      => $row->[ $hpos{'DESIGN_NAME'} ],
            -DESIGN_ID => $row->[ $hpos{'DESIGN_ID'} ],
        );

        ($chip) = @{ $ac_adaptor->store($chip) };
        $array->add_ArrayChip($chip);
        return;
    };

    my $fh = open_file("<", $self->{'design_notes'});

    while (my $line = <$fh>) {
        $line =~ s/\r*\n//;
        my @data = split /\t/, $line;

        # Line 1 is the header: record the column index of every field.
        if ($. == 1) {
            %hpos = %{ $self->set_header_hash(\@data, $self->get_def('notes_fields')) };
            next;
        }

        # A blank row carries no design; without this guard it would be
        # handled as a design whose id and name are undef.
        next if $line !~ /\S/;

        if (! defined $array) {
            $array = Bio::EnsEMBL::Funcgen::Array->new(
                -NAME        => $self->array_name() || $data[ $hpos{'DESIGN_NAME'} ],
                -FORMAT      => uc($self->format()),
                -VENDOR      => uc($self->vendor()),
                -TYPE        => 'OLIGO',
                -DESCRIPTION => $data[ $hpos{'DESCRIPTION'} ],
            );

            ($array) = @{ $oa_adaptor->store($array) };
            $store_array_chip->(\@data);
        }
        elsif (! $array->get_ArrayChip_by_design_id($data[ $hpos{'DESIGN_ID'} ])) {
            # A design we have not seen yet: only legal for array sets.
            if (! $self->array_set()) {
                throw("Found experiment with more than one design without -array_set");
            }

            $self->log("Generating new ArrayChip(".$data[ $hpos{'DESIGN_NAME'} ].") for same Array ".$array->name()."\n");
            $store_array_chip->(\@data);
        }
    }

    close($fh);

    # A header-only (or empty) file would otherwise register undef as an Array.
    if (! defined $array) {
        throw("No array design records found in ".$self->{'design_notes'});
    }

    $self->add_array($array);
    return;
}
# Parses the tab-delimited probe design file for the single ArrayChip of this
# import and stores one Probe with one ProbeFeature per data row.
#
# The first line of the file is the header; it is mapped to column positions
# via set_header_hash() using the 'prb_fields' definition. Rows whose SEQ_ID
# cannot be resolved by cache_slice() are skipped with a warning. When
# $self->{'_dump_fasta'} is true, a FASTA record is written for every stored
# probe.
#
# Args:
#   $array_file - optional path to the probe file; defaults to
#                 $self->array_file().
# Returns: nothing. Returns early, before reading the file, when the ArrayChip
#          already has the 'IMPORTED' status.
# Throws:
#   - if format() is not 'TILED';
#   - if the import does not hold exactly one Array with exactly one ArrayChip.
sub read_probe_data {
    my ($self, $array_file) = @_;

    $self->log("Reading and importing probe data");

    my $aa     = $self->db->get_AnalysisAdaptor();
    my $manal  = $aa->fetch_by_logic_name('MASCycles');
    my $uanal  = $aa->fetch_by_logic_name('UScore');
    my $tmanal = $aa->fetch_by_logic_name('NimblegenTM');

    $array_file ||= $self->array_file();
    $self->log("Parsing ".$self->vendor()." probe data (".localtime().")");

    throw("DesignDefs only accomodates a tiling design with no feature/probesets") if ($self->format() ne 'TILED');

    my $cs = $self->db->get_FGCoordSystemAdaptor()->fetch_by_name_schema_build_version(
        'chromosome',
        $self->db->_get_schema_build($self->db->dnadb())
    );

    my @arrays = @{ $self->arrays() };
    if (scalar(@arrays) != 1) {
        throw("Array DESIGN imports only accomodate one Array per import, please check ".$self->{'design_notes'});
    }

    my @achips = @{ $arrays[0]->get_ArrayChips() };
    if (scalar(@achips) != 1) {
        throw("Array DESIGN imports only accomodates one ArrayChip per import, please check ".$self->{'design_notes'});
    }

    my $achip = $achips[0];
    $self->log("Importing array design(".$achip->name().") from ".$array_file);

    if ($achip->has_status('IMPORTED')) {
        $self->log("Skipping fully imported ArrayChip:\t".$achip->design_id());
        return;
    }
    elsif ($self->recovery()) {
        $self->log("Rolling back partially imported ArrayChip:\t".$achip->design_id());
        $self->db->rollback_ArrayChip($achip);
    }

    $self->log("Importing ArrayChip:".$achip->design_id());

    my $fh = open_file("<", $array_file);

    # Declare first, assign conditionally: "my $x = ... if COND" has undefined
    # behaviour in Perl.
    my $dump_fasta = $self->{'_dump_fasta'};
    my $f_out;
    if ($dump_fasta) {
        # NOTE(review): the name ends in "<chip name>fasta" with no dot before
        # the extension. Kept byte-for-byte in case other code expects this
        # file name - confirm before changing.
        $f_out = open_file(">", $self->get_dir("output")."/probe.".$achip->name()."fasta");
    }

    my $anal     = $aa->fetch_by_logic_name("TileMap");
    my $achip_id = $achip->dbID();
    my $strand   = 0;
    my %hpos;

    while (my $line = <$fh>) {
        $line =~ s/\r*\n//;
        my @data = split /\t/, $line;

        # Line 1 is the header: record the column index of every field.
        if ($. == 1) {
            %hpos = %{ $self->set_header_hash(\@data, $self->get_def('prb_fields')) };
            next;
        }

        # Blank rows describe no probe.
        next if $line !~ /\S/;

        # Resolve the slice once, before any objects are built for a row that
        # would be thrown away anyway.
        my $seq_id = $data[ $hpos{'SEQ_ID'} ];
        my $slice  = $self->cache_slice($seq_id);
        if (! $slice) {
            warn("Skipping non-standard probe chromosome");
            next;
        }

        my $probe_id = $data[ $hpos{'PROBE_ID'} ];
        my $position = $data[ $hpos{'POSITION'} ];
        my $length   = $data[ $hpos{'LENGTH'} ];

        my $op = Bio::EnsEMBL::Funcgen::Probe->new(
            -NAME          => $probe_id,
            -LENGTH        => $length,
            -ARRAY         => $arrays[0],
            -ARRAY_CHIP_ID => $achip_id,
            -CLASS         => 'DESIGN',
        );

        $op->add_Analysis_score($manal, $data[ $hpos{'MAS_CYCLES'} ]);
        $op->add_Analysis_score($tmanal, $data[ $hpos{'TM'} ]);
        $op->add_Analysis_CoordSystem_score($uanal, $cs, $data[ $hpos{'UNIQUENESS_SCORE'} ]);

        # NOTE(review): POSITION + LENGTH spans LENGTH + 1 bases if both ends
        # are inclusive, while the cigar line below claims LENGTH matches.
        # Left unchanged - confirm the coordinate convention of the input.
        my $end = $position + $length;

        my $of = Bio::EnsEMBL::Funcgen::ProbeFeature->new(
            -START         => $position,
            -END           => $end,
            -STRAND        => $strand,
            -SLICE         => $slice,
            -ANALYSIS      => $anal,
            -MISMATCHCOUNT => 0,
            -CIGAR_LINE    => $length.'M',
            -PROBE         => undef,
        );

        # One row is one probe with one feature, so it is stored straight away.
        my %pfs = (
            $probe_id => {
                probe    => $op,
                features => [$of],
            },
        );
        $self->store_set_probes_features($achip_id, \%pfs);

        if ($dump_fasta) {
            my $loc = $seq_id.":".$position."-${end};";

            # NOTE(review): 'CHROMOSOME' is not one of the 'prb_fields' listed
            # in set_defs, so this column may be absent from the header -
            # confirm against the input files.
            # Records are written as they are produced instead of being
            # accumulated in memory for the whole file.
            print {$f_out} ">".$probe_id."\t".$data[ $hpos{'CHROMOSOME'} ].
                "\t$loc\n".$data[ $hpos{'PROBE_SEQUENCE'} ]."\n";
        }
    }

    close($fh);

    $achip->adaptor->set_status("IMPORTED", $achip);
    $self->log("ArrayChip:\t".$achip->design_id()." has been IMPORTED");

    close($f_out) if $dump_fasta;

    $self->log("Finished parsing probe data");
    return;
}
1; } |
# Builds the table of per-vendor import definitions and stores the entry for
# the current vendor in $self->{'array_defs'}.
#
# Only the 'DESIGN' vendor has an entry; for any other vendor the stored
# value is undef. Each entry holds:
#   probe_data   - list containing the single label 'probe'
#   prb_fields   - column names used for the probe file header
#   notes_fields - column names used for the design notes header
#   notes_file   - <data dir>/input/<vendor>/<name>/DesignNotes.txt
#
# Emits a warning on every call (a reminder left by the author).
# Returns: nothing.
sub set_defs {
    my ($self) = @_;

    warn "Change all this to new method and check for mandatory params";

    my $data_dir = $self->get_dir("data");
    my $vendor   = $self->vendor();
    my $name     = $self->name();

    my @probe_columns = qw(
        SEQ_ID POSITION LENGTH PROBE_SEQUENCE
        PROBE_ID UNIQUENESS_SCORE TM MAS_CYCLES
    );
    my @notes_columns = qw(DESIGN_ID DESIGN_NAME DESCRIPTION);

    my %defs_for = (
        DESIGN => {
            probe_data   => ['probe'],
            prb_fields   => \@probe_columns,
            notes_fields => \@notes_columns,
            notes_file   => "$data_dir/input/$vendor/$name/DesignNotes.txt",
        },
    );

    $self->{'array_defs'} = $defs_for{$vendor};
    return;
}
This module was written by Nathan Johnson.