#!/usr/bin/perl -X
use strict;
use warnings;
use File::Copy;
use Getopt::Long;
use Bio::Phylo::Util::Logger ':levels';
use Bio::Tools::Run::StandAloneBlastPlus;

# this script iterates over the */rna.fasta files, and uses each of these as a query
# to blast against the indexed database

# Runtime configuration. Each value can be overridden on the command line:
#   -datadir <dir>   base directory; defaults to the DATADIR environment variable
#   -blastdb <file>  data handed to the BLAST factory as its database; when
#                    omitted, a default is looked up under <datadir>/genomes
#   -verbose         may be repeated; each occurrence raises the log level by one
my $datadir = $ENV{'DATADIR'};
my $verbosity = INFO;
my $blastdb;

# GetOptions returns false when the command line could not be parsed (it has
# already printed the reason), so stop instead of running with partial settings
GetOptions(
	'datadir=s' => \$datadir,
	'blastdb=s' => \$blastdb,
	'verbose+'  => \$verbosity,
) or die "Error in command line arguments\n";

# instantiate logger
Bio::Phylo::Util::Logger->new(
	'-level' => $verbosity,
	'-class' => 'main',
);

# every path used further down is built from $datadir; without it they would
# silently resolve relative to the filesystem root, so refuse to continue
if ( not defined $datadir or not length $datadir ) {
	ERROR "no -datadir argument provided and DATADIR environment variable is not set";
	exit(1);
}


# check to see if we have a DB: either the one given with -blastdb, or else
# the single *.fasta file found in $datadir/genomes
if ( not $blastdb ) {
	INFO "no -blastdb argument provided, looking for a default";

	# bsd_glob treats the whole string as one pattern; the <...> operator
	# splits its pattern on whitespace and would therefore fail to find the
	# file when $datadir contains a space
	require File::Glob;
	my @db = File::Glob::bsd_glob("${datadir}/genomes/*.fasta");
	if ( @db != 1 ) {
		ERROR "no -blastdb argument and not exactly 1 *.fasta file in $datadir/genomes/";
		exit(1);
	}
	else {
		($blastdb) = @db;
		INFO "using $blastdb by default";
	}
}
else {
	INFO "using provided -blastdb $blastdb";

	# fail early, with a readable message, when the given path is wrong
	if ( not -e $blastdb ) {
		ERROR "provided -blastdb $blastdb does not exist";
		exit(1);
	}
}

# the factory receives the chosen file as the data for its database
my $factory = Bio::Tools::Run::StandAloneBlastPlus->new( '-db_data' => $blastdb );

# iterate over the sequences: for every $datadir/genes/<entry>/rna.fasta, run
# it as a blastn query and write a copy of the query file, followed by one
# FASTA record per hit, to <entry>/rna-unaligned.fasta.
# the loop runs inside an eval so that the factory's temp files are removed
# even when one of the steps dies; the error is re-thrown after cleanup
my $completed = eval {
	opendir my $toplevel, "${datadir}/genes"
		or die "cannot open directory ${datadir}/genes: $!";
	while ( defined( my $entry = readdir $toplevel ) ) {

		# the directory itself and its parent are not gene folders
		next if $entry eq '.' or $entry eq '..';

		my $file = "${datadir}/genes/${entry}/rna.fasta";
		next if not -e $file;

		# run the query, collect matches
		my %matches;
		my $report = $factory->blastn( '-query' => $file );
		INFO $file;
		while ( my $hit = $report->next_hit ) {
			my $defline = '>' . $hit->name;
			my @seq;
			while ( my $hsp = $hit->next_hsp ) {

				# imperfect matches are only reported, not filtered out
				my $ident = $hsp->percent_identity;
				if ( $ident != 100 ) {
					INFO $ident;
				}

				# NOTE(review): the record is labelled with the hit name but
				# stores the query side of the alignment - confirm intended
				push @seq, $hsp->seq_str('query');
			}

			# HSP strings are concatenated in the order the hit returns them;
			# hits without any HSP are left out
			$matches{$defline} = join( '', @seq ) if scalar(@seq);
		}

		# write matches to new file: start from a copy of the query file.
		# a failed copy must be fatal, otherwise the append below would
		# create an output file that lacks the query sequences
		my $unaligned = "${datadir}/genes/${entry}/rna-unaligned.fasta";
		copy( $file, $unaligned )
			or die "cannot copy $file to $unaligned: $!";
		open my $out, '>>', $unaligned
			or die "cannot open $unaligned for appending: $!";

		# sorted so that the output is identical from run to run
		for my $acc ( sort keys %matches ) {
			print $out "\n", $acc, "\n", $matches{$acc};
		}

		# buffered write errors only surface when the handle is closed
		close $out or die "cannot close $unaligned: $!";
	}
	closedir $toplevel;
	1;
};
my $error = $@;

# remove temp files
$factory->cleanup;

# now that the temp files are gone, report whatever interrupted the loop
die( $error || "unknown error while processing ${datadir}/genes" ) if not $completed;
