#!/usr/local/bin/perl -w
#
# Summarise the sequences in a file as an Excel worksheet.
#
# Runs the EMBOSS "infoseq" program on the input sequence file, then writes
# one spreadsheet row per sequence with the columns:
#   0: name   1: gi (hyperlinked to NCBI)   2: length   3: %GC   4: description
# %GC values at or above the threshold are highlighted (bold, red, underlined).

use strict;
use warnings;
use Scalar::Util qw(looks_like_number);
use Spreadsheet::WriteExcel;

# Input sequence file
my $file = "batchseq.tfa";

# Output workbook file
my $xls_file = "example.xls";

# %GC values greater than or equal to this are flagged as high
my $high_gc_threshold = 70;

# Execute the infoseq program from the EMBOSS suite.
# We are interested in name, gi, length, %gc and description.
# Resulting output is stored in the array @out, one line per element.
my @out = `infoseq -only -name -gi -length -pgc -description $file`;

# Do not build a spreadsheet from a failed run: $? is -1 when the command
# could not be started at all, otherwise it holds the wait status.
if ($? == -1) {
    die "Could not run infoseq: $!\n";
}
elsif ($? != 0) {
    die "infoseq failed on $file (wait status $?)\n";
}

# Create a new workbook saved as $xls_file and add a worksheet
# called Summary to it. new() returns undef if the file cannot be created.
my $workbook = Spreadsheet::WriteExcel->new($xls_file);
die "Problems creating new Excel file $xls_file: $!\n"
    unless defined $workbook;
my $worksheet = $workbook->add_worksheet('Summary');

# Set text bold, red, underlined for values designated as high
my $high_val = $workbook->add_format();
$high_val->set_bold();
$high_val->set_color('red');
$high_val->set_underline();

# NCBI URL to use for retrieving sequence; the gi is appended to it.
# The URL must not contain any whitespace.
my $ncbi_url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi"
             . "?cmd=search&doptcmdl=GenBank&db=Nucleotide&uid=";

my $row = 0;

# Read one line at a time and split it to get the
# name, gi, length, %gc and description
foreach my $line (@out) {

    # split ' ' ignores leading whitespace and the trailing newline.
    # The description may contain spaces, so it soaks up all remaining fields.
    my ($name, $gi, $length, $pgc, @desc) = split ' ', $line;

    # Skip blank lines silently, and lines lacking the four leading
    # fields with a warning, so they do not produce junk rows.
    if (!defined $pgc) {
        warn "Skipping malformed infoseq line: $line" if $line =~ /\S/;
        next;
    }

    # Use gi for linking to NCBI; create the url in the worksheet.
    # write() treats the URL as a hyperlink and shows $gi as the cell text.
    my $url = $ncbi_url . $gi;
    $worksheet->write($row, 0, $name);
    $worksheet->write($row, 1, $url, $gi);
    $worksheet->write($row, 2, $length);

    # If %gc is >= the threshold make text red, bold, underlined.
    # Only compare numerically when the field really is a number.
    my @format = (looks_like_number($pgc) && $pgc >= $high_gc_threshold)
               ? ($high_val)
               : ();
    $worksheet->write($row, 3, $pgc, @format);

    my $full_desc = join(" ", @desc);
    $worksheet->write($row, 4, $full_desc);

    # Increment $row for putting data on next line
    $row++;
}

# Close explicitly so that write errors are reported rather than lost
# during global destruction.
$workbook->close() or die "Error closing $xls_file: $!\n";