#!/usr/bin/perl -w # The purpose of this script is to learn Perl # command line arguments, file I/O, and # pattern matching # Author: MyName # Version 0.1, 27 Sep 03 use strict; my ($infile, $def, $seq, $len, $n_found); # Get filename from command line argument $infile = $ARGV[0]; # Verify that an argument was provided if (!defined $infile) { print "Missing filename argument\n"; print "Usage: $0 FASTAfile\n"; exit(-1); } # Open the input and output files open(INFILE, "<$infile") or die "Cannot open file $infile\n"; # Make the output filename related to the # input filename open(OUTFILE, ">$infile.out") or die "Cannot open $infile.out for writing\n"; # End of Part I # Read sequences from file and look for CAGs while ($def = ) { chomp($def); # Remove > from the description and print # progress message to STDOUT $def =~ s/>//; print "Sequence $def being processed...\n"; # Now read the sequence from the file $seq = ; chomp($seq); # Convert sequence to upper case $seq = uc($seq); # Check that the DNA sequence is valid if ($seq =~ m/[^ACGT]/) { print "Invalid DNA bases in $infile : $def \n"; next; } # Find the length of the sequence $len = length($seq); # Count number of CAG's $n_found = ($seq =~ s/CAG/CAG/g); # Check to see whether number found > 50 # or number found <= 0 # If either of these is true, print msg to outfile if ($n_found >= 50) { print OUTFILE "$def: found $n_found "; print OUTFILE "CAG in $len bases \n"; } elsif ($n_found <= 0) { print OUTFILE "$def: CAG not found!\n"; } } # End of while # End of Parts II, III, and IV close (INFILE); close (OUTFILE);