Perl Code for 40scan.pl

Here is the Perl code for 40scan.pl, including all the comments added over the years. The program counts the characters between pairs of double quotes and reports the quoted strings that are longer than 40 characters. To check for a different length, change $checklen near the beginning of the program.

40scan.pl has been updated to use lexical filehandles now. While it may have worked before, it appears to work better now. Each file will get a new page and header information.


#!/usr/bin/env perl
#  40scan.pl
#  A script to count the characters between pairs of double quote marks
#    on lines from a text file, as in SPSS VARIABLE LABELS and VALUE
#    LABELS statements.
# 
#  Kent Nassen, v1.0: 2/14/95
#      v1.1: 2/16/98 -- added multiple file capability, test for file 
#                       existence, and a length variable ($checklen) so 
#                       that changing the length to check for is easier.
#      v1.2: 5/12/98 -- added message to print when no long quotes are found.
#      v1.3: 8/20/98 -- fixed longlines count, added message with count of no. 
#                       of longlines, changed all references to 40 to 
#                       $checklen in anticipation of adding a commandline
#                       length option.
#      v1.4: 9/03/98 -- Added formatted output (page numbers, headers,
#                       etc.) to make printing long lists easier.
#      v1.5: 9/22/98 -- Improved output formatting a bit (number of lines
#                       in file, line number of longest quoted text, page 
#                       header). Each file's output now starts on a new page.
#      v1.6: 9/16/25 -- Updated to lexical filehandles and tested with use
#                       strict and use warnings.
# 
#  :set tabstop=4
# 
#  SYNTAX:  40scan.pl filename [filename...]

use strict;
use warnings;
use IO::Handle;    # makes STDOUT->format_lines_per_page available on perls older than 5.14

# Script-wide state. These are package variables (our): the subs and the
# STDOUT / STDOUT_TOP formats further down read them directly by name.
our ($version, $file, $filename, $lines, $line, $ProgName,
    $file_to_process, $pagesize, $checklen, $testcount, $maxcount, $maxline,
    $longlines);

# $version="v1.1, 2/16/98";
# $version="v1.2, 5/12/98";
# $version="v1.3, 8/20/98";
# $version="v1.4, 9/03/98";
# $version="v1.5, 9/22/98";
$version = "v1.6, 9/16/25";

# NOTE: $checklen sets the length of quoted text to search for
# NOTE: $pagesize is the number of lines to print per page
$file     = "";
$checklen = 40;
$pagesize = 54;
STDOUT->format_lines_per_page($pagesize);

# Strip any leading directory so messages show only the script name.
# Assign to the package variable declared above; a second "my" declaration
# here would mask it.
($ProgName = $0) =~ s%.*/%%;  # Unix
# ($ProgName = lc $0) =~ s%.*\\%%;  # DOS/Windows

# At least one filename is required.
unless (@ARGV) {
    DisplayUsage();
    print STDERR "   *** I need a filename!\n\n";
    exit 1;
}

# The loop variable is the package variable $file (not a "my" variable):
# foreach localizes it for the duration of the loop, so the STDOUT_TOP
# format sees the name of the file currently being processed.
foreach $file (@ARGV) {
    process($file);
}
	
# process($path)
#
# Scan one file for double-quoted text. Every line whose measured quoted
# text is longer than $checklen characters is reported through the STDOUT
# format (via write), followed by a short summary for the file.
#
# Parameters:
#   $path - name of the file to scan; stored in $file_to_process.
#
# Returns nothing useful. If the file cannot be opened, a message is
# printed to STDERR and the sub returns without writing a report.
#
# NOTE: only one quoted string per line is measured: the last one when the
# line holds two or more quoted strings, otherwise the only one.
sub process {
    ($file_to_process) = @_;

    # Reset page number and lines remaining so each file starts a new page.
    $% = 0;    # page num
    $- = 0;    # lines remaining
    $lines = $testcount = $maxcount = $longlines = 0;
    $maxline = 0;

    open my $fh, '<', $file_to_process or do {
        print STDERR "\n    *** $ProgName: Can't open '$file_to_process': $!\n\n";
        return;
    };

    while ($line = <$fh>) {
        chomp $line;
        $lines++;

        # First try a line with two or more quoted strings: the greedy .*
        # pushes the capture to the last quoted string on the line.
        # Otherwise fall back to the single quoted string, if any.
        my ($quoted) = $line =~ m/".*?".*(".*?")/;
        ($quoted) = $line =~ m/(".*?")/ unless defined $quoted;
        next unless defined $quoted;

        $line      = $quoted;                # the STDOUT format prints $line
        $testcount = length($quoted) - 2;    # exclude the two quote marks

        if ($testcount > $checklen) {
            $longlines++;
            write;
        }
        if ($testcount > $maxcount) {
            $maxcount = $testcount;
            $maxline  = $.;
        }
    }

    # Same sentence is used by both summary variants below.
    my $line_summary = $lines == 1
        ? "    There was $lines line in the file.\n"
        : "    There were $lines lines in the file.\n";

    if (!$longlines) {
        # Nothing was reported: write one placeholder row (line 0, length 0,
        # empty text) so this file still gets its page header. The format
        # reads $line, which is undef once the read loop has finished, so
        # clear $line itself rather than $_.
        $testcount = 0;
        $line      = "";
        $.         = 0;
        write;

        if (!$maxcount) {
            print "\n    *** No quoted text found.\n";
        }
        else {
            print "\n    *** No quoted text over $checklen characters long.\n";
        }
        print $line_summary;
        if ($maxcount) {
            print "    The longest quoted text found was $maxcount characters long at line $maxline.\n";
        }
        $- = 0;
    }
    else {
        if ($longlines == 1) {
            print "\n    $longlines line had quoted text over $checklen characters long.\n";
        }
        else {
            print "\n    $longlines lines had quoted text over $checklen characters long.\n";
        }
        print $line_summary;
        print "    The length of the longest quoted text found was $maxcount characters at line $maxline.\n";
    }

    close $fh;
    print "\n";
    return;
}

# DisplayUsage()
#
# Print the program banner (name, purpose, author, version) followed by
# the usage summary and examples to STDERR. Takes no arguments and reads
# $ProgName, $checklen and $version.
sub DisplayUsage {
    my @banner = (
        "\n  $ProgName: Find quotes longer than $checklen characters (e.g., check variable\n",
        "               and value labels).  by Kent Nassen, $version\n",
    );

    my @usage = (
        "\n   Usage: $ProgName [filename...]\n",
        "        (multiple filenames or wildcards are accepted if\n",
        "        your shell can handle them)\n",
        "\n",
        "   Examples: $ProgName sp6360.sps  or  $ProgName *.sps\n\n",
    );

    print STDERR @banner, @usage;
}

# Page header for STDOUT. Shows, in order: the centered word "Page" with
# the current page number ($%), the program name with the length being
# checked ($checklen), the author/version line ($version), the name of the
# file being scanned ($file), and the column headings for the rows produced
# by the STDOUT format. Whitespace inside the picture lines is significant.
format STDOUT_TOP =

   @||||@||
   "Page",$%

   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
   "$ProgName: Find quotes longer than $checklen characters";
   @<<<<<<<<<<<<<<<<<<<<<<<<<<<< 
   "by Kent Nassen, $version";
   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
   "Scanning for long quoted text in the file: ",$file

   Line#     Length                             Line Contents
  -------   --------  --------------------------------------------------------------
.

# One report row per write: the input line number ($.) and the measured
# length ($testcount), both right-justified, followed by the quoted text
# held in $line, left-justified. Text wider than the last field is cut off
# at the field width.
format STDOUT =
  @>>>>>> @>>>>>>    @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
$.,$testcount,$line
.

Back to the 40scan Perl Page
Last Modified: Mon Sep 22 14:45:16 EDT 2025