#!/usr/bin/env perl
#  qscan.pl (quote scan)
#  A script to count the characters between pairs of double quote marks ("")
#    on lines from a text file, as in SPSS VARIABLE LABELS and VALUE
#    LABELS statements.
#
#  Kent Nassen, v1.0: 2/14/95 (first version, called 40scan)
#      v1.1: 2/16/98 -- added multiple file capability, test for file
#                       existence, and a length variable ($checklen) so
#                       that changing the length to check for is easier.
#      v1.2: 5/12/98 -- added message to print when no long quotes are found.
#      v1.3: 8/20/98 -- fixed longlines count, added message with count of #
#                       of longlines, changed all references to 40 to
#                       $checklen in anticipation of adding a commandline
#                       length option.
#      v1.4: 9/03/98 -- Added formatted output (page numbers, headers,
#                       etc.) to make printing long lists easier.
#      v1.5: 9/22/98 -- Improved output formatting a bit (number of lines
#                       in file, line number of longest quoted text, page
#                       header). Each file's output now starts on a new page.
#      v1.6: 6/27/99 -- Improved handling of files with no quoted lines or
#                       with quoted lines, but no long quoted lines. Set
#                       output to truncate when the contents of the
#                       line pushes total width over 80 chars.  Moved
#                       *** messages to print in the body of the report
#                       rather than in the summary. Should print a nicer
#                       looking report now.  Added -l option to set the
#                       quoted string length to be searched. Changed name
#                       to qscan.
#      v1.7: 8/15/99 -- Am setting format_page_number to 0 at the start of
#                       each file, so that each file's report is numbered
#                       from page 1 (since it is likely each would be seen
#                       as separate reports). To undo this change, comment
#                       out or remove the line: 
#                       STDOUT->format_page_number(0); # Each file starts with Page 1
#     v1.8: 10/22/99 -- Added support for also finding single-quoted strings.
#
#
#  :set tabstop=4
#
#  SYNTAX:  qscan filename[...]

use FileHandle;
use Getopt::Std;

use vars qw($version $checklen $pagesize $ProgName $lines $testcount
 	$maxcount $longlines $extendlen $filename $input $extend $maxline
 	$lentest $opt_l);

# $version="v1.1, 2/16/98";
# $version="v1.2, 5/12/98";
# $version="v1.3, 8/20/98";
# $version="v1.4, 9/03/98";
# $version="v1.5, 9/22/98";
# $version="v1.6, 6/27/99";
# $version="v1.7, 8/14/99";
$version="v1.8, 10/22/99";

# NOTE: $checklen sets the length of quoted text to search for
my $file="", $checklen=40, $pagesize=54;
STDOUT->format_lines_per_page($pagesize);

($ProgName = $0) =~ s%.*/%%;  # Unix
#($ProgName = lc $0) =~ s%.*\\%%;  # DOS

$opt_l='';

getopt('l:');

if ($opt_l =~ /^\d+$/) { $checklen=$opt_l }
else { 
	if ($opt_l) {
	print STDERR "\n   *** -l parameter, $opt_l, is not a number\n"; 
    &DisplayUsage; 
	exit 1;
	}
}

if ($#ARGV<0) {
    &DisplayUsage;
    exit 1;
}

foreach $file (@ARGV) {
	STDOUT->format_page_number(0); # Each file starts with Page 1
	process($file, 'fh00');
}

sub process {
    $lines=$testcount=$maxcount=$longlines=0;
	$extend=" ";
	$extendlen=56; # length beyond which we truncate long strings in the output
	local($filename, $input) = @_;
	$input++;
	unless (open $input, $filename) {
		print STDERR "\n   *** $ProgName: Can't open '$filename': $!\n\n";
		return;
	}
	while (<$input>) {
		chop;
        $lines++;
		if ( m/".*?".*(".*?")/ or m/'.*?'.*('.*?')/ ) {
			$testcount=length($1);
			if ( $testcount>$checklen ) {
				$longlines++;
				$lentest=length($_);
				if ($lentest>=$extendlen) { $extend='...' }
				else { $extend=" " }
				write;
			}
            if ($testcount>$maxcount) { $maxcount=$testcount; $maxline=$.; }
			$extend="";
		}
		if ( m/"(.*?)"/ or m/'(.*?)'/ ) {
			$testcount=length($1);
			if ( $testcount>$checklen ) {
				$longlines++;
				$lentest=length($_);
				if ($lentest>=$extendlen) { $extend='...' }
				else { $extend=" " }
				write;
			}
            if ($testcount>$maxcount) { $maxcount=$testcount; $maxline=$.; }
		}
	} # end of while (file has been completely read and processed)

	# Print out summary information about the file and set up for a new page on next file
   	if (!$longlines) { # No long lines found
        if (!$maxcount) { $_=" *** No quoted text found.\n"; }
        else { # Quoted text found, but not over the max
		    $_=" *** No quoted text over $checklen characters long.\n";
        }
		write; # Print out the header for files with no lines over max
        if ($lines==1) { print "\n    There was one line in the '$filename'\n"; }
        else { print "\n    There were $lines lines in '$filename'\n"; }
        if ($maxcount) { # Quoted text over the max found
            print "    The length of the longest quoted text found was $maxcount",
				" characters at line $maxline.\n";
        }
	}
	else { # Long quoted text found
        if ($lines==1) { print "\n    There was one line line in the '$filename'.\n"; }
        else { print "\n    There were $lines lines in '$filename'\n"; }
        if ($longlines==1) {
		    print "    One line had quoted text over $checklen characters long.\n";
        }
        else {
		    print "    $longlines lines had quoted text over $checklen characters long.\n";
        }
        print "    The length of the longest quoted text found was $maxcount",
			" characters at line $maxline.\n";
	}
	print "\n";
	close $input;
	STDOUT->format_lines_left("0");
} # end of subroutine process()

sub DisplayUsage {
    print STDERR "\n  $ProgName: Find long quoted text",
			" (e.g., check the length of variable\n";
    print STDERR "               and value labels).  by Kent Nassen, $version\n";

    print STDERR "\n   Usage: $ProgName [-l#] [filename...]\n",
          "        -l#  number of characters within quotes to search for (default=40)\n\n",
		  "        (multiple filenames or wildcards are accepted if your shell\n",
          "        can handle them)\n",
          "\n",
          "   Examples: $ProgName sp6360.sps  or  $ProgName -l60 *.sps\n\n";
}

format STDOUT_TOP =

   @||||@||
   "Page",$%

   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
   "$ProgName: Find long quoted text";
   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
   "by Kent Nassen, $version";
   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
   "Scanning for quoted text longer than $checklen characters in the file '".$file."'"

             Quote
   Line#     Length                   Line Contents/*** Message
  -------   --------  ----------------------------------------------------------
.

format STDOUT =
  @>>>>>> @>>>>>>>>   @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<@<<<
$.,$testcount,$_,$extend
.

Last Modified: Fri Aug 15 11:53:26 EDT 2025