#!/usr/bin/perl
#
# scrubkeds (by Justin Appleby)
# [justin.appleby@gmail.com]
#
# This perl script finds occurrences of O##, l##, and
# other bad entries in KEDS data files. Scrubbed files
# are written to input_filename.clean
#
# For every file named on the command line:
#   * a capital letter O followed by two digits is rewritten as 0##
#   * a lowercase letter l followed by two digits is rewritten as 1##
#   * any entry containing "---]" is blanked out (removed)
# A short report is printed to STDOUT for each file.
#
# Usage: ./scrubkeds [filenames]

use strict;
use warnings;

foreach my $arg (@ARGV) {

    # Read MAC style endlines (a bare carriage return terminates each
    # entry). Localised so the change never leaks outside this loop body.
    local $/ = "\r";

    print "File to parse: $arg \n";

    open(my $in, '<', $arg) or die "cannot open $arg: $!";
    my @lines = <$in>;
    close($in);

    my $outfile = $arg . ".clean";
    open(my $out, '>', $outfile) or die "cannot open $outfile: $!";

    my $i      = 0;    # 1-based number of the entry being processed
    my $rm     = 0;    # entries blanked out because they held bad data
    my $errors = 0;    # corrections reported (one per pattern kind per entry)

    foreach my $line (@lines) {
        $i++;

        # print back bad lines when found
        if ($line =~ /O\d\d/) {
            print "Entry $i changed ('O##' found)\n";
            $errors++;
        }
        if ($line =~ /l\d\d/) {
            print "Entry $i changed ('l##' found)\n";
            $errors++;
        }

        # Blank out entries carrying the "---]" bad-data marker. The report
        # and the counter are driven by the substitution itself, so they can
        # never disagree with what was actually removed.
        if ($line =~ s/^.*---\].*$//) {
            print "Line $i removed (bad data found)\n";
            $rm++;
        }

        # substitute proper values in bad lines
        $line =~ s/O(\d\d)/0$1/g;
        $line =~ s/l(\d\d)/1$1/g;

        # write to new file
        print {$out} $line;
    }

    # Buffered write errors only surface at close, so check it.
    close($out) or die "cannot close $outfile: $!";

    print "\n$errors lines fixed in the data set\n";
    print "$rm bad lines removed\n";
    print "Scrubbed file written to: $outfile\n";
}