#!/usr/bin/perl # # Author: Josh Odom # # Version: 0.01 (alpha) # # Description: # This program is designed to provide query-like functionality to delimited # text-files. This was created in response to the author's perceived # lack of ability to find and replace named columns in text files, even though # other column-based utilities exist (like cut, paste), and other search # and replace utilities exist (like grep and sed). # # Please note that this utility is currently in an alpha state, and care should # be taken when using it with sensitive data. # # # Revisions: # 0.01 - Febuary 6, 2009 - Initial release # # Known Bugs and Missing Functionality: # - no checks are made to determine that the requested field exists # - the functionality is undefined for using fields which do not exist # in the file # - zero and negative fields do not have defined functionality # - columns cannot be selected from a description on a header row (i.e. # specifying a column name instead of field number) # - usage is not displayed on error # - code is not thoroughly documented # - "and"s strictly have higher precedence than "or"s, and there is no way # to force precedence # - this utility reads from stdin and writes to stdout, and there's no way # to specify an input or output file # # Other Notes: # - code is not thoroughly documented # - requires perl 5 use strict; use Switch; ## Global variables my $delimiter; my $type; my $root_condition; my @assignment_criteria = (); my $line; my @tokens; my $temp; ## Fake enumeration my $CONDITIONGROUP = 1; my $ORGROUP = 2; my $ANDGROUP = 3; my $CONDITION = 4; my $FIELD = 7; my $CONSTANT = 8; my $SELECT = 11; my $UPDATE = 12; my $DELETE = 13; my $SET = 14; my $EQUAL = 21; my $NOTEQUAL = 22; my $LESSTHAN = 23; my $GREATERTHAN = 24; my $LESSEQUAL = 25; my $GREATEREQUAL = 26; my $VERSION = "0.01"; my $USAGE = "usage: filesql.pl -d{delim} select where {CONDITIONGROUP}\n" . "usage: filesql.pl -d{delim} update set -f{number} = {TERMINAL} \\\n" . " [-f{number} = {TERMINAL} ...] [where {CONDITIONGROUP}]\n" . "usage: filesql.pl -d{delim} delete where {CONDITIONGROUP}\n\n" . "{CONDITIONGROUP} := {ANDGROUP} [or {ANDGROUP} ...]\n" . "{ANDGROUP} := {CONDITION} [and {CONDITION} ...]\n" . "{CONDITION} := {TERMINAL} {OPERATOR} {TERMINAL}\n" . "{TERMINAL} := -f{number} | {constant}\n" . "{OPERATOR} := -eq | -ne | -gt | -lt | -ge | -le\n"; ## read arguments off command line $temp = shift(@ARGV); if ($temp eq "-v" || $temp eq "--version") { print $VERSION . "\n"; exit 0; } if ($temp eq "-h" || $temp eq "--help") { print $USAGE; exit 0; } ## Expects the first argument to be the delimiter, or dies substr($temp, 0, 2) eq "-d" || die "Bad delimiter\n"; length $temp == 3 || die "Bad delimiter\n"; $delimiter = substr($temp, 2, 1); ## Gets the type of selection $temp = shift(@ARGV); switch ($temp) { case "select" { $type = $SELECT; } case "update" { $type = $UPDATE; } case "delete" { $type = $DELETE; } else { die "Bad selection type\n"; } } ## Read the "set" statement, if "update" if ($type == $UPDATE) { $temp = shift(@ARGV); $temp eq "set" || die "Missing 'set' statement\n"; while (defined @ARGV[0] && @ARGV[0] ne "where") { substr(@ARGV[0], 0, 2) eq "-f" || die "Bad field\n"; length @ARGV[0] >= 3 || die "Bad field\n"; @ARGV[1] eq "=" || die "Missing '=' in set statement]n"; defined @ARGV[0] || die "Missing assignment value\n"; my $temp_criterion = [substr(@ARGV[0], 2)-1, &eval_terminal(@ARGV[2])]; push(@assignment_criteria, $temp_criterion); shift(@ARGV); shift(@ARGV); shift(@ARGV); } } ## Read condition $temp = shift(@ARGV); if ($temp eq "where") { $root_condition = [$CONDITIONGROUP, &eval_or]; } else { $root_condition = [$CONDITIONGROUP]; } ## Process each line from stdin based on criteria read from command line while ( ) { $line = $_; ## It is important that we strip the newline off before the split and add ## it back after the join, in the case that the final field gets updated or ## new fields get added chomp($line); @tokens = split /[$delimiter]/, $line; ## Evaluate this line against given conditions my $evaluation = &evaluate($root_condition); switch ($type) { case ($SELECT) { if ($evaluation) { print join($delimiter,@tokens) . "\n"; } } case ($DELETE) { if ( ! $evaluation) { print join($delimiter,@tokens) . "\n"; } } case ($UPDATE) { if ($evaluation) { my @out_tokens = @tokens; foreach (@assignment_criteria) { my @criterion = @{$_}; $out_tokens[@criterion[0]] = &evaluate(@criterion[1]); } print join($delimiter,@out_tokens) . "\n"; } else { print join($delimiter,@tokens) . "\n"; } } } } ## Evaluates an array where the first element is its optype, and the rest ## of the elements are its data. ## The criteria specified on the command line is stored as a tree of nested ## lists in this format. sub evaluate { my @args = @{@_[0]}; switch (@args[0]) { case ($CONDITIONGROUP) { if (defined @args[1]) { return &evaluate(@args[1]); } else { return 1; } } case ($ORGROUP) { if ( &evaluate(@args[1]) ) { return 1; } elsif ( defined @args[2] ) { return &evaluate(@args[2]); } else { return 0; } } case ($ANDGROUP) { if ( ! &evaluate(@args[1]) ) { return 0; } elsif ( defined @args[2] ) { return &evaluate(@args[2]); } else { return 1; } } case ($CONDITION) { switch (@args[1]) { case ($EQUAL) { return &evaluate(@args[2]) eq &evaluate(@args[3]); } case ($NOTEQUAL) { return &evaluate(@args[2]) ne &evaluate(@args[3]); } case ($LESSTHAN) { return &evaluate(@args[2]) < &evaluate(@args[3]); } case ($GREATERTHAN) { return &evaluate(@args[2]) > &evaluate(@args[3]); } case ($LESSEQUAL) { return &evaluate(@args[2]) <= &evaluate(@args[3]); } case ($GREATEREQUAL) { return &evaluate(@args[2]) >= &evaluate(@args[3]); } } } case ($FIELD) { return @tokens[@args[1]]; } case ($CONSTANT) { return @args[1]; } } } ## Processes a "terminal" on the command line, which is either a field ## or a constant sub eval_terminal { my $temp = @_[0]; if (substr($temp, 0, 2) eq "-f") { my $temp_field = (substr($temp, 2) - 1); return [$FIELD, $temp_field]; } elsif (substr($temp, 0, 2) eq "--") { return [$CONSTANT, substr($temp, 1)]; } else { return [$CONSTANT, $temp]; } } ## Processes a group of statements joined by "or" sub eval_or { my $andgroup = &eval_and; if (! defined @ARGV[0]) { return [$ORGROUP, $andgroup]; } else { $temp = shift(@ARGV); $temp eq "or" || die "Expected 'or'\n"; my $orgroup = &eval_or; return [$ORGROUP, $andgroup, $orgroup]; } } ## Processes a group of statements joines by "and" sub eval_and { my $condition = &eval_condition; if (! defined @ARGV[0] || @ARGV[0] ne "and") { return [$ANDGROUP, $condition]; } else { $temp = shift(@ARGV); $temp eq "and" || die "Expected 'and'\n"; ## redundant my $andgroup = &eval_and; return [$ANDGROUP, $condition, $andgroup]; } } ## Processes an atomic condition sub eval_condition { defined @ARGV[0] || die "Expected criteria\n"; defined @ARGV[1] || die "Expected criteria\n"; defined @ARGV[2] || die "Expected criteria\n"; my $terminal1 = &eval_terminal(@ARGV[0]); my $op; my $terminal2 = &eval_terminal(@ARGV[2]); switch (@ARGV[1]) { case "-eq" { $op = $EQUAL; } case "-ne" { $op = $NOTEQUAL; } case "-lt" { $op = $LESSTHAN; } case "-gt" { $op = $GREATERTHAN; } case "-le" { $op = $LESSEQUAL; } case "-ge" { $op = $GREATEREQUAL; } else { die "Invalid operator\n"; } } shift(@ARGV); shift(@ARGV); shift(@ARGV); return [$CONDITION, $op, $terminal1, $terminal2]; }