#!/usr/skunk/bin/gawk -f
# @(#) fixdate.gawk 1.0 93/09/26
# 93/09/26 john h. dubois iii (john@armory.com)
#
# Use gawk for strftime(), /dev/stderr, and because it recognizes changes
# to ARGV/ARGC
#
# fixdate rewrites one field of every input record as a fully specified,
# zero-padded date.  Option handling is done by Opts()/ProcArgs() below;
# the date conversion itself is done by MakeGoodDate().

# Parse the command line and set up the globals used by the main rule and
# by MakeGoodDate():
#   YearField      position of the year in a 3-component date (1 or 3)
#   ZeroOnly       -z given: never add a year, only pad
#   Century        -c given: emit 4-digit years
#   IgnoreErr      -i or -I given
#   IgnoreBadDate  -I given
#   DateSep        separator between date components
#   ExpYear        year added to dates that lack one
#   ExpCentury     century (e.g. 1900) added to 2-digit years when Century
#   YearDig        minimum number of digits printed for the year
#   DateField      number of the field to rewrite
BEGIN {
    Usage = \
"Usage: fixdate [-chiIuz] [-d datesep] [-r recsep]\n"\
" [-t fieldsep] [-y year] field-num [file ...]"
    ARGC = Opts("fixdate",Usage,"chiIuzd:r:t:y:",1)
    if ("h" in Options) {
        print \
"fixdate: convert date fields from month & day only to fully specified form.\n"\
Usage "\n"\
"The specified field of each record is parsed as a date and rewritten to\n"\
"include leading zeros in each field, and to include a year if none given.\n"\
"Date components must be numeric.\n"\
"The modified records are written to the standard output.\n"\
"-c: Include the century in the year. The default is to use two digits.\n"\
" A century part is also added to dates that include a year but not a\n"\
" century. If -y is given, its century part will be used for the\n"\
" expansion; if not, the current century is used.\n"\
"-d: Set the date component separator. The default is forward slash (/).\n"\
"-i: If the specified field does not look like it was intended as a date,\n"\
" the record is printed without modification. The default is to print\n"\
" an error message and discard the record.\n"\
"-I: Like -i, except fields that look like dates are also printed unmodified\n"\
" if there is a problem with them. An warning message is still printed.\n"\
"-r: Set the record separator. The default is newline.\n"\
"-t: Set the field separator. The default is tab. The separator can be a\n"\
" string, but may not contain any regular expression metacharacters.\n"\
"-u: Use US date format (month/day/year). The default is to use sorting\n"\
" format (year/month/date).\n"\
"-y: Set the year added to dates. The default is to use the current year.\n"\
"-z: Add leading zeros only; dates that do not contain a year are in error."
        exit(0)
    }

    if ("u" in Options)
        YearField = 3
    else
        YearField = 1
    ZeroOnly = ("z" in Options)
    Century = ("c" in Options)
    IgnoreErr = (("i" in Options) || ("I" in Options))
    IgnoreBadDate = ("I" in Options)

    if ("d" in Options)
        DateSep = Options["d"]
    else
        DateSep = "/"
    if ("t" in Options)
        FS = OFS = Options["t"]
    else
        FS = OFS = "\t"
    if ("r" in Options)
        RS = ORS = Options["r"]

    if ("y" in Options) {
        ExpYear = Options["y"]
        # A 2-digit -y value is taken to be in the current century.
        if ((ExpYear+0) < 100)
            ExpYear += int(strftime("%Y")/100) * 100
    }
    else
        ExpYear = strftime("%Y")

    if (Century) {
        ExpCentury = int(ExpYear / 100) * 100
        YearDig = 4
    }
    else {
        ExpYear %= 100
        YearDig = 2
    }

    DateField = ARGV[1]
    # Remove field-num from ARGV so gawk does not try to read it as a file.
    delete ARGV[1]
    # A field number of 0 or a non-numeric value would make $DateField
    # refer to the whole record, so reject it up front.
    if (DateField !~ /^[0-9]+$/ || DateField + 0 < 1) {
        print "fixdate: field-num must be a positive integer." \
          " Use +h for help." > "/dev/stderr"
        print Usage > "/dev/stderr"
        Err = 1
        exit 1
    }
    DateField += 0
}

# Main rule: rewrite field DateField of each record, or report/pass through
# the record according to -i / -I when the field cannot be converted.
{
    if (NF < DateField) {
        if (IgnoreErr)
            print $0
        else
            FileErr("Not enough fields in record")
        next
    }
    # MakeGoodDate() returns a one-character status (1 or 2) on failure;
    # a converted date is always longer than one character.
    if ((GoodDate = MakeGoodDate($DateField,DateSep)) ~ /^.$/) {
        if (IgnoreErr && GoodDate == 1)
            print $0
        else {
            FileErr(ERRNAME)
            # Only claim the record was printed when it really was.
            if (IgnoreBadDate) {
                print $0
                print "Record printed unmodifed." > "/dev/stderr"
            }
        }
        next
    }
    $DateField = GoodDate
    print $0
}

# MakeGoodDate: convert InDate to zero-padded, fully specified form.
# InDate is the date text; DateSep is the component separator.
# Reads the globals YearField, ZeroOnly, Century, ExpYear, ExpCentury and
# YearDig set up in the BEGIN rule.
# Returns 1 if no date separators found in field.
# Returns 2 if bad values found in date fields.
# Otherwise returns fixed up InDate.
# On failure the global ERRNAME is set to a description of the problem.
# A date with two components is always read as month, day.
function MakeGoodDate(InDate,DateSep,  DateElem,NumElem,Month,Day,Year,i) {
    if (InDate ~ /^[ \t]*$/) {
        ERRNAME = "Empty date field"
        return 1
    }
    if ((NumElem = split(InDate,DateElem,DateSep)) < 2) {
        ERRNAME = "Need at least two fields in date"
        return 1
    }
    if (NumElem > 3) {
        ERRNAME = "Too many fields in date"
        return 2
    }
    if (ZeroOnly && NumElem < 3) {
        # No trailing period: FileErr() appends one.
        ERRNAME = "No year given in date"
        return 2
    }
    for (i = 1; i <= NumElem; i++) {
        # Every component must be all digits.  Month and day must be at
        # least 1, but a year component may be zero (e.g. "00" for 2000).
        if (DateElem[i] !~ /^[0-9]+$/ ||
          (DateElem[i] + 0 < 1 && !(NumElem == 3 && i == YearField))) {
            ERRNAME = "Bad field in date"
            return 2
        }
    }
    if (NumElem == 2 || YearField == 3) {
        Month = DateElem[1]
        Day = DateElem[2]
    }
    else {
        Month = DateElem[2]
        Day = DateElem[3]
    }
    if (NumElem == 3)
        Year = DateElem[YearField]
    else
        Year = ExpYear
    if (Year < 100 && Century)
        Year += ExpCentury
    # * in format doesn't work, so the width is spliced into the format.
    # The 0 flag keeps years such as 5 from being printed as " 5".
    if (YearField == 3)
        return \
          sprintf("%02d%s%02d%s%0" YearDig "d",Month,DateSep,Day,DateSep,Year)
    else
        return \
          sprintf("%0" YearDig "d%s%02d%s%02d",Year,DateSep,Month,DateSep,Day)
}

# @(#) ProcArgs 1.1 93/07/18
# 92/02/29 john h. dubois iii
# 93/07/18 Added "#" arg type
# optlist is a string which contains all of the possible command line options.
# If a character is followed by a colon,
# it indicates that that option takes an argument.
# If a character is followed by a pound sign (#),
# it indicates that that option takes an integer argument.
# Strings in argv[] which begin with "-" or "+" are taken to be
# strings of options, except that a string which consists solely of "-" or
# "+" is not taken to be an option string (it is not acted on).
# If an option takes an argument, the argument may either immediately
# follow it or be given separately.
# If an option that does not take an argument is given,
# an index with its name is created in options and its value is set to "1".
# If an option that does take an argument is given,
# an index with its name is created in options and its value
# is set to the value of the argument given for it.
# Options and their arguments are deleted from argv.
# Note that this means that there may be gaps
# left in the indices of argv[].
# If compress is nonzero, argv[] is packed by moving its elements so that
# they have contiguous integer indices starting with 0.
# argv[0] is not examined.
# An argument of "--" or "++" stops the scanning of argv[].
# The number of arguments left in argc is returned.
# If an error occurs,
# the string OptErr is set to an error message and -1 is returned.
function ProcArgs(argc,argv,optlist,options,compress,
  ArgNum,ArgsLeft,Arg,ArgLen,ArgInd,Option,Pos,ArgType) {
    # ArgNum is the index of the argument being processed.
    # ArgsLeft is the number of arguments left in argv.
    # Arg is the argument being processed.
    # ArgLen is the length of the argument being processed.
    # ArgInd is the position of the character in Arg being processed.
    # Option is the character in Arg being processed.
    # Pos is the position in optlist of the option being processed.
    # ArgType is the character following Option in optlist.
    ArgsLeft = argc
    for (ArgNum = 1; ArgNum < argc; ArgNum++) {
        Arg = argv[ArgNum]
        if (Arg ~ "^[-+]") {
            if ((Arg == "-") || (Arg == "+"))
                continue
            delete argv[ArgNum]
            ArgsLeft--
            if ((Arg == "--") || (Arg == "++"))
                break
            ArgLen = length(Arg)
            for (ArgInd = 2; ArgInd <= ArgLen; ArgInd++) {
                Option = substr(Arg,ArgInd,1)
                Pos = index(optlist,Option)
                if (!Pos) {
                    OptErr = "Invalid option: -" Option
                    return -1
                }
                if ((ArgType = substr(optlist,Pos + 1,1)) ~ "[:#]") {
                    if (ArgInd < ArgLen) {
                        # Value is the rest of this argument.
                        options[Option] = substr(Arg,ArgInd + 1)
                        ArgInd = ArgLen
                    }
                    else {
                        # Value is the next argument, if there is one.
                        if (ArgNum < (argc - 1)) {
                            options[Option] = argv[++ArgNum]
                            delete argv[ArgNum]
                            ArgsLeft--
                        }
                        else {
                            OptErr = "Option -" Option " requires an argument."
                            return -1
                        }
                    }
                    if (ArgType == "#" && options[Option] !~ "^[0-9]+$") {
                        OptErr = \
                          "Option -" Option " requires an integer argument."
                        return -1
                    }
                }
                else
                    options[Option] = 1
            }
        }
    }
    if (compress != 0)
        PackArr(argv,ArgsLeft)
    return ArgsLeft
}

# Packs Arr to indices starting with 0
# Num should be the number of elements in Arr
function PackArr(Arr,Num,  NewInd,OldInd) {
    NewInd = OldInd = 0
    for (; Num; Num--) {
        # Skip over gaps left by deleted elements.
        while (!(OldInd in Arr))
            OldInd++
        if (NewInd != OldInd) {
            Arr[NewInd] = Arr[OldInd]
            delete Arr[OldInd]
        }
        OldInd++
        NewInd++
    }
}

# Opts: Process command line arguments.
# Opts processes command line arguments using ProcArgs()
# and checks for errors.  If an error occurs, a message is printed
# to the standard error and the program is exited.
#
# Input variables:
# Name is the name of the program, for error messages.
# Usage is a usage message, for error messages.
# OptList the option description string, as used by ProcArgs().
# MinArgs is the minimum number of non-option arguments that this
# program should have, not including ARGV[0] and +h.
# If the program does not require any non-option arguments,
# MinArgs should be omitted or given as 0.
# Global variables:
# The command line arguments are taken from ARGV[].
# The arguments that are option specifiers and values are removed from
# ARGV[], leaving only ARGV[0] and the non-option arguments.
# The number of elements in ARGV[] should be in ARGC.
# After processing, ARGC is set to the number of elements left in ARGV[].
# The option values are put in Options[].
# On error, Err is set to 1 so it can be checked for in an END block.
# Return value: The number of elements left in ARGV is returned.
function Opts(Name,Usage,OptList,MinArgs,  ArgsLeft) {
    if (MinArgs == "")
        MinArgs = 0
    ArgsLeft = ProcArgs(ARGC,ARGV,OptList,Options,1)
    # A help request counts as satisfying one required argument.
    if ((ArgsLeft + ("h" in Options)) < (MinArgs+1)) {
        # ArgsLeft == -1 means ProcArgs() already set OptErr.
        if (ArgsLeft != -1)
            OptErr = "Not enough arguments"
        print Name ": " OptErr ".  Use +h for help." > "/dev/stderr"
        print Usage > "/dev/stderr"
        Err = 1
        exit 1
    }
    return ArgsLeft
}

# ErrExit: report error S for the current record on the standard error,
# set Err to 1 and exit with a nonzero status.
function ErrExit(S) {
    printf "Error on line %d of file \"%s\":\n%s\n%s\n",
      FNR,FILENAME,S,$0 > "/dev/stderr"
    Err = 1
    exit 1
}

# FileErr: report problem S with the current record on the standard error.
# A period is appended to S, so S should not end with one.
function FileErr(S) {
    printf "File %s, line %d: %s.\n%s\n",FILENAME,FNR,S,$0 > "/dev/stderr"
}