#!/usr/skunk/bin/gawk -f
# address: find lines with matching name field in an address file
# gawk is used because it has IGNORECASE.
# @(#) address.gawk 2.4 94/04/23
# john h. dubois iii (john@armory.com) 90/05/30
# 90/11/14 removed ksh-specific code
# 91/07/06 Changed to understand new record format (uses awk now),
#          changed syntax so that all normal args are components of
#          a single name and each name given must be matched as a word
# 91/09/30 finished rewrite for new record format
# 91/10/10 Added phone number dereferencing; made empty fields be
#          translated into single blanks so they'll be preserved when
#          split on one-or-more newlines (for sh & ksh, which have no
#          facility to split on single instances of a character); added -@
# 91/11/11 changed to use [] to indicate an indirect reference,
#          expanded indirect reference check/lookup to include fields 2..4
#          (address, phone number, email address)
# 91/11/13 Awk prog too big for XENIX cmd line arg;
#          separated into sh & awk files
# 92/01/22 Added LIB to gawk line
# 92/04/27 Converted to #!gawk script
# 92/07/19 Converted to handle multiple numbers in phone number field
# 92/10/16 minor bugfix
# 92/10/29 fixed bug that prevented entries with dereferenced phone numbers
#          from being found if -@ was given
# 93/05/02 Allow colons as separators in $ADDRESS
# 93/09/02 Added -l option
# 94/03/13 Cleaned up.  Added comments.  Print matching records at end.
#          Worked around gawk bugs.
# 94/04/23 Read .addrrc

# Main program: parse options, pick the address files, search them for the
# name given on the command line and print every matching record.
# Sets globals: Name, Usage, rcFile, Debug, Files[], NumMatches,
# MatchingRecords[], FileFound[], PrintFilename, Dereference.
BEGIN {
    Name = "address"
    Usage = "Usage: " Name " [-dDehl@] [-a filename] name ..."
    rcFile = ".addrrc"
    # MinArgs is given as 0: Opts() only exempts -h from its argument count
    # check, so asking it to require a name made "address -D" fail with
    # "Not enough arguments".  The name requirement is enforced below,
    # after -h and -D have been handled.
    ARGC = Opts(Name,Usage,"del@a:Dhx",0,"~/" rcFile,
      "DEREFERENCE,PRINTREGEX,PRINTFILENAME,MATCHAT,ADDRESS",1)
    Debug = ("x" in Options)
    # Opts() exits by itself on error; this is only a safety net.
    if (ARGC == -1) {
        print Name ": " OptErr > "/dev/stderr"
        exit(1)
    }
    if ("h" in Options) {
        print \
Name ": find lines with matching name field in address files.\n" \
Usage "\n"\
Name " searches address files for a given name, and prints any records that\n"\
"have a matching name field. Matching is not case sensitive. The search\n"\
"name will match a name field if each part of the search name occurs in the\n"\
"same order as given and as a separate word in the name field. The search\n"\
"name will match a name field even if other text occurs in the name field\n"\
"between the parts of the search name. The parts of the search name should\n"\
"be separated by whitespace on the command line, and should be quoted if\n"\
"they contain characters meaningful to the shell. @ and : can be matched\n"\
"by either @ or :. Blank fields are printed as lines consisting of a\n"\
"single space. If multiple matching records are found, they are printed\n"\
"separated by lines consisting solely of '+'. Multiple filenames can be\n"\
"given to -a as a whitespace- or colon-separated list (only the first -a\n"\
"is used). If -a is not given, the value of\n"\
"$ADDRESS is used; it should be a list of whitespace- or colon-separated\n"\
"filenames. ADDRESS can also be set by assigning it a value in a file\n"\
"named " rcFile " in the user's home directory, in this way:\n"\
"ADDRESS=address-file-list\n"\
"If ADDRESS is not set, " Name " uses the default files\n"\
"/local/public/address and $HOME/.address.\n"\
"Other options:\n"\
"-h gives this help list.\n"\
"-d dereferences indirect entries (those that are contained in brackets).\n"\
" The string contained in brackets ([]) is looked up. The name field of\n"\
" another record must match it exactly. The indirect entry, including\n"\
" the enclosing brackets, is replaced by the entire equivalent field of\n"\
" the referenced record. One level of dereferencing is done.\n"\
"-e prints to stderr the regular expression used to search the name field.\n"\
"-D prints a description of the format of an address file.\n"\
"-l prints the names of the files each entry was found in.\n"\
"-@ requires that any @ or : in the name field be matched by the pattern.\n"\
"-d, -e, -l, and -@ can also be turned on by putting DEREFERENCE,\n"\
"PRINTREGEX, PRINTFILENAME, and MATCHAT, respectively, in " rcFile "."
        exit(0)
    }
    if ("D" in Options) {
        print \
"Format of an address file: an address file consists of records separated\n"\
"by delimiter lines consisting solely of a '+' character. The address file\n"\
"must also begin and end with lines consisting solely of a '+' character.\n"\
"The '+' character may not occur anywhere in the file except as a delimiter\n"\
"line. Each line of the record is a field. The fields are used as follows:\n"\
"+\n"\
"Name\n"\
"Address\n"\
"Phone number\n"\
"Email address\n"\
"Comment ...\n"\
"+\n"\
"Comment can continue onto multiple lines. Unused fields can be left\n"\
"blank; unused fields at the end of the record do not need to be given. \n"\
Name " only interprets the name and (possibly) phone number fields.\n"\
"An indirect reference can be given by specifying in a field\n"\
"[indirect-name]\n"\
"where indirect-name is an exact match for the name field of another record.\n"\
"Other text may be included on the same line as indirect-name.\n"\
"This can be used to add an extension to a PBX number, etc."
        exit(0)
    }
    # A search needs at least one name component besides ARGV[0].
    if (ARGC < 2) {
        print Name ": Not enough arguments. Use -h for help." > "/dev/stderr"
        print Usage > "/dev/stderr"
        exit(1)
    }
    if ("a" in Options)
        split(Options["a"],Files,"[ \t:]+")
    else if ("ADDRESS" in ENVIRON && ENVIRON["ADDRESS"] != "")
        # If no files speced on command line, use $ADDRESS
        split(ENVIRON["ADDRESS"],Files,"[ \t:]+")
    else {
        # If no files speced on command line and $ADDRESS not set,
        # use the default files (but only those that exist)
        FilesInd = 1
        DefaultFiles[1] = "/local/public/address"
        DefaultFiles[2] = ENVIRON["HOME"] "/.address"
        for (i = 1; i <= 2; i++) {
            file = DefaultFiles[i]
            # Probe into a scratch variable so $0 is left alone; a file
            # counts as usable only if one record can be read from it.
            if ((getline Probe < file) == 1)
                Files[FilesInd++] = file
            close(file)
        }
        if (FilesInd == 1) {
            print Name ": No address files." > "/dev/stderr"
            exit(1)
        }
    }
    if (Debug) {
        printf "Address files:" > "/dev/stderr"
        for (i = 1; i in Files; i++)
            # Explicit format: a '%' in a filename must not be interpreted
            printf " %s",Files[i] > "/dev/stderr"
        print "" > "/dev/stderr"
    }
    NumMatches = SearchFiles(ARGV,ARGC,"e" in Options,Files,MatchingRecords,
      FileFound)
    PrintFilename = "l" in Options
    Dereference = "d" in Options
    for (i = 1; i <= NumMatches; i++)
        PrintRec(MatchingRecords[i],FileFound[i],PrintFilename,Dereference)
}

# Joins the name components NamePieces[1..] (NumPieces counts element 0 too)
# into one blank-separated name, builds the search patterns for it and
# searches Files[] with them.
# If PrintExpression is true the patterns are printed to stderr.
# Matching records are stored in MatchingRecords[] and the file each was
# found in is stored in FileFound[].
# Returns the number of matching records
# Uses globals set by MakePats(): Qualifier, RName, FirstName
function SearchFiles(NamePieces,NumPieces,PrintExpression,Files,
MatchingRecords,FileFound, Name,ArgInd) {
    ArgInd = 1
    while (NumPieces > 1) {
        # Tolerate gaps in the indices of NamePieces[]
        while (!(ArgInd in NamePieces))
            ArgInd++
        Name = Name " " NamePieces[ArgInd]
        NumPieces--
        ArgInd++
    }
    # Drop the blank that was put in front of the first piece
    Name = substr(Name,2)
    MakePats(Name)
    if (PrintExpression) {
        print "Name pattern: " RName > "/dev/stderr"
        if (Qualifier != "")
            print "Qualifier: " Qualifier > "/dev/stderr"
    }
    return FindAddr(Files,FirstName,RName,Qualifier,MatchingRecords,FileFound)
}

# Sets the record/field separators for reading address files, turns on
# case-insensitive matching, and builds the character-class strings and the
# Delim[] table used by Name2SearchPat().
# Sets globals: RS, FS, OFS, IGNORECASE, AN, NAN, NotAN, NotNAN, an, nan,
# Delim[]
function Setup() {
    RS = "+"
    OFS = FS = "\n"
    IGNORECASE = 1
    # NOTE(review): the variable a is not assigned anywhere in the visible
    # source, so it contributes an empty string to the classes below.
    AN = "[a-z0-9]"                  # alphanum
    NAN = "[^ \ta-z0-9" a "]"        # nonalphanum
    NotAN = "[^a-z0-9" a "]"         # not-alphanum
    NotNAN = "[ \ta-z0-9]"           # not-nonalphanum
    # The type of a word is (Word ~ AN)
    an = 1
    nan = 0
    # Delimiters for use at ends of line
    Delim[an] = NotAN
    Delim[nan] = NotNAN
    # Delimiters between two words
    Delim[an,an] = NotAN "(.*" NotAN ")?"
    Delim[an,nan] = "(" NotAN ".*" NotNAN "| )?"
    Delim[nan,an] = "(" NotNAN ".*" NotAN "| )?"
    Delim[nan,nan] = NotNAN "(.*" NotNAN ")?"
}

# Builds the search patterns for Name.  If Name contains @ or :, the text
# after the last one becomes the qualifier pattern.
# Sets globals: Qualifier, RName
# Calls Name2SearchPat(), which sets FirstName
function MakePats(Name) {
    Setup()
    if (Name ~ "[@:]") {
        # Split the last [@:]-separated part off into Qualifier
        Qualifier = Name
        sub(".*[@:]","",Qualifier)
        sub("[@:][^@:]*$","",Name)
        gsub("[@:]","[@:]",Name)    # make either : or @ match either : or @
        Qualifier = Name2SearchPat(Qualifier)
    }
    # Done last so that FirstName ends up being the first word of the name,
    # not of the qualifier.
    RName = Name2SearchPat(Name)
}

# Converts Name into a regular expression that matches the words of Name in
# order, each as a separate word.  The words are used as given, so regular
# expression operators in them keep their meaning.
# Exits with status 1 if Name contains no words.
# Sets global: FirstName
# Uses global: Delim[]
# Returns: Search pattern
function Name2SearchPat(Name, Words,Types,NumWords,i,Pat) {
    # Tell awk that Words and Types are arrays...
    Words[1] = Types[1] = ""
    NumWords = GetWords(Name,Words,Types)
    if (!NumWords)      # Quit if no words given, just whitespace
        exit 1
    for (i = 1; i < NumWords; i++)
        Pat = Pat Words[i] Delim[Types[i],Types[i+1]]
    # Result: (^|<delim>)word1<delim>...wordN(<delim>|$)
    Pat = Delim[Types[1]] ")" Pat Words[NumWords] "(" Delim[Types[NumWords]]
    Pat = "(^|" Pat "|$)"
    FirstName = Words[1]
    return Pat
}

# A word is a sequence of either alphanums or non-alphanums,
# as specified by the sets AN and NAN (neither of which include whitespace).
# Words are delimited by whitespace or by the juxtaposition of AN and NAN.
# GetWords puts the words of S in Words and the type of each word in Types
# (1 if the word starts with an alphanum, 0 otherwise).
# The return value is the number of words found.
function GetWords(S,Words,Types, i,Pos) {
    # convert all whitespace to single spaces
    gsub("[ \t]+"," ",S)
    # get rid of trailing whitespace
    gsub(" $","",S)
    i = 0
    while (S != "") {
        sub("^ ","",S)
        # Pos is the position of the last character of the first word
        Pos = match(S,". |" NAN AN "|" AN NAN)
        if (Pos) {
            Words[++i] = substr(S,1,Pos)
            Types[i] = (S ~ ("^" AN))
            S = substr(S,Pos + 1)
        }
        else {
            # No further boundary: the rest of S is the last word
            Words[++i] = S
            Types[i] = (S ~ ("^" AN))
            S = ""
        }
    }
    return i
}

# Searches for an exact complete match of Name to the name field of
# an entry in the address files
# Returns the first matching record, or null if none found.
# On success, if the file that the matching record was found in is not an
# index of IndFiles[], its name is printed and it is made an index of
# IndFiles[] (only when PrintFilename is true).
# Name is compared as a literal string, not as a regular expression, so
# names containing characters such as ( ) . * are found.  The comparison
# ignores case when IGNORECASE is set, as the pattern match it replaces did.
# Must not call match(): PrintRec() relies on RSTART and RLENGTH being
# unchanged across a call to this function.
# Uses globals: Files[], Debug, IGNORECASE
function GetEntry(Name,PrintFilename,IndFiles, FileInd,File,Entry,ret,IndFile,
Key,KeyLen,Head) {
    # A record starts with the newline that follows the '+' delimiter, and
    # the name field is terminated by a newline.
    Key = "\n" Name "\n"
    if (IGNORECASE)
        Key = tolower(Key)
    KeyLen = length(Key)
    for (FileInd = 1; FileInd in Files; FileInd++) {
        File = Files[FileInd]
        # Make filename different
        # so that the file being read by FindAddr won't be affected
        # Don't have to do this with records not printed until end
        #if (File ~ "^/")
        #    File = "/" File
        #else
        #    File = "./" File
        if (Debug)
            printf "Checking file \"%s\" for name \"%s\"...\n",
              File,Name > "/dev/stderr"
        while ((ret = (getline Entry < File)) == 1) {
            Head = substr(Entry,1,KeyLen)
            if (IGNORECASE)
                Head = tolower(Head)
            if (Head == Key) {
                close(File)
                if (Debug)
                    printf "Found entry: %s\n",Entry > "/dev/stderr"
                sub("^\n","",Entry)
                IndFile = Files[FileInd]
                if (PrintFilename && !(IndFile in IndFiles)) {
                    print IndFile
                    IndFiles[IndFile]
                }
                return Entry
            }
        }
        if (ret == -1)
            printf "Could not open address file \"%s\".\n",
              Files[FileInd] > "/dev/stderr"
        close(File)
    }
    return ""
}

# Sets MatchingRecords[1..n] to matching records, FileFound[] to the file
# each match is found in, and returns the number of matching records.
# FirstName, RName and Qualifier are the patterns made by MakePats().
# Exits with status 1 if a file cannot be read.
# Uses globals: Options[], Debug
function FindAddr(Files,FirstName,RName,Qualifier,MatchingRecords,FileFound,
Ext,i,ret,InFile,NumMatch) {
    NumMatch = 0
    for (i = 1; i in Files; i++) {
        InFile = Files[i]
        while ((ret = (getline < InFile)) == 1) {
            # $1 is always empty (it is the field between the + and the
            # first newline).
            # Compare against FirstName first to avoid expensive
            # comparison with RName for most records.
            if (!(($2 ~ FirstName) && ($2 ~ RName)))
                continue
            # If -@ was given, either a qualifier must be given in
            # the search pattern, or the name field must not have a
            # qualifier and the first number on the phone number field
            # must not have a qualifier
            if (("@" in Options) && (Qualifier == "") &&
              !(($2 !~ "[@:]") && ($4 ~ "^(\\[|[^: ]+( |$))")))
                continue
            # If a qualifier is given in the search pattern,
            # it must match a qualifier in the name field...
            if ((Qualifier != "") &&
              !(match($2,".*[@:]") && (substr($2,1,RLENGTH - 1) ~ RName) &&
              (substr($2,RLENGTH + 1) ~ Qualifier))) {
                # ... or a qualifier in the phone number field.
                # The assignment rewrites the record so that the matching
                # number comes first when it is printed.
                if (($4 = PhoneMatch($4,Qualifier)) == "")
                    continue
            }
            MatchingRecords[++NumMatch] = $0
            FileFound[NumMatch] = InFile
            if (Debug)
                printf "Found match:\n%s\n",$0 > "/dev/stderr"
        }
        if (ret == -1) {
            printf "Error reading file \"%s\".\n",InFile > "/dev/stderr"
            exit(1)
        }
        close(InFile)
    }
    return NumMatch
}

# Line is a phone number field: blank-separated numbers, each optionally
# preceded by "qualifier:".
# If the qualifier of one of the numbers matches the pattern Qualifier,
# returns Line with the first such number moved to the front;
# otherwise returns null.
function PhoneMatch(Line,Qualifier, i,j,Fields) {
    split(Line,Fields," +")
    for (i = 1; i in Fields; i++)
        if (match(Fields[i],".*:") &&
          (substr(Fields[i],1,RLENGTH - 1) ~ Qualifier)) {
            Line = Fields[i]
            for (j = 1; j in Fields; j++)
                if (j != i)
                    Line = Line " " Fields[j]
            return Line
        }
    return ""
}

# Prints Record, one field per line, preceded by a "+" line if a record has
# already been printed and by InFile if PrintFilename is true.
# An indirect reference ([name]) in fields 2-4 is looked up with GetEntry().
# If Dereference is true the reference is replaced by the same field of the
# referenced record; otherwise it is shown as [name]->[value].
# Exits with status 1 if a reference cannot be found.
# Uses/sets global: Plus
function PrintRec(Record,InFile,PrintFilename,Dereference, FieldVal,
NumFields,MainFields,FieldNum,IndirName,Fields,Indir,i,IndFiles,Entry) {
    # Convince gawk that Fields is an array...
    Fields["x"] = ""
    # Print a "+" between records, but not before the first record
    printf "%s",Plus
    Plus = "+\n"
    # Print source file if asked for
    if (PrintFilename) {
        print InFile
        IndFiles[InFile]
    }
    # Get rid of the leading & trailing newlines
    NumFields = split(substr(Record,2),MainFields,"\n")
    if (MainFields[NumFields] == "")
        NumFields--
    # Dereference fields 2-4 (fields which do not exist will fail to match)
    for (FieldNum = 2; FieldNum <= 4; FieldNum++) {
        FieldVal = MainFields[FieldNum]
        # A reference looks like: [replacement-record-name-field] other-stuff,
        # where replacement-record-name-field is an exact match for the name
        # field of another record.
        # Find the reference, if any.
        # gawk core dumps on this when the pattern is given in //, but not
        # when given in quotes...
        if (match(FieldVal,"\\[[^]]+\\]")) {
            IndirName = substr(MainFields[FieldNum],RSTART + 1,RLENGTH - 2)
            if ((Entry = GetEntry(IndirName,PrintFilename,IndFiles)) == "") {
                # Diagnostics go to stderr like every other error here
                printf "Indirect reference not found for \"%s\"\n" \
                  "in field %d of the following entry:\n%s\n",
                  IndirName,FieldNum,substr(Record,2) > "/dev/stderr"
                exit 1
            }
            split(Entry,Fields,"\n")
            Indir = Fields[FieldNum]
            # Replace indirect reference with dereferenced value
            if (!Dereference)
                Indir = "[" IndirName "]->[" Indir "]"
            # RSTART and RLENGTH are still those of the match() above
            MainFields[FieldNum] = \
              substr(MainFields[FieldNum],1,RSTART - 1) Indir \
              substr(MainFields[FieldNum],RSTART + RLENGTH)
        }
    }
    for (i = 1; i <= NumFields; i++)
        print MainFields[i]
}

# @(#) ProcArgs 1.2 94/04/02
# 92/02/29 john h. dubois iii (john@armory.com)
# 93/07/18 Added "#" arg type
# 93/09/26 Don't count -h against MinArgs
# 94/01/01 Stop scanning at first non-option arg.  Added '>' option type.
#          Removed meaning of '+' or '-' by itself.
# 94/03/08 Added & option and *()< option types.
# 94/04/02 Added NoRCopt to Opts()

# optlist is a string which contains all of the possible command line options.
# A character followed by certain characters indicates that the option takes
# an argument, with type as follows:
# :     String argument
# *     Floating point argument
# (     Non-negative floating point argument
# )     Positive floating point argument
# #     Integer argument
# <     Non-negative integer argument
# >     Positive integer argument
# The only difference the type of argument makes is in the runtime argument
# error checking that is done.

# The & option is a special case used to get numeric options without the
# user having to give an option character.  It is shorthand for [-+.0-9].
# If & is included in optlist and an option string that begins with one of
# these characters is seen, the value given to "&" will include the first
# char of the option.  & must be followed by a type character other than ':'.
# Note that if e.g. &> is given, an option of -.5 will produce an error.
# Strings in argv[] which begin with "-" or "+" are taken to be
# strings of options, except that a string which consists solely of "-"
# or "+" is taken to be a non-option string; like other non-option strings,
# it stops the scanning of argv and is left in argv[].
# If an option takes an argument, the argument may either immediately
# follow it or be given separately.
# If an option that does not take an argument is given,
# an index with its name is created in options and its value is set to "1".
# If an option that does take an argument is given,
# an index with its name is created in options and its value
# is set to the value of the argument given for it.
# Options and their arguments are deleted from argv.
# Note that this means that there may be gaps left in the indices of argv[].
# If compress is nonzero, argv[] is packed by moving its elements so that
# they have contiguous integer indices starting with 0.
# argv[0] is not examined.
# An argument of "--" or "++" stops the scanning of argv[].
# The number of arguments left in argc is returned.
# If an error occurs, the string OptErr is set to an error message and -1 is
# returned.
function ProcArgs(argc,argv,OptList,Options,compress,
ArgNum,ArgsLeft,Arg,ArgLen,ArgInd,Option,Pos,NumOpt,Value,HadValue,
NeedNextOpt) {
# ArgNum is the index of the argument being processed.
# ArgsLeft is the number of arguments left in argv.
# Arg is the argument being processed.
# ArgLen is the length of the argument being processed.
# ArgInd is the position of the character in Arg being processed.
# Option is the character in Arg being processed.
# Pos is the position in OptList of the option being processed.
# NumOpt is true if a numeric option may be given.
# Value is the candidate value for the option being processed.
# NeedNextOpt is true if that value has to come from the next argument.
    ArgsLeft = argc
    NumOpt = index(OptList,"&")
    for (ArgNum = 1; ArgNum < argc; ArgNum++) {
        if ((Arg = argv[ArgNum]) !~ /^[-+]./)   # Not an option; quit
            break
        delete argv[ArgNum]
        ArgsLeft--
        if ((Arg == "--") || (Arg == "++"))
            break
        ArgLen = length(Arg)
        for (ArgInd = 2; ArgInd <= ArgLen; ArgInd++) {
            Option = substr(Arg,ArgInd,1)
            if (NumOpt && Option ~ /[-+.0-9]/) {
                Option = "&"
                Arg = "&" Arg
                ArgLen++
                Pos = NumOpt
            }
            else if (!(Pos = index(OptList,Option)) || Option == "&") {
                OptErr = "Invalid option: -" Option
                return -1
            }
            # Find what the option's value will be if it needs one
            if (NeedNextOpt = (ArgInd >= ArgLen)) {
                # Value is the next arg.  Test for it first so that a
                # missing argument does not create an empty argv element.
                if ((ArgNum + 1) in argv)
                    Value = argv[ArgNum + 1]
                else
                    Value = ""
            }
            else    # Value is included with option
                Value = substr(Arg,ArgInd + 1)
            # A value attached to the option is always available; a separate
            # one is available only if another argument follows.
            if (HadValue = AssignVal(Option,Value,Options,
              substr(OptList,Pos + 1,1),
              !NeedNextOpt || (ArgNum < (argc - 1)))) {
                if (HadValue == -1)
                    return -1
                if (NeedNextOpt) {
                    delete argv[++ArgNum]
                    ArgsLeft--
                }
                break   # Used up this option
            }
        }
    }
    if (compress != 0)
        PackArr(argv,ArgsLeft)
    return ArgsLeft
}

# Assigns Value to Options[Option] unless that element already exists.
# ArgType is the character following the option in the option list; if it
# is a type character the option takes a value, which is checked with
# CheckType(), otherwise the value stored is 1.
# GotValue is true if a value was available for the option.
# Name, if given, is the variable name of the option, for error messages.
# Global variables: OptErr
# Return value: -1 on error, 0 if option did not require an argument,
# 1 if it did.
function AssignVal(Option,Value,Options,ArgType,GotValue,Name,
UsedValue,Err) {
    # If option takes a value...
    if (UsedValue = (ArgType ~ "[:*()#<>]")) {
        if (!GotValue) {
            if (Name != "")
                OptErr = "Variable requires a value -- " Name
            else
                OptErr = "option requires an argument -- " Option
            return -1
        }
        if ((Err = CheckType(ArgType,Value,Option,Name)) != "") {
            OptErr = Err
            return -1
        }
    }
    else
        Value = 1
    if (!(Option in Options))   # Don't overwrite previously assigned values
        Options[Option] = Value
    return UsedValue
}

# Option is the option letter
# Value is the value being assigned
# Name is the var name of the option, if any
# ArgType is one of:
# :     String argument
# *     Floating point argument
# (     Non-negative floating point argument
# )     Positive floating point argument
# #     Integer argument
# <     Non-negative integer argument
# >     Positive integer argument
# Returns null on success, err string on error
function CheckType(ArgType,Value,Option,Name, Err) {
    if (ArgType == ":")
        return ""
    # A number begins with optional + or -, and is followed by a string of
    # digits or a decimal with digits before it, after it, or both
    if (Value !~ /^[-+]?([0-9]+|[0-9]*\.[0-9]+|[0-9]+\.)$/)
        Err = "must be a number"
    else if (ArgType ~ "[#<>]" && Value ~ /\./)
        Err = "may not include a fraction"
    # Value may be a pure string (e.g. the result of substr()), in which
    # case a bare comparison with 0 would compare strings: "+5" would sort
    # before "0" and "0.0" would differ from "0".  Adding 0 forces a
    # numeric comparison.
    else if (ArgType ~ "[()<>]" && (Value + 0) < 0)
        Err = "may not be negative"
    else if (ArgType ~ "[)>]" && (Value + 0) == 0)
        Err = "must be a positive number"
    if (Err != "") {
        if (Name != "")
            return "Value assigned to variable " Name " " Err
        else {
            # A numeric option has no letter; report the value instead
            if (Option == "&")
                Option = Value
            return "Value assigned to option -" Option " " Err
        }
    }
    else
        return ""
}

# Packs Arr to indices starting with 0
# Num should be the number of elements in Arr
# (the search for the next element does not stop if Num is too large)
function PackArr(Arr,Num, NewInd,OldInd) {
    NewInd = OldInd = 0
    for (; Num; Num--) {
        # Skip over gaps to the next element that exists
        while (!(OldInd in Arr))
            OldInd++
        if (NewInd != OldInd) {
            Arr[NewInd] = Arr[OldInd]
            delete Arr[OldInd]
        }
        OldInd++
        NewInd++
    }
}

# Opts: Process command line arguments.
# Opts processes command line arguments using ProcArgs()
# and checks for errors.  If an error occurs, a message is printed
# and the program is exited.
#
# Input variables:
# Name is the name of the program, for error messages.
# Usage is a usage message, for error messages.
# OptList the option description string, as used by ProcArgs().
# MinArgs is the minimum number of non-option arguments that this
# program should have, not including ARGV[0] and -h.
# If the program does not require any non-option arguments,
# MinArgs should be omitted or given as 0.
# rcFile, if given, is the name of a file to read for variable initialization.
# Values given in it will not override values given on the command line.
# VarNames is a comma-separated list of variable names to map to options,
# in the same order as the options are given in OptList.
# If UseEnv is given and nonzero, the variables will also be searched for in
# the environment.  Values given in the environment will override those given
# in the rcfile but not those given on the command line.
# NoRCopt, if given, is an additional letter option that if given on the
# command line prevents the rcfile from being read.
# Global variables:
# The command line arguments are taken from ARGV[].
# The arguments that are option specifiers and values are removed from
# ARGV[], leaving only ARGV[0] and the non-option arguments.
# The number of elements in ARGV[] should be in ARGC.
# After processing, ARGC is set to the number of elements left in ARGV[].
# The option values are put in Options[].
# On error, Err is set to 1 so it can be checked for in an END block.
# Error and usage messages are written to stderr.
# Return value: The number of elements left in ARGV is returned.
function Opts(Name,Usage,OptList,MinArgs,rcFile,VarNames,UseEnv,NoRCopt,
ArgsLeft) {
    if (MinArgs == "")
        MinArgs = 0
    ArgsLeft = ProcArgs(ARGC,ARGV,OptList NoRCopt,Options,1)
    # ARGV[0] is included in ArgsLeft, hence MinArgs+1.  -h is not counted
    # against MinArgs.  An error return of -1 also ends up here.
    if ((ArgsLeft + ("h" in Options)) < (MinArgs+1)) {
        if (ArgsLeft != -1)
            OptErr = "Not enough arguments"
        print Name ": " OptErr ".  Use -h for help." > "/dev/stderr"
        print Usage > "/dev/stderr"
        Err = 1
        exit 1
    }
    if (rcFile != "" && (NoRCopt == "" || !(NoRCopt in Options)) &&
      InitOpts(rcFile,Options,OptList,VarNames,UseEnv) == -1) {
        print Name ": " OptErr ".  Use -h for help." > "/dev/stderr"
        Err = 1
        exit 1
    }
    return ArgsLeft
}

# Initializes options from the environment (if UseEnv is true) and then
# from the file rcFile, without overriding options that are already set.
# OptTypes is the option description string; VarNames is the
# comma-separated list of variable names, which are matched up in order
# with the options in OptTypes.
# A leading "~/" in rcFile is replaced by $HOME/.
# Lines of rcFile have the form VAR=value or, for options that take no
# value, just VAR.  Empty lines and lines beginning with # are ignored.
# Returns 0 on success and -1 on error.
# Global vars: sets OptErr; uses ENVIRON[]
function InitOpts(rcFile,Options,OptTypes,VarNames,UseEnv,
Line,Var,Pos,Vars,Map,CharOpt,NumVars,TypesInd,Types,Type,Ret,i) {
    NumVars = split(VarNames,Vars,",")
    TypesInd = Ret = 0
    # i is declared local so that a caller's global i is not clobbered
    for (i = 1; i <= NumVars; i++) {
        Var = Vars[i]
        CharOpt = substr(OptTypes,++TypesInd,1)
        # Skip over the type character of the previous option
        if (CharOpt ~ "^[:*()#<>&]$")
            CharOpt = substr(OptTypes,++TypesInd,1)
        Map[Var] = CharOpt
        Types[Var] = Type = substr(OptTypes,TypesInd+1,1)
        # The environment is processed before the rc file, and AssignVal()
        # never overwrites, so rc file entries cannot replace these.
        if (UseEnv && (Var in ENVIRON) &&
          (AssignVal(CharOpt,ENVIRON[Var],Options,Type,1,Var) == -1))
            return -1
    }
    if (rcFile ~ "^~/")
        rcFile = ENVIRON["HOME"] substr(rcFile,2)
    # A missing rc file is not an error: getline returns -1 and nothing is
    # read.
    while ((getline Line < rcFile) == 1)
        if (Line !~ /^#/ && Line !~ "^[ \t]*$") {
            if (Pos = index(Line,"="))
                Var = substr(Line,1,Pos-1)
            else
                Var = Line      # If no value, var is entire line
            if (Var in Map) {
                if (AssignVal(Map[Var],substr(Line,Pos+1),Options,
                  Types[Var],Pos != 0,Var) == -1) {
                    close(rcFile)
                    return -1
                }
            }
            else {
                OptErr = sprintf("Unknown var \"%s\" set in %s",Var,rcFile)
                Ret = -1
            }
        }
    close(rcFile)
    # With the x option set, show what each variable ended up as
    if ("x" in Options)
        for (Var in Map)
            if (Map[Var] in Options)
                printf "%s=%s\n",Var,Options[Map[Var]]
            else
                printf "%s not set\n",Var
    return Ret
}