#!/usr/skunk/bin/gawk -f
#!/usr/bin/awk -f
# @(#) postings.awk 1.0 94/03/09
# 93/01/24 john h. dubois iii (john@armory.com)
# 93/07/02 Added help, fixed Total.
# 94/03/09 Use gawk so - options can be given

# postings: report how many saved articles were posted to each newsgroup.
#
# Every command-line argument is read as an article file.  A line whose
# first field is four ^A (\001) characters starts a message header; the
# first empty line ends it.  Each "Newsgroups:" line seen inside a header
# counts as one article (Total) and adds one to the count of every group
# named on it.  The report lists the groups in ascending order of count,
# followed by the total.
#
# Exit status: 0 if at least one file was read to EOF (or -h was given),
# 1 on a usage error or if no file could be read.
BEGIN {
    Usage = "Usage: postings [-h] article-file ..."

    if (ARGC < 2) {
        # Diagnostics go to stderr so they never mix with the report.
        print Usage > "/dev/stderr"
        print "Use -h for help." > "/dev/stderr"
        exit(1)
    }
    if (ARGV[1] ~ "^[-+]h$") {
        print \
"postings: report number of postings to groups.\n"\
Usage "\n"\
"article-file is a file that postings are saved to, in mailbox format."
        exit(0)
    }

    GoodFile = Total = 0
    for (FileNum = 1; FileNum < ARGC; FileNum++) {
        InFile = ARGV[FileNum]
        # Start each file outside any header, so a file that ends in the
        # middle of a header cannot affect the file that follows it.
        InHeader = 0
        while ((ret = (getline < InFile)) == 1) {
            if ($1 == "\001\001\001\001")
                InHeader = 1
            else if ($0 == "")
                InHeader = 0
            else if (InHeader && tolower($1) == "newsgroups:") {
                # Header field names are case-insensitive.
                Total++
                # Use everything after the field name rather than $2 alone,
                # so that "a, b" (whitespace around the commas) is read as
                # two groups instead of "a" plus an empty name.
                GroupList = $0
                sub(/^[ \t]*[^ \t]+[ \t]*/, "", GroupList)
                sub(/[ \t]+$/, "", GroupList)
                NumGroups = split(GroupList, Groups, /[ \t]*,[ \t]*/)
                for (i = 1; i <= NumGroups; i++) {
                    # Skip empty names left by stray or trailing commas.
                    if (Groups[i] != "")
                        GroupCt[Groups[i]]++
                }
            }
        }
        close(InFile)
        # getline leaves ret at 0 on EOF and at -1 on an open/read error.
        if (ret)
            printf "Error reading file %s.\n", InFile > "/dev/stderr"
        else
            GoodFile = 1
    }
    if (!GoodFile)
        exit(1)

    # k[1..NGroup] receives the group names ordered by ascending count.
    NGroup = qsort_arb_ind(GroupCt, k)
    for (i = 1; i <= NGroup; i++) {
        group = k[i]
        printf "%3d %s\n", GroupCt[group], group
    }
    printf "%3d Total\n", Total
}

# arr is an array of values with arbitrary indices.
# Array k is returned with numeric indices 1..n.
# The values in k are the indices of array arr,
# ordered so that if array arr is stepped through
# in the order arr[k[1]] .. arr[k[n]], it will be stepped
# through in order of the values of its elements.
# The return value is the number of elements in the array (n).
# qsort_arb_ind(arr, k): build in k[1..n] the indices of arr, ordered by
# ascending value of the corresponding arr elements; returns n.
# arr itself is not modified.  Any elements already present in k are
# discarded first, so k holds exactly the indices 1..n on return.
# ArrInd and end are local variables.
function qsort_arb_ind(arr,k,   ArrInd,end) {
    split("", k)        # portable way to empty an array
    end = 0
    for (ArrInd in arr)
        k[++end] = ArrInd
    qsortseg(arr, k, 1, end)
    return end
}

# qsortseg(arr, k, start, end): sort the segment k[start..end] in place so
# that arr[k[start]] .. arr[k[end]] is in ascending order.  Only k is
# rearranged; arr is only read.
# left, right, sepval, tmp, tmpe and tmps are local variables.
function qsortseg(arr,k,start,end,   left,right,sepval,tmp,tmpe,tmps) {
    # Segments of fewer than two elements are already sorted.  Returning
    # here also keeps an empty segment (start > end) from evaluating
    # arr[k[0]] below, which would create stray elements in both arrays.
    if (start >= end)
        return

    # Handle the two-element case explicitly.  This is required, not just
    # a shortcut: with a middle-element pivot the partition step below does
    # not shrink a two-element segment that is already in order, so the
    # recursion would never terminate.
    if ((end - start) == 1) {
        if (arr[tmps = k[start]] > arr[tmpe = k[end]]) {
            k[start] = tmpe
            k[end] = tmps
        }
        return
    }

    left = start
    right = end
    # Pivot value: the element in the middle of the segment.
    sepval = arr[k[int((left + right) / 2)]]

    # Make every element <= sepval be to the left of every element > sepval
    while (left < right) {
        while (arr[k[left]] < sepval)
            left++
        while (arr[k[right]] > sepval)
            right--
        if (left < right) {
            tmp = k[left]
            k[left++] = k[right]
            k[right--] = tmp
        }
    }

    # If the scans met on one element, assign it to the side it belongs to.
    if (left == right) {
        if (arr[k[left]] < sepval)
            left++
        else
            right--
    }

    if (start < right)
        qsortseg(arr, k, start, right)
    if (left < end)
        qsortseg(arr, k, left, end)
}