#! /bin/sh
## Author: Parv, parv UNDERSCORE AT yahoo DOT com
## MODIFIED: Nov 13 2006
##
## License: Free to use as you please w/ proper credit given
##
## Name: Goodlog
##
## Purpose: To quickly take a look at the apache logs
##
## Usage:
##   goodlog [days]
##
##   days - how many past days of logs to look at (default: 7)
##

#  The linchpin of this script is the regex used to filter out
#  "uninteresting" logs.  This script has a complementary "badlog"
#  script.
#
#  Difference?  In "goodlog" the regex match is negated; in "badlog" the
#  regex is not negated.
#
#  "Goodlog" could be converted to a perl program so that one could assign
#  regex to a variable instead of editing it inside the program.
#  Additionally, the same perl program can take an option, obviating the need
#  of a separate program like "badlog".
#

# Presumably defines err_line(), which default_log() calls below.
. ~/cf/sh/functions/error-line.fx

#  Log file names; filled in by default_log()
logs=

# default_log [days]
#
#   Appends to the global $logs the name of every existing daily log file
#   for each of the past "days" days (default 7), oldest first.  Today's log
#   (0 days ago) is not included.
#
#   $1 - optional; used as the number of days when it starts with a digit
#
#   Exits the script with status 1 when jot(1) is not available or when no
#   log file was found.
default_log ()
{
  local logroot range day log

  #  $logroot contains the basic pattern of log files.  for me it is
  #  ${HOME}/www_logs/www.YYYYMMDD
  logroot=${HOME}/www_logs/www.

  case "$1" in
    [0-9]*) range=$1 ; shift ;;
    *)      range=7 ;;
  esac

  # "command -v" is the POSIX way to ask whether a command can be run.
  if ! command -v jot >/dev/null 2>&1
  then
    echo 'no jot, no go' 1>&2
    exit 1
  fi

  # jot counts down from $range to 1; each number is a "days ago" offset
  # handed to date -v (BSD date) to build the YYYYMMDD suffix.
  for day in $( jot -n "$range" "$range" 1 )
  do
    log="${logroot}$(date -v-"${day}"d '+%Y%m%d')"
    [ -f "$log" ] || continue
    logs="$logs $log"
  done

  case "${logs}" in
    '' )
      #printf "%s: No logs found to parse\n" ${0##*/} >&2
      err_line "$0" 'No logs found to parse'
      exit 1
      ;;
  esac
}

default_log "$@"

#  override $logs value, say, for testing/debugging
#logs="${@}"

#  given...
#
#------------------------------------------------------------------------------------------
#   1      2 3           4              5      6       7         8     9    10   11    12...
#------------------------------------------------------------------------------------------
#127.0.0.1 - - [30/Dec/2001:14:43:28 -0500] "GET /file/path HTTP/1.0" 200 57853 "-" "User Agent (description)"
#
#  ...awk gives..
#
#127.0.0.1          200112.30 1443.28 -0500  -----  /file/path  -  User_Agent_(description)
#
#  i.e. address, YYYYMM.DD, HHMM.SS, timezone, http code ("-----" for 200
#  and 304, "(code)" otherwise), file path, referrer, user agent.
#

#  Each *_regex below is an extended regular expression; an access-log line
#  matching any of them is considered "uninteresting" and is dropped.

UserAgent_regex="baa.*sheep|sheep.*baa"

#File_regex="\.css|/parv/comp/unix/cf/fvwm/|/parv/comp/graphic.comp/[^ ]*fvwm"
File_regex="\.css|/parv/comp/[^ ]*fvwm"

Pact_regex="(204\.182\.56\.120|216\.15\.97\.5|66\.117\.128\.60|w3c_validator).*libwww-perl"

#  NOTE: the continuation lines must start in column 0; any leading white
#  space would become part of the regex.
SearchEngine_regex="Openbot.+robot-response@openfind\.com\.tw.+www\.openfind\.com\.tw/robot\.html|\
12\.175\..+NPBot.+http://.*nameprotect\.com/botinfo\.html|\
209\.249\..+ZyBorg/.+www\.WISEnutbot\.com|\
216\.39\.(4[89]|5[0-9]|6[0-3])\.[0-9]{1,3}.+Scooter.3\..|\
64\.152\.75\.91.+Scooter.?W3|\
61\.78\.61\..+nhnbot@naver\.com|\
64\.140\.4[89].+TurnitinBot/.+www\.turnitin\.com|\
64\.140\.[0-9]+.+SlySearch.+www\.slysearch\.com|\
65\.214\..+Mozilla/2.+Ask Jeeves/Teoma|\
66\.147\.154\.3.+www\.almaden\.ibm\.com/cs/crawler|\
66\.77\.73\.[0-9]{1,3}.*FAST-WebCrawler|\
68\.142\.[12][0-9][0-9].+(Slurp|Inktomi)|\
(66\.196|7(2\.30|4\.6))\..+http://.*yahoo\.com/help/us/ysearch/slurp|\
194\.224\.199\..+crawler@noxtrum\.com|\
msnbot/"

# filter_logs [file ...]
#
#   Prints, without file name prefixes, every line of the given files
#   (standard input when none is given) that matches none of the
#   "uninteresting" regexes defined above.
filter_logs ()
{
  grep -E -h -v \
    -e "(${Pact_regex}|${SearchEngine_regex}|${UserAgent_regex}|${File_regex})" \
    -- "$@"
}

# format_logs
#
#   Reads access-log lines (combined log format) on standard input and prints
#   one condensed line per request on standard output.
format_logs ()
{
  awk '
  BEGIN {
    # month_num maps a month name ("Jan") to its two digit number ("01")
    month_name_list = "Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec"
    split(month_name_list, month_names, ",")
    for (i in month_names)
      month_num[month_names[i]] = sprintf("%0.2d", i)
  }

  {
    # date[1..6]: day, month (name), year, hour, minute, second;
    # substr() drops the leading "[" of field 4.
    split(substr($4, 2), date, "/|:")

    # User agent: fields 12..NF joined with "_"; afterwards strip the
    # leading "_" plus opening quote, and the closing quote.
    user_agent = ""
    for (i = 12; i <= NF; i++)
      user_agent = user_agent "_" $i
    user_agent = substr(user_agent, 3, length(user_agent) - 3)

    # Only status codes other than 200 and 304 are shown.
    if ($9 == 200 || $9 == 304)
      http_code = " ----- "
    else
      http_code = " (" $9 ") "

    # Print:
    #  - IPv4 address, left justified in an 18 character column,
    #  - yearmonth.day, hourminute.second, timezone (trailing "]" removed),
    #  - (http code),
    #  - file path, referrer (quotes removed),
    #  - user agent
    printf("%-18s %s %s %s %s %s %s %s\n",
      $1,
      date[3] "" month_num[date[2]] "." date[1],
      date[4] "" date[5] "." date[6],
      substr($5, 1, length($5) - 1),
      http_code,
      $7,
      " " substr($11, 2, length($11) - 2),
      " " user_agent)
  }'
}

#  ${logs} is a white space separated list of file names, so it is left
#  unquoted on purpose.  Without any file name grep would silently wait on
#  standard input, so say so instead.
if [ -n "${logs}" ]
then
  # shellcheck disable=SC2086
  filter_logs ${logs} | format_logs
else
  printf '%s: no log files to read\n' "${0##*/}" >&2
fi