This example program reads lines from NGINX error-log files and access-log files converts them into a unified output format.
It is an example of developing REs at scale using macros with the regex test bench.
The tool is self-testing: run it with no arguments (or cabal test
).
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE QuasiQuotes #-}
{-# LANGUAGE OverloadedStrings #-}
module Main
( main-- development
, parse_a
, parse_ewhere
)
import Control.Applicative
import Control.Monad
import qualified Data.ByteString.Lazy.Char8 as LBS
import Data.Char
import Data.Functor.Identity
import qualified Data.HashMap.Lazy as HML
import Data.Maybe
import Data.String
import qualified Data.Text as T
import Data.Time
import Prelude.Compat
import System.Directory
import System.Environment
import System.Exit
import System.FilePath
import System.IO
import TestKit
import Text.Printf
import qualified Text.RE.PCRE as PCRE
import Text.RE.PCRE.ByteString.Lazy
import qualified Text.RE.PCRE.String as S
import Text.RE.REOptions
import Text.RE.Replace
import Text.RE.TestBench
import Text.RE.Tools.Sed
main :: IO ()
= do
main <- getArgs
as case as of
"--macro" ] -> putStr lp_macro_table
["--macro",mid_s] -> putStrLn $ lp_macro_summary $ MacroID mid_s
["--regex" ] -> putStr lp_macro_sources
["--regex",mid_s] -> putStrLn $ lp_macro_source $ MacroID mid_s
["--test" ] -> test
[-> test
[] | is_file in_file -> go True in_file "-"
[in_file ] | is_file in_file -> go True in_file out_file
[in_file,out_file ] -> usage
_ where
= not . (== "--") . take 2
is_file
= do
usage <- getProgName
pnm let prg = ((pnm++" ")++)
putStr $ unlines
"usage:"
[ " --help"
, prg " --macro"
, prg " --macro <macro-id>"
, prg " --regex"
, prg " --regex <macro-id>"
, prg "[--test]"
, prg "(-|<in-file>) [-|<out-file>]"
, prg ]
--
-- go
--
test :: IO ()
= do
test putStrLn "============================================================"
putStrLn "Testing the macro environment."
putStrLn "nginx-log-processor"
<- doesDirectoryExist "docs"
is_docs $
when is_docs "docs" ".txt") (fp "docs" "-src.txt") regexType lp_env
dumpMacroTable (fp "data" ".txt") (fp "data" "-src.txt") regexType lp_env
dumpMacroTable (fp <- testMacroEnv "nginx-log-processor" regexType lp_env
me_ok putStrLn "============================================================"
putStrLn "Testing the log processor on reference data."
putStrLn ""
<- test_log_processor
lp_ok putStrLn "============================================================"
case me_ok && lp_ok of
True -> return ()
False -> exitWith $ ExitFailure 1
where
= dir </> (rty_s ++ "-nginx-log-processor" ++ sfx)
fp dir sfx
= map toLower $ presentRegexType regexType
rty_s
test_log_processor :: IO Bool
= do
test_log_processor False "tmp"
createDirectoryIfMissing False "data/access-errors.log" "tmp/events.log"
go "tmp/events.log" "data/events.log" cmp
--
-- go
--
go :: Bool -> FilePath -> FilePath -> IO ()
= do
go rprt_flg in_file out_file <- setup rprt_flg
ctx sed (script ctx) in_file out_file
script :: Ctx -> Edits IO RE LBS.ByteString
= Select
script ctx ACC parse_access
[ on [re_|@{access}|] AQQ parse_deg_access
, on [re_|@{access_deg}|] ERR parse_error
, on [re_|@{error}|] QQQ parse_def
, on [re_|.*|]
]where
= Function (rex lpo) TOP $ process_line ctx src prs
on rex src prs
= fmap capturedText . matchCapture parse_def
process_line :: IsEvent a
=> Ctx
-> Source
-> (Match LBS.ByteString->Maybe a)
-> LineNo
-> Match LBS.ByteString
-> RELocation
-> Capture LBS.ByteString
-> IO (Maybe LBS.ByteString)
= do
process_line ctx src prs lno cs _ _ $
when (event_is_notifiable event)
flag_event ctx eventreturn $ Just $ presentEvent event
where
= maybe def_event (mkEvent lno src) $ prs cs
event
=
def_event Event
= lno
{ _event_line = src
, _event_source = read "1970-01-01 00:00:00"
, _event_utc = Nothing
, _event_severity = (0,0,0,0)
, _event_address = ""
, _event_details
}
--
-- Ctx, setup, event_is_notifiable, flag_event
--
type Ctx = Bool
setup :: Bool -> IO Ctx
= return
setup
event_is_notifiable :: Event -> Bool
Event{..} =
event_is_notifiable fromEnum (fromMaybe Debug _event_severity) <= fromEnum Err
flag_event :: Ctx -> Event -> IO ()
False = const $ return ()
flag_event True = LBS.hPutStrLn stderr . presentEvent
flag_event
--
-- Event, presentEvent, IsEvent
--
data Event =
Event
_event_line :: LineNo
{ _event_source :: Source
, _event_utc :: UTCTime
, _event_severity :: Maybe Severity
, _event_address :: IPV4Address
, _event_details :: LBS.ByteString
,
}deriving (Show)
data Source = ACC | AQQ | ERR | QQQ
deriving (Show,Read)
presentEvent :: Event -> LBS.ByteString
Event{..} = LBS.pack $
presentEvent "%04d %s %s %-7s %3d.%3d.%3d.%3d [%s]"
printf
(getLineNo _event_line )show _event_source )
(show _event_utc )
(maybe "-" svrty_kw _event_severity)
(
a b c d
(LBS.unpack _event_details )where
= _event_address
(a,b,c,d)
= T.unpack . fst . severityKeywords
svrty_kw
class IsEvent a where
mkEvent :: LineNo -> Source -> a -> Event
instance IsEvent Access where
Access{..} =
mkEvent lno src Event
= lno
{ _event_line = src
, _event_source = _a_time_local
, _event_utc = Nothing
, _event_severity = _a_remote_addr
, _event_address = LBS.pack $
, _event_details "%s %d %d %s %s %s"
printf
(T.unpack _a_request )
_a_status
_a_body_bytes
(T.unpack _a_http_referrer )
(T.unpack _a_http_user_agent)
(T.unpack _a_other )
}
instance IsEvent Error where
ERROR{..} =
mkEvent lno src Event
= lno
{ _event_line = src
, _event_source = UTCTime _e_date $ timeOfDayToTime _e_time
, _event_utc = Just _e_severity
, _event_severity = (0,0,0,0)
, _event_address = LBS.pack $ printf "%d#%d: %s" pid tid $ LBS.unpack _e_other
, _event_details
}where
= _e_pid_tid
(pid,tid)
instance IsEvent LBS.ByteString where
=
mkEvent lno src lbs Event
= lno
{ _event_line = src
, _event_source = read "1970-01-01 00:00:00Z"
, _event_utc = Nothing
, _event_severity = (0,0,0,0)
, _event_address = lbs
, _event_details
}
--
-- REOptions and Prelude
--
lpo :: PCRE.REOptions
= PCRE.makeREOptions lp_prelude
lpo
lp_prelude :: Macros RE
= runIdentity $ mkMacros mk regexType ExclCaptures lp_env
lp_prelude where
= maybe oops Identity . PCRE.compileRegexWithOptions PCRE.noPreludeREOptions
mk
= error "lp_prelude"
oops
lp_macro_table :: String
= formatMacroTable regexType lp_env
lp_macro_table
lp_macro_summary :: MacroID -> String
= formatMacroSummary regexType lp_env
lp_macro_summary
lp_macro_sources :: String
= formatMacroSources regexType ExclCaptures lp_env
lp_macro_sources
lp_macro_source :: MacroID -> String
= formatMacroSource regexType ExclCaptures lp_env
lp_macro_source
lp_env :: MacroEnv
= PCRE.preludeEnv `HML.union` HML.fromList
lp_env "user" user_macro
[ f "pid#tid:" pid_tid_macro
, f "access" access_macro
, f "access_deg" access_deg_macro
, f "error" error_macro
, f
]where
= (mid, mk lp_env mid)
f mid mk
--
-- The Macro Descriptors
--
user_macro :: MacroEnv -> MacroID -> MacroDescriptor
=
user_macro env mid
runTests regexType parse_user samples env midMacroDescriptor
= "(?:-|[^[:space:]]+)"
{ macroSource = map fst samples
, macroSamples = counter_samples
, macroCounterSamples = []
, macroTestResults = Just "parse_user"
, macroParser = "a user ident (per RFC1413)"
, macroDescription
}where
samples :: [(String,User)]
=
samples "joe"
[ f
]where
= (nm,User $ LBS.pack nm)
f nm
=
counter_samples "joe user"
[
]
pid_tid_macro :: MacroEnv -> MacroID -> MacroDescriptor
=
pid_tid_macro env mid
runTests regexType parse_pid_tid samples env midMacroDescriptor
= "(?:@{%nat})#(?:@{%nat}):"
{ macroSource = map fst samples
, macroSamples = counter_samples
, macroCounterSamples = []
, macroTestResults = Just "parse_pid_tid"
, macroParser = "<PID>#<TID>:"
, macroDescription
}where
samples :: [(String,(Int,Int))]
=
samples "1378#0:" (1378,0)
[ f
]where
= (,)
f
=
counter_samples ""
[ "24#:"
, "24.365:"
,
]
access_macro :: MacroEnv -> MacroID -> MacroDescriptor
=
access_macro env mid . fmap LBS.pack) samples env mid
runTests' regexType (parse_access MacroDescriptor
= access_re
{ macroSource = map fst samples
, macroSamples = counter_samples
, macroCounterSamples = []
, macroTestResults = Just "parse_a"
, macroParser = "an Nginx access log file line"
, macroDescription
}where
samples :: [(String,Access)]
=
samples "192.168.100.200 - - [12/Jan/2016:12:08:36 +0000] \"GET / HTTP/1.1\" 200 3700 \"-\" \"My Agent\" \"-\""
[ (,) Access
= (192,168,100,200)
{ _a_remote_addr = "-"
, _a_remote_user = read "2016-01-12 12:08:36 UTC"
, _a_time_local = "GET / HTTP/1.1"
, _a_request = 200
, _a_status = 3700
, _a_body_bytes = "-"
, _a_http_referrer = "My Agent"
, _a_http_user_agent = "-"
, _a_other
}
]
=
counter_samples ""
[ " - [] \"\" \"\" \"\" \"\""
,
]
access_deg_macro :: MacroEnv -> MacroID -> MacroDescriptor
=
access_deg_macro env mid . fmap LBS.pack) samples env mid
runTests' regexType (parse_deg_access MacroDescriptor
= " - \\[\\] \"\" \"\" \"\" \"\""
{ macroSource = map fst samples
, macroSamples = counter_samples
, macroCounterSamples = []
, macroTestResults = Nothing
, macroParser = "a degenerate Nginx access log file line"
, macroDescription
}where
samples :: [(String,Access)]
=
samples " - [] \"\" \"\" \"\" \"\"" deg_access
[ (,)
]
=
counter_samples ""
[ "foo"
,
]
error_macro :: MacroEnv -> MacroID -> MacroDescriptor
=
error_macro env mid . fmap LBS.pack) samples env mid
runTests' regexType (parse_error MacroDescriptor
= error_re
{ macroSource = map fst samples
, macroSamples = counter_samples
, macroCounterSamples = []
, macroTestResults = Just "parse_e"
, macroParser = "an Nginx error log file line"
, macroDescription
}where
samples :: [(String,Error)]
=
samples "2016/12/21 11:53:35 [emerg] 1378#0: foo"
[ (,) ERROR
= read "2016-12-21"
{ _e_date = read "11:53:35"
, _e_time = Emerg
, _e_severity = (1378,0)
, _e_pid_tid = " foo"
, _e_other
}"2017/01/04 05:40:19 [error] 31623#0: *1861296 no \"ssl_certificate\" is defined in server listening on SSL port while SSL handshaking, client: 192.168.31.38, server: 0.0.0.0:80"
, (,) ERROR
= read "2017-01-04"
{ _e_date = read "05:40:19"
, _e_time = Err
, _e_severity = (31623,0)
, _e_pid_tid = " *1861296 no \"ssl_certificate\" is defined in server listening on SSL port while SSL handshaking, client: 192.168.31.38, server: 0.0.0.0:80"
, _e_other
}
]
=
counter_samples ""
[ "foo"
, ]
--
-- Access, access_re, deg_access, parse_deg_access, parse_access
--
data Access =
Access
_a_remote_addr :: !IPV4Address
{ _a_remote_user :: !User
, _a_time_local :: !UTCTime
, _a_request :: !T.Text
, _a_status :: !Int
, _a_body_bytes :: !Int
, _a_http_referrer :: !T.Text
, _a_http_user_agent :: !T.Text
, _a_other :: !T.Text
,
}deriving (Eq,Show)
access_re :: RegexSource
= RegexSource $ unwords
access_re "(@{%address.ipv4})"
[ "-"
, "(@{user})"
, "\\[(@{%datetime.clf})\\]"
, "(@{%string.simple})"
, "(@{%nat})"
, "(@{%nat})"
, "(@{%string.simple})"
, "(@{%string.simple})"
, "(@{%string.simple})"
, ]
deg_access :: Access
=
deg_access Access
= (0,0,0,0)
{ _a_remote_addr = "-"
, _a_remote_user = read "1970-01-01 00:00:00Z"
, _a_time_local = ""
, _a_request = 0
, _a_status = 0
, _a_body_bytes = ""
, _a_http_referrer = ""
, _a_http_user_agent = ""
, _a_other
}
parse_deg_access :: Match LBS.ByteString -> Maybe Access
Match{..} =
parse_deg_access case matchSource == " - [] \"\" \"\" \"\" \"\"" of
True -> Just deg_access
False -> Nothing
parse_a :: LBS.ByteString -> Maybe Access
= parse_access $ lbs ?=~ [re_|@{access}|] lpo
parse_a lbs
parse_access :: Match LBS.ByteString -> Maybe Access
=
parse_access cs Access
<$> f parseIPv4Address [cp|1|]
<*> f parse_user [cp|2|]
<*> f parseDateTimeCLF [cp|3|]
<*> f parseSimpleString [cp|4|]
<*> f parseInteger [cp|5|]
<*> f parseInteger [cp|6|]
<*> f parseSimpleString [cp|7|]
<*> f parseSimpleString [cp|8|]
<*> f parseSimpleString [cp|9|]
where
= psr $ capturedText $ capture i cs
f psr i
--
-- Error, error_re, parse_error
--
data Error =
ERROR
_e_date :: Day
{ _e_time :: TimeOfDay
, _e_severity :: Severity
, _e_pid_tid :: (Int,Int)
, _e_other :: LBS.ByteString
,
}deriving (Eq,Show)
error_re :: RegexSource
= RegexSource $ unwords
error_re "(@{%date.slashes})"
[ "(@{%time})"
, "\\[(@{%syslog.severity})\\]"
, "(@{pid#tid:})(.*)"
,
]
parse_e :: LBS.ByteString -> Maybe Error
= parse_error $ lbs ?=~ [re_|@{error}|] lpo
parse_e lbs
parse_error :: Match LBS.ByteString -> Maybe Error
=
parse_error cs ERROR
<$> f parseSlashesDate [cp|1|]
<*> f parseTimeOfDay [cp|2|]
<*> f parseSeverity [cp|3|]
<*> f parse_pid_tid [cp|4|]
<*> f Just [cp|5|]
where
= psr $ capturedText $ capture i cs
f psr i
--
-- User, parseUser
--
newtype User =
User { _User :: LBS.ByteString }
deriving (IsString,Ord,Eq,Show)
parse_user :: Replace a => a -> Maybe User
= Just . User . LBS.pack . unpackR
parse_user
--
-- parse_pid_tid
--
parse_pid_tid :: Replace a => a -> Maybe (Int,Int)
= case allMatches $ unpackR x S.*=~ [re|@{%nat}|] of
parse_pid_tid x -> (,) <$> p cs <*> p cs'
[cs,cs'] -> Nothing
_ where
= matchCapture cs >>= parseInteger . capturedText
p cs
regexType :: RegexType
= PCRE.regexType regexType