This program calculates top-n league tables for the Premier League based on this openfootball data.
The program has enough data to self-test but to generate any useful data you will need to clone this repo into the parent directory and checkout the ‘corrections’ branch.
{-# LANGUAGE CPP #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE QuasiQuotes #-}
module Main(main) where
import qualified Control.Monad as M
import Data.Functor.Identity
import qualified Data.HashMap.Lazy as HML
import qualified Data.List as L
import Data.Maybe
import qualified Data.Monoid as M
import Data.Ord
import qualified Data.Text as T
import qualified Data.Text.IO as T
import Data.Time
import Prelude.Compat
import qualified Shelly as SH
import System.Directory
import System.Environment
import System.Exit
import System.FilePath
import System.IO
import TestKit
import Text.RE.Summa
import qualified Text.RE.TDFA as TDFA
import Text.RE.TDFA.Text
import Text.Read
The CLI parser yields a list of jobs to generate or check various league tables.
main :: IO ()
= parseCLI >>= mapM_ job main
A Job contains everything we need to generate a league table, and what to do with it.
data Job =
Job
jobTitle :: T.Text -- ^ title for the table
{ jobSize :: Maybe Int -- ^ a full table or a top-n table
, jobInputs :: [FilePath] -- ^ the files containing the game data
, jobIsTest :: Bool -- ^ are we testing o/p or writing it
, jobIsHtml :: Bool -- ^ are we generating HTML from markdown
, jobOutput :: FilePath -- ^ where is the output
,
}deriving (Show)
A match result lists the data in the usual order:
<home-team> <home-score> <away-score> <away-team>
data Game = Game Team Int Int Team
deriving (Eq,Ord,Read,Show)
A league table is a list of teams and their results with the ordering on everything is arranged so that the list can be sorted with the default Ord
ordering to arrange the table according to PL conventions.
newtype Table = Table { getTable :: [(Results,Team)] }
deriving (Show)
Teams are just Text containing the names used by the openfootball data.
type Team = T.Text
Results contain everything we need to generate a league table.
data Results =
Results
resultsGamesPlayed :: Int
{ resultsGamesWon :: Int
, resultsGoalsFor :: Int
, resultsGoalsAgainst :: Int
, resultsPointsScored :: Int
,
}deriving (Show)
These vectors have expected zeros and sums.
#if __GLASGOW_HASKELL__ >= 804
instance Semigroup Results where
<>) = mappend_r
(#endif
instance Monoid Results where
mempty = mempty_r
mappend = mappend_r
mappend_r :: Results -> Results -> Results
Results gp1 gw1 gf1 ga1 ps1)
mappend_r (Results gp2 gw2 gf2 ga2 ps2) =
(Results (gp1+gp2)
+gw2)
(gw1+gf2)
(gf1+ga2)
(ga1+ps2)
(ps1
mempty_r :: Results
= Results 0 0 0 0 0 mempty_r
PL results are ordered by (points,goal-difference,goals-scored).
instance Ord Results where
compare = comparing $ \Results{..} ->
( resultsPointsScored- resultsGoalsAgainst
, resultsGoalsFor
, resultsGoalsFor
)
instance Eq Results where
==) x y = compare x y == EQ (
job :: Job -> IO ()
= top jb <$> input jb >>= output jb job jb
top :: Job -> [Game] -> Table
Job{..} mtchs = case jobSize of
top Nothing -> full_tbl
Just n -> aggregate $
filterGames (topTeams n full_tbl) mtchswhere
= aggregate mtchs full_tbl
Calculate the top n teams in a table.
topTeams :: Int -> Table -> [Team]
= map snd . take n . getTable topTeams n
Filter game data to only include games between the listed teams.
filterGames :: [Team] -> [Game] -> [Game]
= filter $ \(Game h _ _ a) ->
filterGames tms `elem` tms && a `elem` tms h
Aggregate game data into a table.
aggregate :: [Game] -> Table
= sortResults $
aggregate mtchs map homeResults mtchs ++ map awayResults mtchs
Generate the results for the home team from a Game result.
homeResults :: Game -> (Results,Team)
Game h hs as _) = flip (,) h
homeResults (Results
= 1
{ resultsGamesPlayed = wins $ hs - as
, resultsGamesWon = hs
, resultsGoalsFor = as
, resultsGoalsAgainst = points $ hs - as
, resultsPointsScored }
Generate the results for the away team from a Game result.
awayResults :: Game -> (Results,Team)
Game _ hs as a) = flip (,) a
awayResults (Results
= 1
{ resultsGamesPlayed = wins $ as - hs
, resultsGamesWon = as
, resultsGoalsFor = hs
, resultsGoalsAgainst = points $ as - hs
, resultsPointsScored }
Calculate the points accruing to a team from their goal difference for a game.
points :: Int -> Int
= case compare gd 0 of
points gd LT -> 0
EQ -> 1
GT -> 3
Calculate the number of wins accruing to a team (0 or 1) from their goal for a game.
wins :: Int -> Int
= case gd>0 of
wins gd True -> 1
False -> 0
Collate a list of results into a table.
sortResults :: [(Results,Team)] -> Table
=
sortResults Table
. L.sortBy (flip $ comparing fst)
. groupSortBy (comparing snd) grp
where
= (r',t)
grp (r,t) ps where
= mconcat $ r : map fst ps r'
Parse openfootball data into Game data, eliminating duplicate results. (For g1
and g2
to be recognised as a duplicate, it must be true that g1==g2
.)
input :: Job -> IO [Game]
Job{..} =
input const . parseGames . T.concat <$> mapM T.readFile jobInputs groupSort
The Game parser has three variants that should all be equivalent.
parseGames :: T.Text -> [Game]
= case PrimParseGames of
parseGames SimpleParseGames -> simpleParseGames
FunParseGames -> funParseGames
PrimParseGames -> primParseGames
data ParseGames = SimpleParseGames | FunParseGames | PrimParseGames
Here we apply the gameEdit
SearchReplace
editor to:
recognise the lines that contain the match results data and
transform the lines into Haskell Game
format which can be parsed by readText
.
The edit
function is a simple specialisation of the regex
sed'
function (defined below) that deletes every line in the file that edits every line in the file according to the given SearchReplace
, deleting all other lies.
simpleParseGames :: T.Text -> [Game]
= map readText . T.lines . edit gameEdit simpleParseGames
The [ed|
… ///
… |]
SearchReplace
editors for recognizing line containing matchresults and converting them to Haskell-format Game
data come in two variants that should be equivalent.
gameEdit :: SearchReplace RE T.Text
= case MacrosGameEdit of
gameEdit SimpleGameEdit -> simpleGameEdit
MacrosGameEdit -> macrosGameEdit
data GameEdit = SimpleGameEdit | MacrosGameEdit
simpleGameEdit :: SearchReplace RE T.Text
=
simpleGameEdit [ed|^ *${ht}([A-Za-z][a-zA-Z ]*[A-Za-z]) +${hs}([0-9]+)-${as}([0-9]+) +(\([^)]+\) *)?${at}([A-Za-z][a-zA-Z ]*[A-Za-z]) *(@.*)?$///Game "${ht}" ${hs} ${as} "${at}"|]
macrosGameEdit :: SearchReplace RE T.Text
=
macrosGameEdit [ed_|^ *${ht}(@{team}) +${hs}([0-9]+)-${as}([0-9]+) +(\([0-9]+-[0-9]+\) *)?${at}(@{team}) *(@.*)?$///Game "${ht}" ${hs} ${as} "${at}"|] macs
macs :: Macros RE
= makeMacros env
macs
env :: MacroEnv
= makeEnv [(,) "team" teamMacro] TDFA.preludeEnv env
teamMacro :: MacroEnv -> MacroID -> MacroDescriptor
=
teamMacro ev mid Just (map dup samples) ev mid
runTests TDFA.regexType MacroDescriptor
= RegexSource "([a-zA-Z]+ ?)*[A-Za-z]" -- the RE to be substituted for the macro
{ macroSource = samples -- list of strings that should match the above macro RE
, macroSamples = counter_samples -- list of strings that should **not** match the above macro RE
, macroCounterSamples = [] -- for bookkeeping
, macroTestResults = Nothing -- no parser needed for this one!
, macroParser = "team names: alphabetic characters interspersed with spaces"
, macroDescription
}where
=
samples "Chelsea FC"
[ "West Bromwich Albion"
, "AFC Bournemouth"
, "F"
, "AB"
, "A B"
, "AA B"
,
]
=
counter_samples "Arsenal FC "
[ " Liverpool FC"
, "West Bromwich Albion"
, "F2"
, ""
,
]
= (x,x) dup x
listMacros :: IO ()
= do
listMacros $ formatMacroTable TDFA.regexType env
hPutStr stderr <- testMacroEnv "macros" TDFA.regexType env
ok not ok) $ exitWith $ ExitFailure 1 M.when (
Here we use the regex
grepFilter
to extract all of the lines that match our rex
RE for detecting match-result data and assemble the Game
data directly by extracting the ht
, hs
, as
and at
fields from the matched result.
funParseGames :: T.Text -> [Game]
=
funParseGames txt Game ( mtch !$$ [cp|ht|])
[ $ mtch !$$ [cp|hs|])
(readText $ mtch !$$ [cp|as|])
(readText !$$ [cp|at|])
( mtch | Line{..} <- grepFilter rex txt
<- [allMatches getLineMatches]
, [mtch] ]
The RE for merely recognising lines that contain match results in the input data come in two variants. We either extract the RE from the above SearchReplace
template or rebuild the [re|
… ]
. (They should of course be equivalent.)
rex :: RE
= case Direct of
rex Direct ->
[re_|^ *${ht}(@{team}) +${hs}([0-9]+)-${as}([0-9]+) +(\([0-9]+-[0-9]+\) *)?${at}(@{team}) *(@.*)?$|] macsRecycle ->
getSearch gameEdit
data REX = Direct | Recycle
This variant of funParseGames
uses T.lines
and ?=~
instead of grepFilter
.
primParseGames :: T.Text -> [Game]
=
primParseGames txt Game ( mtch !$$ [cp|ht|])
[ $ mtch !$$ [cp|hs|])
(readText $ mtch !$$ [cp|as|])
(readText !$$ [cp|at|])
( mtch | mtch <- map (?=~ rex) $ T.lines txt
, matched mtch ]
Write out/test the Table according to the Job output parameters.
output :: Job -> Table -> IO ()
@Job{..} tbl = case jobIsTest of
output jbTrue -> test_it =<< formatTable jb tbl
False -> case jobOutput of
"-" -> T.putStr =<< formatTable jb tbl
-> T.writeFile fp =<< formatTable jb tbl
fp where
= do
test_it txt <- T.readFile jobOutput
txt' case txt == txt' of
True -> putStrLn "OK"
False -> do
putStrLn "Test Failed"
$ ExitFailure 1 exitWith
Generate the markdown for a table and optionally use Pandoc to generate the Html.
formatTable :: Job -> Table -> IO T.Text
Job{..} (Table ps) = to_html $ T.unlines $
formatTable "# " M.<> jobTitle
[ ""
,
, mk_row header_row
, mk_row divider_row++ map mk_row (zipWith gen_row [1..] ps)
] where
= map (T.map (const '-')) header_row
divider_row = map column_header [minBound..maxBound]
header_row
mk_row :: [T.Text] -> T.Text
= T.intercalate "|"
mk_row
gen_row :: Int -> (Results,Team) -> [T.Text]
= map (gen_field i t r) [minBound..maxBound]
gen_row i (r,t)
gen_field :: Int -> Team -> Results -> Col -> T.Text
Results{..} col = lj $ case col of
gen_field i tm Position -> showText i
Club -> tm
Played -> showText resultsGamesPlayed
Won -> showText resultsGamesWon
Drawn -> showText games_drawn
Lost -> showText $ resultsGamesPlayed -
+ games_drawn)
(resultsGamesWon GF -> showText resultsGoalsFor
GA -> showText resultsGoalsAgainst
GD -> showText $ resultsGoalsFor -
resultsGoalsAgainstPoints -> showText resultsPointsScored
where
= T.justifyLeft wd ' '
lj = T.length $ column_header col
wd
= resultsPointsScored - win_points
games_drawn = resultsGamesWon * 3
win_points
= case jobIsHtml of
to_html True -> pandoc jobTitle
False -> return
data Col
= Position
| Club
| Played
| Won
| Drawn
| Lost
| GF
| GA
| GD
| Points
deriving (Bounded,Enum,Show)
column_header :: Col -> T.Text
= case col of
column_header col Position -> "Pos"
Club -> "Club "
-> T.justifyLeft 7 ' ' $ showText col _
The command line parser generates a list of league-table generating/testing jobs for execution by the above job
action. Non-league-table-generating CLI commands like macros
for listing our RE table macros just do their thing and return an empty list of jobs.
parseCLI :: IO [Job]
= do
parseCLI <- getArgs
args case args of
-> listMacros >> testJob -- test with canned vectors
[] "test"] -> listMacros >> testJob -- "
["setup-test"] -> setupTestJob -- setup the test data, generating the golden data
["update"] -> updateIndex >> updateJobs -- update the website with latest data
["table",pth] -> discover pth Nothing -- write out a full league table
["table",pth,sz_s]
[| Just sz <- readMaybe sz_s
-> discover pth $ Just sz -- write out a bounded league table
"macros"] -> listMacros >> return [] -- list the RE macros we are using to parse the data
[-> do -- generate the usage message and fail
_ <- getProgName
pn $ prog pn
hPutStr stderr "[test]"
[ "setup-test"
, "update"
, "table <path> [<size>]"
,
]$ ExitFailure 1
exitWith where
= unlines $ zipWith prog_ (pn : repeat pn') as
prog pn as where
= map (const ' ') pn
pn'
= unwords [p,a]
prog_ p a
setupTestJob :: IO [Job]
testJob,= testJob_ True
testJob = testJob_ False
setupTestJob
testJob_ :: Bool -> IO [Job]
= do
testJob_ is_t True "data"
createDirectoryIfMissing return
Job
[ = "Premier League 2015-16: Top 7"
{ jobTitle = Just 7
, jobSize =
, jobInputs "data/2015-16-premierleague.txt"
[
]= is_t
, jobIsTest = False
, jobIsHtml = "data/league-table.md"
, jobOutput
}
]
updateJobs :: IO [Job]
= do
updateJobs <- utctDay <$> getCurrentTime
dy return $ concat $ map (mk dy) updateJobSpecs
where
@JobSpec{..} =
mk dy jsJob
[ = T.unwords $
{ jobTitle "Premier League " M.<> T.pack jsSeason
[ maybe "" (\n->"Top " M.<> showText n M.<> " ") jsSize
, "[" M.<> showText dy M.<> "]"
,
]= jsSize
, jobSize =
, jobInputs "1-premierleague-i.txt"
[ mkPath jsSeason "1-premierleague-ii.txt"
, mkPath jsSeason
]= False
, jobIsTest = is_html
, jobIsHtml = leagueTablesDir </> tableFile js is_html
, jobOutput
}| is_html <- [True,False]
]
updateIndex :: IO ()
= pandoc title toc >>= T.writeFile index_file
updateIndex where
= T.unlines $
toc "# " M.<> title
[ ""
, "Season", "Top-N", "Html", "Text"]
, mk_row ["------", "-----", "----", "----"]
, mk_row [++
]
[ mk_row
[ T.pack jsSeasonmaybe "all" showText jsSize
, "HTML" $ T.pack $ tableFile js True
, lk "Text" $ T.pack $ tableFile js False
, lk
]| js@JobSpec{..} <- updateJobSpecs
]
= "The League Tables"
title = leagueTablesDir </> "index.html"
index_file
= T.intercalate "|"
mk_row
= "[" M.<> lab M.<> "](" M.<> url M.<> ")"
lk lab url
tableFile :: JobSpec -> Bool -> FilePath
JobSpec{..} is_html = jsSeason ++ "-" ++ sze <.> ext
tableFile where
= maybe "all" (("top-"++).show) jsSize
sze = if is_html then "html" else "txt"
ext
leagueTablesDir :: FilePath
= "docs/league-tables"
leagueTablesDir
updateJobSpecs :: [JobSpec]
=
updateJobSpecs JobSpec sn mb
[ | sn <- ["2016-17","2015-16"]
<- Nothing : map Just [6..10]
, mb
]
data JobSpec =
JobSpec
jsSeason :: String
{ jsSize :: Maybe Int
,
}deriving (Show)
discover :: FilePath -> Maybe Int -> IO [Job]
= do
discover fp mb <- dscvr candidates
inps return
Job
[ = maybe fp_t mk_ttl $
{ jobTitle $ fp_t ?=~ [re|[0-9]{4}-[0-9]{2}|]
matchedText = mb
, jobSize = inps
, jobInputs = False
, jobIsTest = False
, jobIsHtml = "-"
, jobOutput
}
]where
= T.pack fp
fp_t
= "Premier League " M.<> ssn
mk_ttl ssn
= error $ fp ++ ": no data found"
dscvr [] :cds) = do
dscvr (fps<- and <$> mapM doesFileExist fps
ok case ok of
True -> return fps
False -> dscvr cds
=
candidates
[ [ fp
]</> fp </> "1-premierleague.txt"
, [ data_dir
]</> fp </> "1-premierleague-i.txt"
, [ data_dir </> fp </> "1-premierleague-ii.txt"
, data_dir
]
]
mkPath :: String -> String -> FilePath
= data_dir </> ssn </> hlf
mkPath ssn hlf
data_dir :: FilePath
= "../eng-england" data_dir
Use Pandoc to generate a an Html file from a title and markdown text.
pandoc :: T.Text -> T.Text -> IO T.Text
= do
pandoc title txt
T.writeFile inp_file txt$ T.unlines
T.writeFile mda_file "---"
[ "title: " M.<> title
, "---"
,
]fmap (const ()) $
$ SH.verbosely $
SH.shelly "pandoc"
SH.run "-f", "markdown+grid_tables"
[ "-t", "html5"
, "-T", "regex"
, "-s"
, "-c", "bs.css"
, "-c", "styles.css"
, "-c", "tabular.css"
, "-o", T.pack out_file
,
, T.pack mda_file
, T.pack inp_file
]
T.readFile out_filewhere
= "tmp/metadata.markdown"
mda_file = "tmp/pandoc-inp.md"
inp_file = "tmp/pandoc-out.html" out_file
The general helpers.
The edit
function is a simple specialisation of the regex
sed'
function (defined below) that deletes every line in the file that edits every line in the file according to the given SearchReplace
template, deleting all other lines. (It should probably be added to regex.)
edit :: SearchReplace RE T.Text -> T.Text -> T.Text
= runIdentity $ flip sed' txt $
edit sr txt Select
Template sr
[ LineEdit [re|.*|] $ \_ _ -> return Delete
, ]
Construct a Macros table for compiling REs from a MacroEnv. (Something similar should probably be added to regex.)
makeMacros :: MacroEnv -> Macros RE
= runIdentity $
makeMacros ev ExclCaptures ev
mkMacros mk TDFA.regexType where
= maybe oops Identity .
mk
TDFA.compileRegexWithOptions TDFA.noPreludeREOptions
= error "makeMacros: unexpected RE compilation error" oops
Construct a a MacroEnv
from an association list of MacroId
and MacroDescriptior
constructor functions and the base MacroEnv
(the macros that can be used inside the macros). (Something similar should probably be added to regex.)
makeEnv :: [(MacroID,MacroEnv -> MacroID -> MacroDescriptor)]
-> MacroEnv
-> MacroEnv
= ev
makeEnv al ev0 where
= ev0 `HML.union` HML.fromList
ev | (mid,mk) <- al ] [ (mid, mk ev mid)
Variants of the standard functions that operate over Text
.
showText :: Show a => a -> T.Text
= T.pack . show
showText
readText :: Read a => T.Text -> a
= fromMaybe (error "readText") . readMaybe . T.unpack readText