examples/re-include.lhs

Example: Include Processor

This example looks for lines like

%include "lib/md/load-tutorial-cabal-incl.md"

on its input and replaces them with the contents of the names file.

The tool is self-testing: run it with no arguments (or cabal test).

{-# LANGUAGE NoImplicitPrelude          #-}
{-# LANGUAGE RecordWildCards            #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell            #-}
{-# LANGUAGE QuasiQuotes                #-}
{-# LANGUAGE OverloadedStrings          #-}

module Main
  ( main
  ) where

import           Control.Applicative
import qualified Data.ByteString.Lazy.Char8               as LBS
import           Data.Maybe
import qualified Data.Text                                as T
import           Prelude.Compat
import           System.Environment
import           TestKit
import           Text.RE.Replace
import           Text.RE.TDFA.ByteString.Lazy
import           Text.RE.Tools.Edit
import           Text.RE.Tools.Grep
import           Text.RE.Tools.Sed
main :: IO ()
main = do
  as  <- getArgs
  case as of
    []                    -> test
    ["test"]              -> test
    [fn,fn'] | is_file fn -> loop fn fn'
    _                     -> usage
  where
    is_file = not . (== "--") . take 2

    usage = do
      prg <- getProgName
      putStr $ unlines
        [ "usage:"
        , "  "++prg++" [test]"
        , "  "++prg++" (-|<in-file>) (-|<out-file>)"
        ]

The Sed Script

loop :: FilePath -> FilePath -> IO ()
loop =
  sed $ Select
    [ Function [re|^%include ${file}(@{%string}) ${rex}(@{%string})$|] TOP include_file
    , Function [re|^.*$|]                                              TOP $ \_ _ _ _->return Nothing
    ]
include_file :: LineNo
             -> Match LBS.ByteString
             -> RELocation
             -> Capture LBS.ByteString
             -> IO (Maybe LBS.ByteString)
include_file _ mtch _ _ = fmap Just $
    extract fp =<< compileRegex re_s
  where
    fp    = prs_s $ captureText [cp|file|] mtch
    re_s  = prs_s $ captureText [cp|rex|]  mtch

    prs_s = maybe (error "includeDoc") T.unpack . parseString

Extracting a Literate Fragment from a Haskell Program Text

extract :: FilePath -> RE -> IO LBS.ByteString
extract fp rex = extr . LBS.lines <$> LBS.readFile fp
  where
    extr lns =
      case parse $ scan rex lns of
        Nothing      -> oops
        Just (lno,n) -> LBS.unlines $ (hdr :) $ (take n $ drop i lns) ++ [ftr]
          where
            i = getZeroBasedLineNo lno

    oops = error $ concat
      [ "failed to locate fragment matching "
      , show $ reSource rex
      , " in file "
      , show fp
      ]

    hdr  = "<div class='includedcodeblock'>"
    ftr  = "</div>"
parse :: [Token] -> Maybe (LineNo,Int)
parse []       = Nothing
parse (tk:tks) = case (tk,tks) of
  (Bra b_ln,Hit:Ket k_ln:_) -> Just (b_ln,count_lines_incl b_ln k_ln)
  _                         -> parse tks
count_lines_incl :: LineNo -> LineNo -> Int
count_lines_incl b_ln k_ln =
  getZeroBasedLineNo k_ln + 1 - getZeroBasedLineNo b_ln
data Token = Bra LineNo | Hit | Ket LineNo   deriving (Show)
scan :: RE -> [LBS.ByteString] -> [Token]
scan rex = grepWithScript
    [ (,) [re|\\begin\{code\}|] $ \i -> chk $ Bra i
    , (,) rex                   $ \_ -> chk   Hit
    , (,) [re|\\end\{code\}|]   $ \i -> chk $ Ket i
    ]
  where
    chk x mtchs = case anyMatches mtchs of
      True  -> Just x
      False -> Nothing

Testing

test :: IO ()
test = do
  test_pp "include" loop "data/pp-test.lhs" "data/include-result.lhs"
  putStrLn "tests passed"