import Control.Monad import qualified Data.ByteString.Lazy.Char8 as Bytes import Data.Function(on) import Data.List(sort,sortBy) import Data.Time import System.Directory import System.Environment import System.Locale import Text.Regex {- Merge daily logs into monthly. Prefix every line with date, too. And merge monthly logs (except this year's) into yearly. cd to the right directory before running this program. Add a command line parameter to turn on dry-run: just prints filenames affected and leaves files alone Example crontab line: 1 0 * * * cd $HOME/log && $HOME/bin/mergelogs -} main = do dryrun <- get'dryrun when dryrun (putStrLn "dryrun mode") (today, this'year) <- getToday -- merge daily logs (except today's) to monthly getFilenames "[0-9]{8}" today >>= mapM_ (transfer dryrun 6 True) -- merge monthly logs (except this year's) to yearly getFilenames "[0-9]{6}" (this'year ++ "[0-9]{2}") >>= mapM_ (transfer dryrun 4 False) {- a note on why this doesn't hurt whee-.log: matches this'year ++ "[0-9]{2}", so getFilenames will skip it -} {- merge infilename to outfilename. outfilename is constructed from: prefix, date's first 'trunc' digits, suffix when merging, add_date decides whether to prepend date to every line if dryrun, just print infilename and outfilename -} transfer dryrun trunc add_date (infilename,prefix,date,suffix) = if dryrun then do putStrLn ("would append " ++ infilename ++ " to " ++ outfilename) else do s <- Bytes.readFile infilename Bytes.appendFile outfilename (if add_date then f s else s) removeFile infilename where outfilename = prefix ++ take trunc date ++ suffix f = Bytes.unlines . map (Bytes.append (Bytes.pack (date ++ " "))) . Bytes.lines {- remark: using ByteString.Lazy.Char8 not only increases efficiency but also preserves character encoding (actual bytes) and also not abort just because the bytes don't look like utf8 -} {- collect filenames that: 1. match 'accept'. say the filename is decomposed to (prefix, d, suffix), where d fits 'accept' (and prefix doesn't) 2. d does not match 'reject' returns sorted [(filename, prefix, d, suffix)], sorted by d -} getFilenames accept reject = do xs <- getDirectoryContents "." return (sortBy (compare `on` d'of) (match_filter xs)) where r_accept = mkRegex accept r_reject = mkRegex reject d'of (_,_,d,_) = d match_filter xs = go xs where go [] = [] go (x:xs) | Just (pre,d,suf,_) <- matchRegexAll r_accept x, Nothing <- matchRegex r_reject d = (x,pre,d,suf) : go xs | otherwise = go xs -- returns (today, this year), e.g., ("20130617", "2013") getToday = do z <- getZonedTime let today = formatTime defaultTimeLocale "%Y%m%d" z return (today, take 4 today) get'dryrun = do s <- getArgs return (not (null s))