summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Cochrane <adrian@openwork.nz>2020-12-22 20:49:38 +1300
committerAdrian Cochrane <adrian@openwork.nz>2020-12-22 20:50:11 +1300
commitac4767de156fadaf2af635723a36ad6dce96077e (patch)
treefbf25cf1d0af540df8bfcdf6169bedaece1d0dcd
parent663fb7aad3813458f3b80bebaff79ba79ddf5db5 (diff)
downloadhurl-ac4767de156fadaf2af635723a36ad6dce96077e.tar.gz
hurl-ac4767de156fadaf2af635723a36ad6dce96077e.tar.bz2
hurl-ac4767de156fadaf2af635723a36ad6dce96077e.zip
Integrate new HTTP cache.
And while I'm at it, implement HTTP redirects myself to report correct URIs to the caller & cache.
-rw-r--r--src/Network/URI/Cache.hs43
-rw-r--r--src/Network/URI/Fetch.hs71
2 files changed, 80 insertions, 34 deletions
diff --git a/src/Network/URI/Cache.hs b/src/Network/URI/Cache.hs
index 01b47b1..908597f 100644
--- a/src/Network/URI/Cache.hs
+++ b/src/Network/URI/Cache.hs
@@ -1,5 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
-module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP) where
+module Network.URI.Cache(shouldCacheHTTP, cacheHTTP, readCacheHTTP, cleanCacheHTTP) where
import Network.HTTP.Client
import Network.HTTP.Types.Status
import Network.HTTP.Types.Header
@@ -15,6 +15,7 @@ import Data.ByteString.Lazy as Lazy
import System.IO as IO
import System.FilePath
import System.Directory
+import qualified Data.Text as Txt
import Data.Maybe
import Data.Char (isSpace)
@@ -70,7 +71,7 @@ cacheHTTP uri resp | shouldCacheHTTP resp = do
| otherwise = []
cacheHTTP _ _ = return ()
-readCacheHTTP :: URI -> IO (Maybe (String, Lazy.ByteString), Maybe ResponseHeaders)
+readCacheHTTP :: URI -> IO (Maybe (Txt.Text, Lazy.ByteString), Maybe ResponseHeaders)
readCacheHTTP uri = do
cached <- readKV $ uriToString' uri
case cached of
@@ -86,11 +87,25 @@ readCacheHTTP uri = do
-- Cache entry has expired, delete.
when (isJust headers') $ deleteKV $ uriToString' uri
- return (Just (mime, body), headers')
+ return (Just (Txt.pack mime, body), headers')
_ -> return (Nothing, Just [])
+cleanCacheHTTP = void $ do
+ now <- getCurrentTime
+ let tombstone = now
+ dir <- getXdgDirectory XdgCache "nz.geek.adrian.hurl"
+ dirExists <- doesDirectoryExist (dir </> "http")
+ files <- if dirExists then listDirectory (dir </> "http") else return []
+ forM files $ \file -> do
+ exists <- doesFileExist file
+ when exists $ IO.withFile file ReadMode $ \h -> do
+ (headers, _) <- parseHeaders h
+ let hasHeader h = isJust $ lookup h headers
+ validatable = hasHeader "modified" || hasHeader "etag"
+ expires = fromMaybe tombstone (readMaybe =<< lookup "expires" headers)
+ when (now >= expires && not validatable) $ removeFile file
------
--- Key-value storage
@@ -113,17 +128,17 @@ openKV key mode act = do
if exists then Just <$> IO.withFile path mode act else return Nothing
readKV key = openKV key ReadMode parseHeaders
- where
- parseHeaders h = do
- line <- IO.hGetLine h
- case L.break isSpace $ strip' line of
- ("", "") -> do
- body <- Lazy.hGetContents h
- return ([], body)
- (key, value) -> do
- (headers, body) <- parseHeaders h
- return ((key, strip' value):headers, body)
- strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace
+
+parseHeaders h = do
+ line <- IO.hGetLine h
+ case L.break isSpace $ strip' line of
+ ("", "") -> do
+ body <- Lazy.hGetContents h
+ return ([], body)
+ (key, value) -> do
+ (headers, body) <- parseHeaders h
+ return ((key, strip' value):headers, body)
+strip' = L.dropWhile isSpace . L.dropWhileEnd isSpace
writeKV key (headers, body) = void $ openKV key WriteMode $ \h -> do
forM headers $ \(key, value) -> do
diff --git a/src/Network/URI/Fetch.hs b/src/Network/URI/Fetch.hs
index 12fea99..b643b61 100644
--- a/src/Network/URI/Fetch.hs
+++ b/src/Network/URI/Fetch.hs
@@ -2,7 +2,7 @@
{-# LANGUAGE OverloadedStrings #-}
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
-module Network.URI.Fetch(Session(locale, aboutPages, redirectCount), newSession,
+module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession,
fetchURL, fetchURL', fetchURLs, mimeERR, htmlERR,
dispatchByMIME, saveDownload, downloadToURI,
-- logging API
@@ -44,6 +44,9 @@ import qualified Network.HTTP.Client.OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import Network.HTTP.Types
import Data.List (intercalate)
+import Control.Concurrent (forkIO)
+
+import Network.URI.Cache
#endif
#ifdef WITH_RAW_CONNECTIONS
@@ -89,7 +92,9 @@ data Session = Session {
-- | Log of timestamped/profiled URL requests
requestLog :: MVar [LogRecord],
-- | How many redirects to follow for Gemini or HTTP(S) requests
- redirectCount :: Int
+ redirectCount :: Int,
+ -- | Whether to cache network responses, avoiding sending requests
+ cachingEnabled :: Bool
}
data LogRecord = LogRecord {
@@ -149,7 +154,8 @@ newSession' appname = do
locale = ietfLocale,
aboutPages = [],
requestLog = log,
- redirectCount = 5
+ redirectCount = 5,
+ cachingEnabled = True
}
llookup key fallback map = fallback `fromMaybe` listToMaybe [v | (k, v) <- map, k == key]
@@ -231,23 +237,48 @@ fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath
#ifdef WITH_HTTP_URI
fetchURL' session accept@(defaultMIME:_) uri | uriScheme uri `elem` ["http:", "https:"] = do
- request <- HTTP.requestFromURI uri
- response <- HTTP.httpLbs request {
- HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
- HTTP.requestHeaders = [
- ("Accept", C8.pack $ intercalate ", " accept),
- ("Accept-Language", C8.pack $ intercalate ", " $ locale session)
- ],
- HTTP.redirectCount = redirectCount session
- } $ managerHTTP session
- return $ case (
- HTTP.responseBody response,
- [val | ("content-type", val) <- HTTP.responseHeaders response]
- ) of
- ("", _) -> (uri, mimeERR, Right $ B.fromStrict $ statusMessage $ HTTP.responseStatus response)
- (response, (mimetype:_)) -> let mime = Txt.toLower $ convertCharset "utf-8" mimetype
- in resolveCharset' uri (L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime) response
- (response, []) -> (uri, defaultMIME, Right response)
+ cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing)
+ response <- case cached of
+ (Just (mime, body), Nothing) -> return $ Right (mime, body)
+ (cached, cachingHeaders) -> do
+ request <- HTTP.requestFromURI uri
+ response <- HTTP.httpLbs request {
+ HTTP.cookieJar = Nothing, -- Will only be supported by Rhapsode when submitting a form.
+ HTTP.requestHeaders = [
+ ("Accept", C8.pack $ intercalate ", " accept),
+ ("Accept-Language", C8.pack $ intercalate ", " $ locale session)
+ ] ++ fromMaybe [] cachingHeaders,
+ HTTP.redirectCount = 0
+ } $ managerHTTP session
+ case (
+ HTTP.responseStatus response,
+ HTTP.responseBody response,
+ [val | ("content-type", val) <- HTTP.responseHeaders response]
+ ) of
+ (Status 304 _, _, _) | Just cached'@(_, body) <- cached -> do
+ cacheHTTP uri $ response { HTTP.responseBody = body }
+ return $ Right cached'
+ -- Manually handle redirects so the caller & HTTP cache gets the correct URI.
+ (Status code _, _, _) | code > 300 && code < 400,
+ Just location <- lookup "location" $ HTTP.responseHeaders response,
+ Just uri' <- parseURIReference $ C8.unpack location ->
+ return $ Left $ relativeTo uri' uri
+ (Status _ msg, "", _) -> return $ Right (Txt.pack mimeERR, B.fromStrict msg)
+ (_, body, (mimetype:_)) -> do
+ cacheHTTP uri response
+ forkIO cleanCacheHTTP -- Try to keep diskspace down...
+
+ let mime = Txt.toLower $ convertCharset "utf-8" mimetype
+ return $ Right (mime, body)
+ (_, response, []) -> return $ Right (Txt.pack defaultMIME, response)
+
+ case response of
+ Left redirect ->
+ let session' = session { redirectCount = redirectCount session - 1 }
+ in fetchURL' session' accept redirect
+ Right (mime, body) ->
+ let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
+ in return $ resolveCharset' uri mime' body
`catches` [
Handler $ \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e),
Handler $ \(ErrorCall msg) -> do return (uri, mimeERR, Left $ Txt.pack msg)