summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Cochrane <alcinnz@lavabit.com>2022-07-23 20:32:14 +1200
committerAdrian Cochrane <alcinnz@lavabit.com>2022-07-23 20:32:14 +1200
commit12c81bf0660acab500c2c9c234803e0fd1a47a0d (patch)
tree6fb4d05f9b50fb4b7fee77ccd36651c501eb7dbc
parent7d31eda45dc9d615cbfa084a2149b17c5e313a85 (diff)
downloadhurl-12c81bf0660acab500c2c9c234803e0fd1a47a0d.tar.gz
hurl-12c81bf0660acab500c2c9c234803e0fd1a47a0d.tar.bz2
hurl-12c81bf0660acab500c2c9c234803e0fd1a47a0d.zip
Bug/crash/feature fixes for submitting forms.
Now longer crashes when attempting to save cookies either in-memory or on-disk. On-disk cookie storage is now resilient to changes in http-client internals. HURL now handles encoding form submissions as per caller-specified MIMEtype. HURL now supports a choice of HTTP method when submitting forms. Bugfix means now content negotiation, caching, & form data are now actually sent to the server.
-rw-r--r--Main2.hs30
-rw-r--r--hurl.cabal19
-rw-r--r--src/Network/URI/CookiesDB.hs135
-rw-r--r--src/Network/URI/Fetch.hs96
4 files changed, 258 insertions, 22 deletions
diff --git a/Main2.hs b/Main2.hs
new file mode 100644
index 0000000..a8fa325
--- /dev/null
+++ b/Main2.hs
@@ -0,0 +1,30 @@
+{-# LANGUAGE OverloadedStrings #-}
+module Main where
+
+import Network.URI.Fetch
+-- Input parsing
+import System.Environment (getArgs)
+import Network.URI (parseURI, nullURI)
+import Data.Maybe (fromJust)
+-- Where to save files
+import System.Directory (getCurrentDirectory)
+import qualified Data.ByteString.Char8 as C8
+
+main :: IO ()
+main = do
+ url:encoding:args <- getArgs
+ let url' = fromJust $ parseURI url
+ putStrLn encoding
+ session <- newSession
+ dir <- getCurrentDirectory
+
+ resp <- submitURL' session ["*/*"] url' "POST" (C8.pack encoding) $map parseArg args
+ res <- saveDownload nullURI dir resp
+ putStrLn $ show res
+
+parseArg ('-':arg) | (key, '=':value) <- break (== '=') arg = (key, Left value)
+ | otherwise = (arg, Left "")
+parseArg ('+':arg) | (key, '=':value) <- break (== '=') arg = (key, Right value)
+ | otherwise = (arg, Left "")
+parseArg arg | (key, '=':value) <- break (== '=') arg = (key, Left value)
+ | otherwise = (arg, Left "")
diff --git a/hurl.cabal b/hurl.cabal
index 2526329..501fa5a 100644
--- a/hurl.cabal
+++ b/hurl.cabal
@@ -114,8 +114,8 @@ library
if flag(http)
CPP-options: -DWITH_HTTP_URI
build-depends: http-client, http-types >= 0.12 && <0.13,
- http-client-openssl, HsOpenSSL
- other-modules: Network.URI.Cache
+ http-client-openssl, HsOpenSSL, time, cookie
+ other-modules: Network.URI.Cache, Network.URI.CookiesDB
if flag(gemini)
CPP-options: -DWITH_GEMINI_URI -DWITH_RAW_CONNECTIONS
build-depends: HsOpenSSL, openssl-streams >= 1.2 && < 1.3, io-streams >= 1.5 && < 1.6
@@ -152,3 +152,18 @@ executable hurl
default-language: Haskell2010
ghc-options: -threaded
+
+executable hurl-post
+ -- .hs file containing the Main module
+ main-is: Main2.hs
+
+ -- Other library packages from which modules are imported
+ build-depends: base >= 4.9 && <5, hurl, network-uri, directory, bytestring
+
+ -- Directories containing source files.
+ hs-source-dirs: .
+
+ -- Base languages which the package is written in.
+ default-language: Haskell2010
+
+ ghc-options: -threaded
diff --git a/src/Network/URI/CookiesDB.hs b/src/Network/URI/CookiesDB.hs
new file mode 100644
index 0000000..b7b5365
--- /dev/null
+++ b/src/Network/URI/CookiesDB.hs
@@ -0,0 +1,135 @@
+-- | Read & write Netscape Navigator cookies format.
+{-# LANGUAGE OverloadedStrings, RecordWildCards #-}
+module Network.URI.CookiesDB (readCookies, writeCookies) where
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as C
+import Network.HTTP.Client
+import System.Directory (doesFileExist)
+
+import Web.Cookie (formatCookieExpires, parseCookieExpires)
+import Data.Maybe (fromMaybe, mapMaybe)
+import Data.Time.Clock (nominalDay, getCurrentTime, addUTCTime, UTCTime)
+
+readCookies :: FilePath -> IO CookieJar
+readCookies filepath = do
+ exists <- doesFileExist filepath
+ if exists then do
+ file <- B.readFile filepath
+ now <- getCurrentTime
+ return $ createCookieJar $ readCookies' now file
+ else return $ createCookieJar []
+readCookies' :: UTCTime -> B.ByteString -> [Cookie]
+readCookies' now = mapMaybe (readCookie' now) . C.lines
+readCookie' :: UTCTime -> B.ByteString -> Maybe Cookie
+readCookie' now = readCookie now . C.split '\t'
+readCookie :: UTCTime -> [B.ByteString] -> Maybe Cookie
+readCookie now [domain, _, path, secure, expiration, name, value] =
+ Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+
+ cookie_creation_time = now,
+ cookie_last_access_time = now,
+ cookie_persistent = True,
+ cookie_host_only = False,
+ cookie_http_only = False
+ }
+readCookie now [domain, _, path, secure, expiration, name, value, httpOnly, session] =
+ Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+ cookie_http_only = b httpOnly,
+ cookie_persistent = not $ b session,
+
+ cookie_creation_time = now,
+ cookie_last_access_time = now,
+ cookie_host_only = False
+ }
+readCookie now [domain, _, path, secure, expiration, name, value,
+ httpOnly, session, sameSite] = Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+ cookie_http_only = b httpOnly,
+ cookie_persistent = not $ b session,
+ cookie_host_only = sameSite == "STRICT",
+
+ cookie_creation_time = now,
+ cookie_last_access_time = now
+ }
+readCookie now [domain, _, path, secure, expiration, name, value,
+ httpOnly, session, sameSite, _] = Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+ cookie_http_only = b httpOnly,
+ cookie_persistent = not $ b session,
+ cookie_host_only = sameSite == "STRICT",
+
+ cookie_creation_time = now,
+ cookie_last_access_time = now
+ }
+readCookie now [domain, _, path, secure, expiration, name, value,
+ httpOnly, session, sameSite, _, creation] = Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+ cookie_http_only = b httpOnly,
+ cookie_persistent = not $ b session,
+ cookie_host_only = sameSite == "STRICT",
+ cookie_creation_time = fromMaybe now $ parseCookieExpires creation,
+ cookie_last_access_time = fromMaybe now $ parseCookieExpires creation
+ }
+readCookie now (domain:_:path:secure:expiration:name:value:
+ httpOnly:session:sameSite:_:creation:access:_) = Just Cookie {
+ cookie_domain = domain,
+ cookie_path = path,
+ cookie_secure_only = b secure,
+ cookie_expiry_time = fromMaybe (addUTCTime nominalDay now) $ parseCookieExpires expiration,
+ cookie_name = name,
+ cookie_value = value,
+ cookie_http_only = b httpOnly,
+ cookie_persistent = not $ b session,
+ cookie_host_only = sameSite == "STRICT",
+ cookie_creation_time = fromMaybe now $ parseCookieExpires creation,
+ cookie_last_access_time = fromMaybe now $ parseCookieExpires access
+ }
+readCookie _ _ = Nothing
+b "TRUE" = True
+b _ = False
+
+writeCookies :: FilePath -> CookieJar -> Bool -> IO ()
+writeCookies filepath cookies isSession = do
+ B.writeFile filepath $ writeCookies' isSession $ destroyCookieJar cookies
+writeCookies' :: Bool -> [Cookie] -> B.ByteString
+writeCookies' isSession = C.unlines . map writeCookie' . filter shouldSaveCookie
+ where
+ shouldSaveCookie | isSession = cookie_persistent
+ | otherwise = const True
+writeCookie' :: Cookie -> B.ByteString
+writeCookie' Cookie {..} = C.intercalate "\t" [
+ cookie_domain, "TRUE", cookie_path, b' cookie_secure_only,
+ formatCookieExpires cookie_expiry_time, cookie_name, cookie_value,
+ b' cookie_http_only, b' $ not cookie_persistent,
+ if cookie_host_only then "STRICT" else "LAX", "MEDIUM",
+ formatCookieExpires cookie_creation_time,
+ formatCookieExpires cookie_last_access_time]
+b' True = "TRUE"
+b' False = "FALSE"
diff --git a/src/Network/URI/Fetch.hs b/src/Network/URI/Fetch.hs
index 45404b9..a243ab9 100644
--- a/src/Network/URI/Fetch.hs
+++ b/src/Network/URI/Fetch.hs
@@ -3,7 +3,7 @@
-- | Retrieves documents for a URL, supporting multiple URL schemes that can be
-- disabled at build-time for reduced dependencies.
module Network.URI.Fetch(Session(locale, aboutPages, redirectCount, cachingEnabled), newSession,
- fetchURL, fetchURL', fetchURLs, submitURL, mimeERR, htmlERR,
+ fetchURL, fetchURL', fetchURLs, submitURL, submitURL', mimeERR, htmlERR,
dispatchByMIME, appsForMIME, Application(..), dispatchByApp,
saveDownload, downloadToURI,
-- logging API
@@ -13,16 +13,20 @@ import Network.URI.Types
import qualified Data.Text as Txt
import Data.Text (Text)
+import qualified Data.Text.Encoding as Txt
import Network.URI
import qualified Data.ByteString as Strict
import Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Char8 as C8
+import qualified Data.ByteString.Builder as Builder
import Network.URI.Charset
import Control.Exception
import System.IO.Error (isEOFError)
import Control.Concurrent.Async (forConcurrently)
+import qualified Data.Maybe as M
+
-- for about: URIs & port parsing, all standard lib
import Data.Maybe (fromMaybe, listToMaybe, isJust)
import Data.Either (isLeft)
@@ -44,6 +48,7 @@ import Data.List as L
#ifdef WITH_HTTP_URI
import qualified Network.HTTP.Client as HTTP
+import qualified Network.HTTP.Client.MultipartFormData as HTTP
import qualified Network.HTTP.Client.OpenSSL as TLS
import qualified OpenSSL.Session as TLS
import Network.HTTP.Types
@@ -51,6 +56,7 @@ import Data.List (intercalate)
import Control.Concurrent (forkIO)
import Network.URI.Cache
+import Network.URI.CookiesDB
#endif
#ifdef WITH_RAW_CONNECTIONS
@@ -132,12 +138,10 @@ newSession' appname = do
TLS.contextSetVerificationMode httpsCtxt $ TLS.VerifyPeer True True Nothing
managerHTTP' <- HTTP.newManager $ TLS.opensslManagerSettings $ return httpsCtxt
- cookiesDir <- getXdgDirectory XdgData "nz.geek.adrian.hurl.cookies"
+ cookiesDir <- getXdgDirectory XdgData "nz.geek.adrian.hurl.cookies2"
let cookiesPath' = cookiesDir </> appname
- cookiesExist <- doesFileExist cookiesPath'
- cookies <- if cookiesExist then readMaybe <$> readFile cookiesPath' else return Nothing
+ cookies' <- readCookies cookiesPath'
now <- getCurrentTime
- let cookies' = HTTP.createCookieJar $ fromMaybe [] cookies
cookieJar <- newMVar $ HTTP.evictExpiredCookies cookies' now
cookieJar' <- newMVar $ HTTP.createCookieJar []
#endif
@@ -217,24 +221,75 @@ mimeERR = "txt/x-error\t"
htmlERR = "html/x-error\t"
submitURL :: Session -> [String] -> URI -> Text -> String -> IO (URI, String, Either Text ByteString)
+-- | See submitURL', preserved for backwards compatability.
+-- This is a little more cumbersome to use, & doesn't support file uploads.
+-- Was designed naively based on convenience of initial caller.
+submitURL s a u m q =
+ submitURL' s a u (Txt.encodeUtf8 m) "application/x-www-form-urlencoded" $
+ Prelude.map parseQuery $ Txt.splitOn "&" $ Txt.pack q
+ where
+ parseQuery q = let (key, value) = Txt.breakOn "=" q in if Txt.null value
+ then (decode key, Left "")
+ else (decode key, Left $ decode $ Txt.tail value)
+ decode = unEscapeString . Txt.unpack
+-- | Uploads given key-value pairs to the specified URL using the specified HTTP method & encoding.
+-- The key-value pairs may specify filepaths, in which case the method must be "POST"
+-- and the encoding must be "multipart/form-data" for that data to get sent.
+--
+-- Unsupported encodings (values other than "application/x-www-form-urlencoded",
+-- "text/plain", or "multipart/form-data") omits the key-value pairs from the request.
+submitURL' :: Session -> [String] -> URI -> Strict.ByteString -> Strict.ByteString ->
+ [(String, Either String FilePath)] -> IO (URI, String, Either Text ByteString)
#ifdef WITH_HTTP_URI
-submitURL session accept uri "POST" query | uriScheme uri `elem` ["http:", "https:"] = do
+addHTTPBody mime body req = return req {
+ HTTP.requestHeaders = (hContentType, mime) :
+ Prelude.filter (\(x, _) -> x /= hContentType) (HTTP.requestHeaders req),
+ HTTP.requestBody = HTTP.RequestBodyBS $ C8.pack body
+ }
+packQuery :: [(String, Either String FilePath)] -> C8.ByteString -> HTTP.Request -> IO HTTP.Request
+packQuery query mime@"application/x-www-form-urlencoded" =
+ addHTTPBody mime $ encodeQuery query
+packQuery query mime@"text/plain" = addHTTPBody mime $
+ Prelude.unlines [value | (key, Left value) <- query, not $ null value]
+packQuery q "multipart/form-data" = HTTP.formDataBody $ Prelude.map encodePart q
+ where
+ encodePart (key, Left value) = HTTP.partBS (Txt.pack key) (C8.pack value)
+ encodePart (key, Right value) =
+ -- C:\fakepath\ is part of the webstandards now & I might as well use it.
+ -- Ancient browsers exposed the full filepath which was a security vulnerability.
+ -- Now to avoid breaking servers relying on this behaviour we send
+ -- a fake Windows filepath.
+ HTTP.partFileRequestBodyM (Txt.pack key) ("C:\\fakepath\\" ++ takeFileName value) $ do
+ size <- fromInteger <$> withBinaryFile value ReadMode hFileSize
+ body <- B.readFile value
+ return $ HTTP.RequestBodyBuilder size $ Builder.lazyByteString body
+packQuery _ _ = return -- Do not upload data if requested to do so in an invalid format.
+submitURL' session accept uri method encoding query | uriScheme uri `elem` ["http:", "https:"] = do
-- HURL is very strict on when it allows cookies to be set: Only POST HTTP requests are considered consent.
-- For the sake of most webframeworks' CSRF protection, cookies from retrieving the form are retroactively set.
csrfCookies <- case retroactiveCookies session of {
Just cookies -> Just <$> readMVar cookies;
Nothing -> return Nothing
}
- fetchHTTPCached session accept uri (\req -> req {
+ fetchHTTPCached session False accept uri (\req -> do
+ ret <- packQuery query encoding req
+ return ret {
HTTP.cookieJar = firstJust csrfCookies $ HTTP.cookieJar req,
- HTTP.method = "POST",
- HTTP.requestBody = HTTP.RequestBodyBS $ C8.pack query
- }) $ \resp -> do
+ HTTP.method = method
+ }) $ \resp -> if method /= "POST" then return () else do
let cookies = HTTP.responseCookieJar resp
- putMVar (globalCookieJar session) cookies
- writeFile (cookiesPath session) $ show $ HTTP.destroyCookieJar cookies
+ swapMVar (globalCookieJar session) cookies
+ writeCookies (cookiesPath session) cookies False
#endif
-submitURL session mimes uri _method query = fetchURL' session mimes uri { uriQuery = '?':query }
+submitURL' session mimes uri _method _encoding query = fetchURL' session mimes uri {
+ uriQuery = '?':encodeQuery query }
+encodeQuery :: [(String, Either String FilePath)] -> String
+encodeQuery query = intercalate "&" $ M.mapMaybe encodePart query
+ where
+ encodePart (key, Left "") = Just $ escape key
+ encodePart (key, Left value) = Just (escape key ++ '=':escape value)
+ encodePart _ = Nothing
+ escape = escapeURIString isUnescapedInURIComponent
-- | As per `fetchURL`, but also returns the redirected URI.
fetchURL' :: Session -> [String] -> URI -> IO (URI, String, Either Text ByteString)
@@ -289,10 +344,11 @@ fetchURL' Session {aboutPages = pages} _ url@URI {uriScheme = "about:", uriPath
#ifdef WITH_HTTP_URI
fetchURL' session accept uri | uriScheme uri `elem` ["http:", "https:"] =
- fetchHTTPCached session accept uri id saveCookies
+ fetchHTTPCached session (cachingEnabled session) accept uri return saveCookies
where
saveCookies resp
- | Just cookies <- retroactiveCookies session = putMVar cookies $ HTTP.responseCookieJar resp
+ | Just cookies <- retroactiveCookies session =
+ void $swapMVar cookies $HTTP.responseCookieJar resp
| otherwise = return ()
#endif
@@ -395,14 +451,14 @@ dispatchByApp _ _ _ _ = return False
#endif
#ifdef WITH_HTTP_URI
-fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do
- cached <- if cachingEnabled session then readCacheHTTP uri else return (Nothing, Nothing)
+fetchHTTPCached session shouldCache accept@(defaultMIME:_) uri cbReq cbResp = do
+ cached <- if shouldCache then readCacheHTTP uri else return (Nothing, Nothing)
response <- case cached of
(Just (mime, body), Nothing) -> return $ Right (mime, body)
(cached, cachingHeaders) -> do
request <- HTTP.requestFromURI uri
cookieJar <- readMVar $ globalCookieJar session
- let request' = cbReq $ request {
+ request'<- cbReq request {
HTTP.cookieJar = Just $ cookieJar,
HTTP.requestHeaders = [
("Accept", C8.pack $ intercalate ", " accept),
@@ -410,7 +466,7 @@ fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do
] ++ fromMaybe [] cachingHeaders,
HTTP.redirectCount = 0
}
- response <- HTTP.httpLbs request $ managerHTTP session
+ response <- HTTP.httpLbs request' $ managerHTTP session
cbResp response
case (
HTTP.responseStatus response,
@@ -442,7 +498,7 @@ fetchHTTPCached session accept@(defaultMIME:_) uri cbReq cbResp = do
let mime' = L.map (Txt.unpack . Txt.strip) $ Txt.splitOn ";" mime
in return $ resolveCharset' uri mime' body
`catch` \e -> do return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ Http e)
-fetchHTTPCached session [] uri _ _ =
+fetchHTTPCached session _ [] uri _ _ =
return (uri, mimeERR, Left $ Txt.pack $ trans (locale session) $ UnsupportedMIME "")
#endif