summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdrian Cochrane <alcinnz@lavabit.com>2022-09-14 13:13:37 +1200
committerAdrian Cochrane <alcinnz@lavabit.com>2022-09-14 13:13:37 +1200
commitf605efd1872c427894e67aa40893cad9ce8ce577 (patch)
treea174d9a98429e2ed95c906a165876a70d5cb914a
parent53212960af41818f0cc0ec936e14533f6eebf77b (diff)
downloadhurl-f605efd1872c427894e67aa40893cad9ce8ce577.tar.gz
hurl-f605efd1872c427894e67aa40893cad9ce8ce577.tar.bz2
hurl-f605efd1872c427894e67aa40893cad9ce8ce577.zip
Release v0.1 decoupled from Rhapsode, with CSV/TSV & refined tablesorting!
-rw-r--r--hurl-xml/hurl-xml.cabal6
-rw-r--r--hurl-xml/src/Network/MIME/XML.hs155
-rw-r--r--hurl-xml/src/Network/MIME/XML/Table.hs70
3 files changed, 175 insertions, 56 deletions
diff --git a/hurl-xml/hurl-xml.cabal b/hurl-xml/hurl-xml.cabal
index ef44081..7e7ad3a 100644
--- a/hurl-xml/hurl-xml.cabal
+++ b/hurl-xml/hurl-xml.cabal
@@ -16,7 +16,7 @@ version: 0.1.0.0
synopsis: Fetch parsed XML & possibly CSS for a URL based on MIMEtype.
-- A longer description of the package.
-description: API bindings between HURL, XML-Conduit, & Haskell Stylist.
+description: API bindings between HURL, XML-Conduit, & Haskell Stylist. Supports HTML, XML, Gemini, TSV, CSV, or plaintext files. Can automatically extract & apply CSS stylesheets if Haskell Stylist is used, in which case CSS files cause the previous page to get restyled.
-- URL for the project homepage or repository.
homepage: https://rhapsode.adrian.geek.nz/
@@ -63,8 +63,8 @@ library
build-depends: base >=4.12 && <4.13, text, bytestring, containers,
data-default-class,
time, directory, filepath, temporary,
- xml-conduit, html-conduit,
- network-uri, hurl, file-embed,
+ xml-conduit >= 1.8 && <2, html-conduit >= 1.3 && <2,
+ network-uri, hurl >= 2.2 && <3, file-embed >= 0.0.15 && <0.1,
css-syntax, stylist-traits, xml-conduit-stylist >= 3 && <4
-- Directories containing source files.
diff --git a/hurl-xml/src/Network/MIME/XML.hs b/hurl-xml/src/Network/MIME/XML.hs
index f260580..a87865b 100644
--- a/hurl-xml/src/Network/MIME/XML.hs
+++ b/hurl-xml/src/Network/MIME/XML.hs
@@ -50,12 +50,13 @@ data Page styles = Page {
backStack :: [(String, URI)],
forwardStack :: [(String, URI)],
-- Probably don't need an MVar here, but let's be safe!
- visitedURLs :: Set Text
+ visitedURLs :: Set Text,
+ appName :: String
}
-loadVisited :: IO (Set Text)
-loadVisited = do
- dir <- getXdgDirectory XdgData "rhapsode"
+loadVisited :: String -> IO (Set Text)
+loadVisited appname = do
+ dir <- getXdgDirectory XdgData appname
let path = dir </> "history.gmni"
exists <- doesFileExist path
@@ -68,33 +69,35 @@ loadVisited = do
readStrict path = do s <- Prelude.readFile path; length s `seq` return s
utf8' bytes = convertCharset "utf-8" $ B.toStrict bytes
+aCCEPT = ["text/xml", "application/xml", "text/html", "text/gemini",
+ "text/csv", "text/tab-separated-values", "text/css", "text/*", "*/*"]
-fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "nocache" } =
- fetchDocument http { cachingEnabled = False } referer mime $ pageURL referer
-fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "novalidate" } =
- fetchDocument http { validateCertificates = False } referer mime $ pageURL referer
-fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/back" } =
- fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
+fetchDocument http referer URI { uriScheme = "action:", uriPath = "nocache" } =
+ fetchDocument http { cachingEnabled = False } referer $ pageURL referer
+fetchDocument http referer URI { uriScheme = "action:", uriPath = "novalidate" } =
+ fetchDocument http { validateCertificates = False } referer $ pageURL referer
+fetchDocument http referer URI { uriScheme = "action:", uriPath = "history/back" } =
+ fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer (-1)
-fetchDocument http referer mime URI { uriScheme = "action:", uriPath = "history/forward" } =
- fetchURL' http mime (pageURL referer') >>= parseDocument' referer' http False
+fetchDocument http referer URI { uriScheme = "action:", uriPath = "history/forward" } =
+ fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer' http False
where referer' = shiftHistory referer 1
-fetchDocument http referer mime URI {
+fetchDocument http referer URI {
uriScheme = "action:", uriPath = 'h':'i':'s':'t':'o':'r':'y':'/':x
} | Just x' <- readMaybe x, referer' <- shiftHistory referer x' =
- fetchURL' http mime (pageURL referer') >>= parseDocument' referer http False
-fetchDocument http referer mime URI { uriScheme = "app:", uriPath = appID } = do
+ fetchURL' http aCCEPT (pageURL referer') >>= parseDocument' referer http False
+fetchDocument http referer URI { uriScheme = "app:", uriPath = appID } = do
dispatchByApp http Application {
name = "", icon = nullURI, description = "",
appId = appID
} (pageMIME referer) $ pageURL referer
return referer -- TODO play an error or success sound
-fetchDocument http referer@Page { pageURL = uri0 } mime uri@URI { uriFragment = anchor }
+fetchDocument http referer@Page { pageURL = uri0 } uri@URI { uriFragment = anchor }
| uri { uriFragment = "" } == uri0 { uriFragment = "" } = return referer {
html = applySortDoc anchor $ html referer,
pageURL = uri
}
-fetchDocument http referer mime uri = fetchURL' http mime uri >>= parseDocument' referer http True
+fetchDocument http referer uri = fetchURL' http aCCEPT uri >>= parseDocument' referer http True
shiftHistory :: Page style -> Integer -> Page style
shiftHistory self 0 = self
@@ -129,26 +132,27 @@ parseDocument' ref@Page {visitedURLs = hist} sess saveHist resp@(URI {uriFragmen
parseDocument :: StyleSheet s => Page s -> Session -> (URI, String, Either Text B.ByteString)
-> IO (Page s)
parseDocument ref sess (uri, "html/x-error\t", resp) = parseDocument ref sess (uri, "text/html", resp)
-parseDocument Page {initCSS = css'} _ (uri, "text/html", Left text) =
- pageForDoc css' uri $ HTML.parseLT $ fromStrict text
-parseDocument Page {initCSS = css'} _(uri, "text/html", Right bytes) =
- pageForDoc css' uri $ HTML.parseLBS bytes
-parseDocument Page {initCSS = css'} _
+parseDocument Page {initCSS = css', appName = name} _ (uri, "text/html", Left text) =
+ pageForDoc css' name uri $ HTML.parseLT $ fromStrict text
+parseDocument Page {initCSS = css', appName = name} _(uri, "text/html", Right bytes) =
+ pageForDoc css' name uri $ HTML.parseLBS bytes
+parseDocument Page {initCSS = css', appName = name} _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Left text) =
- pageForDoc css' uri $ parseGemini (Just lang) text
-parseDocument Page {initCSS = css'} _
+ pageForDoc css' name uri $ parseGemini (Just lang) text
+parseDocument Page {initCSS = css', appName = name} _
(uri, 't':'e':'x':'t':'/':'g':'e':'m':'i':'n':'i':';':'l':'a':'n':'g':'=':lang, Right bytes) =
- pageForDoc css' uri $ parseGemini (Just lang) $ utf8' bytes
-parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Left text) =
- pageForDoc css' uri $ parseGemini Nothing text
-parseDocument Page {initCSS = css'} _ (uri, "text/gemini", Right bytes) =
- pageForDoc css' uri $ parseGemini Nothing $ utf8' bytes
+ pageForDoc css' name uri $ parseGemini (Just lang) $ utf8' bytes
+parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Left text) =
+ pageForDoc css' name uri $ parseGemini Nothing text
+parseDocument Page {initCSS = css', appName = name} _ (uri, "text/gemini", Right bytes) =
+ pageForDoc css' name uri $ parseGemini Nothing $ utf8' bytes
parseDocument a b (a', b'@"text/css", Right bytes) =
parseDocument a b (a', b', Left $ applyCSScharset (map Txt.unpack charsets) $ B.toStrict bytes)
-parseDocument referer@Page {pageURL = uri', initCSS = css'} _ (uri, "text/css", Left text)
+parseDocument referer@Page {pageURL = uri', initCSS = css', appName = name} _
+ (uri, "text/css", Left text)
| URI {uriAuthority = Just host} <- pageURL referer = do
-- Save this per-domain setting
- dir <- (</> "domain") <$> getXdgDirectory XdgConfig "rhapsode"
+ dir <- (</> "domain") <$> getXdgDirectory XdgConfig name
createDirectoryIfMissing True dir
Txt.writeFile (dir </> uriRegName host) $
CSSTok.serialize $ map absolutizeCSS $ CSSTok.tokenize text
@@ -162,26 +166,35 @@ parseDocument referer@Page {pageURL = uri', initCSS = css'} _ (uri, "text/css",
absolutizeCSS (CSSTok.Url text) | Just rel <- parseRelativeReference $ Txt.unpack text =
CSSTok.Url $ Txt.pack $ uriToStr' $ relativeTo rel uri'
absolutizeCSS tok = tok
+parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Left body) =
+ pageForDoc css' name uri $ parseDelimitedToTable ',' body
+parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/tab-separated-values", Left body) =
+ pageForDoc css' name uri $ parseDelimitedToTable '\t' body
+parseDocument ref@Page {initCSS = css', appName = name} _ (uri, "text/csv", Right body) =
+ pageForDoc css' name uri $ parseDelimitedToTable ',' $ utf8' body
+parseDocument ref@Page {initCSS = css', appName = name} _
+ (uri, "text/tab-separated-values", Right body) =
+ pageForDoc css' name uri $ parseDelimitedToTable '\t' $ utf8' body
parseDocument ref sess (uri, mime, body) | mime' /= mime = parseDocument ref sess (uri, mime', body)
where mime' = takeWhile (/= ';') mime
-parseDocument Page {initCSS = css'} _ (uri, _, Left text)
- | Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' uri doc
- | otherwise = pageForText css' uri text
-parseDocument Page {initCSS = css'} _ (uri, _, Right bytes)
- | Right doc <- XML.parseLBS def bytes = pageForDoc css' uri doc
-parseDocument Page {initCSS = css'} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
+parseDocument Page {initCSS = css', appName = name} _ (uri, _, Left text)
+ | Right doc <- XML.parseText def $ fromStrict text = pageForDoc css' name uri doc
+ | otherwise = pageForText css' name uri text
+parseDocument Page {initCSS = css', appName = name} _ (uri, _, Right bytes)
+ | Right doc <- XML.parseLBS def bytes = pageForDoc css' name uri doc
+parseDocument Page {initCSS = css', appName = name} _ (uri, 't':'e':'x':'t':'/':_, Right bytes) =
-- charset wasn't specified, so assume utf-8.
- pageForText css' uri $ utf8' bytes
-parseDocument Page {initCSS = css'} sess resp@(uri, mime, _) = do
+ pageForText css' name uri $ utf8' bytes
+parseDocument Page {initCSS = css', appName = name} sess resp@(uri, mime, _) = do
dir <- getCurrentDirectory -- TODO find Downloads directory.
ret <- saveDownload nullURI {
uriScheme = "file:",
uriAuthority = Just (URIAuth "" "" "")
} dir resp >>= dispatchByMIME sess mime
- pageForDoc css' uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
+ pageForDoc css' name uri $ HTML.parseLT $ LTxt.pack $ fromMaybe "Unsupported filetype" ret
-pageForText css' uri txt = pageForDoc css' uri XML.Document {
+pageForText css' appname uri txt = pageForDoc css' appname uri XML.Document {
XML.documentPrologue = XML.Prologue [] Nothing [],
XML.documentRoot = XML.Element {
XML.elementName = "pre",
@@ -191,25 +204,26 @@ pageForText css' uri txt = pageForDoc css' uri XML.Document {
XML.documentEpilogue = []
}
-pageForDoc :: StyleSheet s => s -> URI -> Document -> IO (Page s)
-pageForDoc css' uri doc = do
+pageForDoc :: StyleSheet s => s -> String -> URI -> Document -> IO (Page s)
+pageForDoc css' appname uri doc = do
-- See if the user has configured an alternate stylesheet for this domain.
let authorStyle = return $ html2css doc uri css'
styles <- case uriAuthority uri of
Nothing -> authorStyle
Just host -> do
- dir <- getXdgDirectory XdgConfig "rhapsode"
+ dir <- getXdgDirectory XdgConfig appname
let path = dir </> "domain" </> uriRegName host
hasAltStyle <- doesFileExist path
if not hasAltStyle then authorStyle else parse css' <$> Txt.readFile path
- return Page {pageURL = uri, html = doc, css = styles, initCSS = css',
+ return Page {pageURL = uri, html = doc, css = styles,
+ initCSS = css', appName = appname,
-- These fields are all blank, to be filled in later by logHistory & parseDocument'
pageTitle = "", pageMIME = "", apps = [],
backStack = [], forwardStack = [], visitedURLs = Set.empty}
-logHistory hist ret@Page {pageURL = url', html = doc} = do
- dir <- getXdgDirectory XdgData "rhapsode"
+logHistory hist ret@Page {pageURL = url', html = doc, appName = name} = do
+ dir <- getXdgDirectory XdgData name
createDirectoryIfMissing True dir
now <- getCurrentTime
let title = Txt.unpack $ getTitle $ XML.documentRoot doc
@@ -305,3 +319,50 @@ parseGemini' ("```":lines) = go [] lines
parseGemini' (line:lines) = el "p" line : parseGemini' lines
parseGemini' [] = []
+
+--------
+---- TSV, CSV, etc
+--------
+
+parseDelimitedValues _ "" row rows = reverse (reverse row : rows)
+parseDelimitedValues delim ('\r':.cs) row rows = parseDelimitedValues delim cs row rows
+parseDelimitedValues delim ('\n':.cs) row rows = parseDelimitedValues delim cs [] (reverse row : rows)
+parseDelimitedValues delim (c:.'"':.cs) row rows | c == delim =
+ let (value, cs') = inner cs in parseDelimitedValues delim cs' (value:row) rows
+ where
+ inner (x:.y:.cs) | x == delim && y == delim = let (a, b) = inner cs in (delim `Txt.cons` a, b)
+ inner (c:.cs) | c == delim = ("", cs)
+ | otherwise = let (a, b) = inner cs in (c `Txt.cons` a, b)
+ inner "" = ("", "")
+parseDelimitedValues delim (c:.cs) row rows | c == delim =
+ let (value, cs') = Txt.break (`elem` ['\r', '\n', delim]) cs
+ in parseDelimitedValues delim cs' (value:row) rows
+parseDelimitedValues delim cs row rows =
+ let (value, cs') = Txt.break (`elem` ['\r', '\n', delim]) cs
+ in parseDelimitedValues delim cs (value:row) rows
+
+escapeDelimitedValues delim source = map (map inner) $ parseDelimitedValues delim source [] []
+ where
+ inner = Txt.strip . Txt.replace "\\\\" "\\" . Txt.replace "\\n" "\n" .
+ Txt.replace "\\t" "\t" . Txt.replace "\\r" "\r"
+
+parseDelimitedToTable delim source
+ | (head:body) <- filter (not . null) $ escapeDelimitedValues delim source =
+ XML.Document {
+ XML.documentPrologue = XML.Prologue [] Nothing [],
+ XML.documentRoot = XML.Element {
+ XML.elementName = "table",
+ XML.elementAttributes = M.empty,
+ XML.elementNodes = rowToTr "th" head : map (rowToTr "td") body
+ },
+ XML.documentEpilogue = []
+ }
+ | otherwise = XML.Document { -- Empty TSV/CSV/etc
+ XML.documentPrologue = XML.Prologue [] Nothing [],
+ XML.documentRoot = XML.Element "table" M.empty [],
+ XML.documentEpilogue = []
+ }
+rowToTr tagname values = XML.NodeElement $ XML.Element "tr" M.empty $ map inner values
+ where
+ inner = XML.NodeElement . XML.Element tagname M.empty . singleton . XML.NodeContent
+ singleton a = [a]
diff --git a/hurl-xml/src/Network/MIME/XML/Table.hs b/hurl-xml/src/Network/MIME/XML/Table.hs
index b3be792..36af47d 100644
--- a/hurl-xml/src/Network/MIME/XML/Table.hs
+++ b/hurl-xml/src/Network/MIME/XML/Table.hs
@@ -1,19 +1,24 @@
-{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE OverloadedStrings, PatternSynonyms, ViewPatterns #-}
module Network.MIME.XML.Table(applySort, applySortDoc, splitTable) where
import Text.XML
-import Data.Text
+import Data.Text as Txt
import qualified Data.Map as M
import Data.Maybe
import qualified Data.List as L
import Text.Read (readMaybe)
+-- For smarter comparisons...
+import Data.Time.Format (parseTimeM, defaultTimeLocale)
+import Data.Time.Clock (UTCTime)
+import Data.Char (isDigit)
+
applySortDoc :: String -> Document -> Document
applySortDoc anchor doc@Document {documentRoot = el} = doc {documentRoot = applySort anchor el}
applySort :: String -> Element -> Element
-applySort ('#':'-':'r':'h':'a':'p':'s':'-':'%':anchor) el
+applySort ('#':'-':'a':'r':'g':'o':'-':'%':anchor) el
| (id', ord:col) <- L.break (`elem` ['<', '>']) anchor, Just col' <- readMaybe col =
applySort' id' (ord == '<') col' el
applySort _ el = el
@@ -45,9 +50,10 @@ applySort'' asc col el
| otherwise = el
where
compareRows (TableRow a _) (TableRow b _)
- | asc = (a !! col) `compare` (b !! col)
- | otherwise = (b !! col) `compare` (a !! col)
+ | asc = compareAs (a !! col) (b !! col) (comparators !! col)
+ | otherwise = compareAs (b !! col) (a !! col) (comparators !! col)
(header, _, footer) = splitTable $ elementNodes el
+ comparators = tableHeadComparators header
data TableRow = TableRow { keys :: [Text], markup :: [Element] }
@@ -55,7 +61,7 @@ table2sorttable Element {
elementName = Name "table" _ _,
elementAttributes = attrs,
elementNodes = childs
- } | "-rhaps-unsortable" `elem` attrs, (_, body, _) <- splitTable childs =
+ } | "-argo-unsortable" `notElem` attrs, (_, body, _) <- splitTable childs =
trs2sorttable body
table2sorttable _ = Nothing
@@ -83,6 +89,29 @@ splitTableBody els@(NodeElement _:_) = ([], els)
splitTableBody (_:els) = splitTableBody els
splitTableBody [] = ([], [])
+tableHeadComparators :: [Node] -> [Text]
+tableHeadComparators = Prelude.map (fromMaybe "alphanumeric") . tableHeadComparators'
+tableHeadComparators' :: [Node] -> [Maybe Text]
+tableHeadComparators' (NodeElement el@Element { elementName = Name name _ _, elementNodes = childs}:els)
+ | name == "thead" = tableHeadComparators' childs `mergeRight` tableHeadComparators' els
+ | name `elem` ["colgroup", "tr"] = tableRowComparators childs `mergeRight` tableHeadComparators' els
+ | otherwise = tableHeadComparators' els
+tableHeadComparators' [] = []
+tableRowComparators :: [Node] -> [Maybe Text]
+tableRowComparators (NodeElement el@(Element (Name "col" _ _) attrs _):els) =
+ let colspan = fromMaybe 1 (M.lookup "span" attrs >>= readMaybe . unpack)
+ in Prelude.replicate colspan (M.lookup "-argo-sortas" attrs) ++ tableRowComparators els
+tableRowComparators (NodeElement el@(Element (Name n _ _) attrs _):els) | n `elem` ["td", "th"] =
+ let colspan = fromMaybe 1 (M.lookup "colspan" attrs >>= readMaybe . unpack)
+ in Prelude.replicate colspan (M.lookup "-argo-sortas" attrs) ++ tableRowComparators els
+tableRowComparators (_:els) = tableRowComparators els
+tableRowComparators [] = []
+mergeRight :: [Maybe a] -> [Maybe a] -> [Maybe a]
+mergeRight (_:as) (Just b:bs) = Just b : mergeRight as bs
+mergeRight (a:as) (_:bs) = a : mergeRight as bs
+mergeRight [] bs = bs
+mergeRight as [] = as
+
annotateTHead (NodeElement el@Element { elementName = Name "thead" _ _, elementNodes = childs }:nodes) a c =
NodeElement el { elementNodes = annotateTHead childs a c } : nodes
annotateTHead (NodeElement el@Element { elementName = Name "tr" _ _, elementNodes = childs }:nodes) a c =
@@ -111,6 +140,8 @@ trs2sorttable _ = Nothing
tds2keys :: [Element] -> Maybe [Text]
tds2keys (el@Element {elementName = Name n _ _, elementAttributes = attrs, elementNodes = childs }:els)
+ | n `elem` ["td", "th"], Just key <- "-argo-sortkey" `M.lookup` attrs, Just rest <- tds2keys els =
+ Just (Prelude.replicate colspan key ++ rest)
| n `elem` ["td", "th"], Just rest <- tds2keys els =
Just (Prelude.replicate colspan (nodesText childs) ++ rest)
where
@@ -151,3 +182,30 @@ setAt i a ls
go 0 (x:xs) = a x : xs
go n (x:xs) = x : go (n-1) xs
go _ [] = []
+
+pattern (:.) :: Char -> Txt.Text -> Txt.Text
+pattern x :. xs <- (Txt.uncons -> Just (x, xs))
+
+infixr 5 :.
+
+compareAs :: Text -> Text -> Text -> Ordering
+--- Hueristic that readily handles both numbers & text
+compareAs (a:.as) (b:.bs) "alphanumeric"
+ | isDigit a && isDigit b =
+ let (a', as') = Txt.break (not . isDigit) as
+ (b', bs') = Txt.break (not . isDigit) bs
+ in if Txt.length a' == Txt.length b' && a == b
+ then compareAs as bs "alphanumeric"
+ else if Txt.length a' == Txt.length b' then a `compare` b
+ else Txt.length a' `compare` Txt.length b'
+ | a == b = compareAs as bs "alphanumeric"
+ | otherwise = a `compare` b
+compareAs as bs "text" = as `compare` bs
+compareAs as bs "number" = readInt as `compare` readInt bs
+ where
+ readInt :: Text -> Maybe Float
+ readInt = readMaybe . Prelude.filter (`elem` '-':'.':['0'..'9']) . unpack
+compareAs as bs fmt = readTime as `compare` readTime bs
+ where
+ readTime :: Text -> Maybe UTCTime
+ readTime = parseTimeM True defaultTimeLocale (unpack fmt) . unpack