Улучшение парсинга описания/комментариев в формате JSON

This commit is contained in:
lost+skunk 2024-07-09 17:24:56 +03:00
parent 0f528ee9fa
commit 8391cc34a9
3 changed files with 68 additions and 40 deletions

View File

@ -113,6 +113,12 @@ func ParseDescription(dscr devianter.Text) string {
} }
return content return content
} }
// DeleteSpywareFromUrl strips DeviantArt's outgoing-link redirect prefix from
// url so the link points straight at its target. URLs that do not start with
// the prefix, or that consist of the prefix alone, are returned unchanged.
var DeleteSpywareFromUrl = func(url string) string {
	const outgoing = "https://www.deviantart.com/users/outgoing?"
	// Require at least one character after the prefix so a bare redirect URL
	// is not reduced to an empty string.
	if len(url) > len(outgoing) && strings.HasPrefix(url, outgoing) {
		return url[len(outgoing):]
	}
	return url
}
if description, dl := dscr.Html.Markup, len(dscr.Html.Markup); dl != 0 && if description, dl := dscr.Html.Markup, len(dscr.Html.Markup); dl != 0 &&
description[0] == '{' && description[0] == '{' &&
@ -135,6 +141,7 @@ func ParseDescription(dscr devianter.Text) string {
EntityMap map[string]struct { EntityMap map[string]struct {
Type string Type string
Data struct { Data struct {
Url string
Config struct { Config struct {
Aligment string Aligment string
Width int Width int
@ -147,31 +154,18 @@ func ParseDescription(dscr devianter.Text) string {
err(e) err(e)
entities := make(map[int]devianter.Deviation) entities := make(map[int]devianter.Deviation)
urls := make(map[int]string)
for n, x := range descr.EntityMap { for n, x := range descr.EntityMap {
num, _ := strconv.Atoi(n) num, _ := strconv.Atoi(n)
if x.Data.Url != "" {
urls[num] = DeleteSpywareFromUrl(x.Data.Url)
}
entities[num] = x.Data.Data entities[num] = x.Data.Data
} }
for _, x := range descr.Blocks { for _, x := range descr.Blocks {
ranges := make(map[int]text) ranges := make(map[int]text)
if len(x.InlineStyleRanges) == 0 {
switch x.Type {
case "atomic":
d := entities[x.EntityRanges[0].Key]
parseddescription.WriteString(`<img width="50%" src="`)
parseddescription.WriteString(ParseMedia(d.Media))
parseddescription.WriteString(`" title="`)
parseddescription.WriteString(d.Author.Username)
parseddescription.WriteString(" - ")
parseddescription.WriteString(d.Title)
parseddescription.WriteString(`">`)
case "unstyled":
parseddescription.WriteString(x.Text)
}
parseddescription.WriteString("<br>")
}
for i, rngs := range x.InlineStyleRanges { for i, rngs := range x.InlineStyleRanges {
var tag string var tag string
@ -192,22 +186,48 @@ func ParseDescription(dscr devianter.Text) string {
} }
} }
switch x.Type {
case "atomic":
d := entities[x.EntityRanges[0].Key]
parseddescription.WriteString(`<img width="50%" src="`)
parseddescription.WriteString(ParseMedia(d.Media))
parseddescription.WriteString(`" title="`)
parseddescription.WriteString(d.Author.Username)
parseddescription.WriteString(" - ")
parseddescription.WriteString(d.Title)
parseddescription.WriteString(`">`)
case "unstyled":
if len(ranges) != 0 {
for _, r := range ranges { for _, r := range ranges {
var tag string var tag string
switch x.Type { switch x.Type {
case "header-two": case "header-two":
tag = "h2" tag = "h2"
case "unstyled":
tag = "p"
} }
parseddescription.WriteString(x.Text[:r.from])
if len(urls) != 0 && len(x.EntityRanges) != 0 {
ra := &x.EntityRanges[0]
parseddescription.WriteString(`<a target="_blank" href="`)
parseddescription.WriteString(urls[ra.Key])
parseddescription.WriteString(`">`)
parseddescription.WriteString(r.TXT) parseddescription.WriteString(r.TXT)
parseddescription.WriteString(`</a>`)
} else {
parseddescription.WriteString(r.TXT)
}
parseddescription.WriteString(TagBuilder(tag, x.Text[r.to:])) parseddescription.WriteString(TagBuilder(tag, x.Text[r.to:]))
} }
} else {
parseddescription.WriteString(x.Text)
}
}
parseddescription.WriteString("<br>")
} }
} else if dl != 0 { } else if dl != 0 {
for tt := html.NewTokenizer(strings.NewReader(dscr.Html.Markup)); ; { for tt := html.NewTokenizer(strings.NewReader(dscr.Html.Markup)); ; {
t := tt.Next() switch tt.Next() {
switch t {
case html.ErrorToken: case html.ErrorToken:
return parseddescription.String() return parseddescription.String()
case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken: case html.StartTagToken, html.EndTagToken, html.SelfClosingTagToken:
@ -216,18 +236,16 @@ func ParseDescription(dscr devianter.Text) string {
case "a": case "a":
for _, a := range token.Attr { for _, a := range token.Attr {
if a.Key == "href" { if a.Key == "href" {
url := strings.ReplaceAll(a.Val, "https://www.deviantart.com/users/outgoing?", "") url := DeleteSpywareFromUrl(a.Val)
if strings.Contains(url, "deviantart") { parseddescription.WriteString(`<a target="_blank" href="`)
url = strings.ReplaceAll(url, "https://www.deviantart.com/", "") parseddescription.WriteString(url)
url = strings.ReplaceAll(url, url[0:strings.Index(url, "/")+1], "") parseddescription.WriteString(`">`)
} parseddescription.WriteString(tagval(tt))
parseddescription.WriteString("<a target=\"_blank\" href=\"" + url + "\">" + tagval(tt) + "</a> ") parseddescription.WriteString("</a> ")
} }
} }
case "img": case "img":
var ( var uri, title string
uri, title string
)
for b, a := range token.Attr { for b, a := range token.Attr {
switch a.Key { switch a.Key {
case "src": case "src":
@ -239,7 +257,11 @@ func ParseDescription(dscr devianter.Text) string {
} }
if title != "" { if title != "" {
for x := -1; x < b; x++ { for x := -1; x < b; x++ {
parseddescription.WriteString("<img src=\"" + uri + "\" title=\"" + title + "\">") parseddescription.WriteString(`<img src="`)
parseddescription.WriteString(uri)
parseddescription.WriteString(`" title="`)
parseddescription.WriteString(title)
parseddescription.WriteString(`">`)
} }
} }
} }

0
gowna Normal file
View File

View File

@ -3,6 +3,7 @@
<head> <head>
<title>SkunkyArt | {{.Templates.Deviation.Post.Deviation.Author.Username}} - {{.Templates.Deviation.Post.Deviation.Title}}</title> <title>SkunkyArt | {{.Templates.Deviation.Post.Deviation.Author.Username}} - {{.Templates.Deviation.Post.Deviation.Title}}</title>
<link rel="stylesheet" href="{{.BasePath}}gui/css/skunky.css"> <link rel="stylesheet" href="{{.BasePath}}gui/css/skunky.css">
<meta name="referrer" content="no-referrer" />
</head> </head>
<main> <main>
<header> <header>
@ -33,10 +34,15 @@
{{if (ne .Templates.Deviation.Tags "")}} {{if (ne .Templates.Deviation.Tags "")}}
{{.Templates.Deviation.Tags}}<br> {{.Templates.Deviation.Tags}}<br>
{{end}} {{end}}
<span>Published: <strong>{{.Templates.Deviation.StringTime}}</strong>; Views: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Views}}</strong>; Favourites: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Favourites}}</strong>; Downloads: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Downloads}}</strong></span> <span>Published: <strong>{{.Templates.Deviation.StringTime}}</strong>; Views: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Views}}</strong>; Favourites: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Favourites}}</strong>; Downloads: <strong>{{.Templates.Deviation.Post.Deviation.Stats.Downloads}}</strong>
<br><a target="_blank" href="https://www.deviantart.com/{{.Templates.Deviation.Post.Deviation.Author.Username}}/art/art-{{.Templates.Deviation.Post.Deviation.ID}}">Redirect to original</a>
</span>
{{if (ne .Templates.Deviation.Post.Description "")}} {{if (ne .Templates.Deviation.Post.Description "")}}
<figcaption> <figcaption>
<details>
<summary>Description</summary>
{{.Templates.Deviation.Post.Description}} {{.Templates.Deviation.Post.Description}}
</details>
</figcaption> </figcaption>
{{end}} {{end}}
{{if ne .Templates.Deviation.Related ""}} {{if ne .Templates.Deviation.Related ""}}