mirror of
https://github.com/mudler/LocalAI.git
synced 2026-04-16 12:59:33 -04:00
chore(gallery-agent): extract readme
Signed-off-by: Ettore Di Giacinto <mudler@localai.io>
This commit is contained in:
59
.github/gallery-agent/helpers.go
vendored
59
.github/gallery-agent/helpers.go
vendored
@@ -141,6 +141,65 @@ func resolveReadme(client *hfapi.Client, modelID string, hfTags []string) (strin
|
||||
return cleanTextContent(content), nil
|
||||
}
|
||||
|
||||
// extractDescription turns a raw HuggingFace README into a concise plain-text
|
||||
// description suitable for embedding in gallery/index.yaml: strips YAML
|
||||
// frontmatter, HTML tags/comments, markdown images, link URLs (keeping the
|
||||
// link text), markdown tables, and then truncates at a paragraph boundary
|
||||
// around ~1200 characters. Raw README should still be used for icon
|
||||
// extraction — call this only for the `description:` field.
|
||||
func extractDescription(readme string) string {
|
||||
s := readme
|
||||
|
||||
// Strip leading YAML frontmatter: `---\n...\n---\n` at start of file.
|
||||
if strings.HasPrefix(strings.TrimLeft(s, " \t\n"), "---") {
|
||||
trimmed := strings.TrimLeft(s, " \t\n")
|
||||
rest := strings.TrimPrefix(trimmed, "---")
|
||||
if idx := strings.Index(rest, "\n---"); idx >= 0 {
|
||||
after := rest[idx+len("\n---"):]
|
||||
after = strings.TrimPrefix(after, "\n")
|
||||
s = after
|
||||
}
|
||||
}
|
||||
|
||||
// Strip HTML comments and tags.
|
||||
s = regexp.MustCompile(`(?s)<!--.*?-->`).ReplaceAllString(s, "")
|
||||
s = regexp.MustCompile(`(?is)<[^>]+>`).ReplaceAllString(s, "")
|
||||
|
||||
// Strip markdown images entirely.
|
||||
s = regexp.MustCompile(`!\[[^\]]*\]\([^)]*\)`).ReplaceAllString(s, "")
|
||||
// Replace markdown links `[text](url)` with just `text`.
|
||||
s = regexp.MustCompile(`\[([^\]]+)\]\([^)]+\)`).ReplaceAllString(s, "$1")
|
||||
|
||||
// Drop table lines and horizontal rules.
|
||||
var kept []string
|
||||
for _, line := range strings.Split(s, "\n") {
|
||||
t := strings.TrimSpace(line)
|
||||
if strings.HasPrefix(t, "|") {
|
||||
continue
|
||||
}
|
||||
if strings.HasPrefix(t, ":--") || strings.HasPrefix(t, "---") || strings.HasPrefix(t, "===") {
|
||||
continue
|
||||
}
|
||||
kept = append(kept, line)
|
||||
}
|
||||
s = strings.Join(kept, "\n")
|
||||
|
||||
// Normalise whitespace.
|
||||
s = cleanTextContent(s)
|
||||
|
||||
// Truncate at a paragraph boundary around maxLen chars.
|
||||
const maxLen = 1200
|
||||
if len(s) > maxLen {
|
||||
cut := strings.LastIndex(s[:maxLen], "\n\n")
|
||||
if cut < maxLen/3 {
|
||||
cut = maxLen
|
||||
}
|
||||
s = strings.TrimRight(s[:cut], " \t\n") + "\n\n..."
|
||||
}
|
||||
|
||||
return s
|
||||
}
|
||||
|
||||
// cleanTextContent removes trailing spaces/tabs and collapses multiple empty
|
||||
// lines so README content embeds cleanly into YAML without lint noise.
|
||||
func cleanTextContent(text string) string {
|
||||
|
||||
14
.github/gallery-agent/main.go
vendored
14
.github/gallery-agent/main.go
vendored
@@ -200,18 +200,24 @@ func main() {
|
||||
}
|
||||
|
||||
// Deterministic README resolution: follow base_model tag if set.
|
||||
// Keep the raw (HTML-bearing) README around while we extract the
|
||||
// icon, then strip it down to a plain-text description for the
|
||||
// `description:` YAML field.
|
||||
readme, err := resolveReadme(client, m.ModelID, m.Tags)
|
||||
if err == nil {
|
||||
pm.ReadmeContent = readme
|
||||
pm.ReadmeContentPreview = truncateString(readme, 200)
|
||||
} else {
|
||||
if err != nil {
|
||||
fmt.Printf(" Warning: failed to fetch README: %v\n", err)
|
||||
}
|
||||
pm.ReadmeContent = readme
|
||||
|
||||
pm.License = licenseFromTags(m.Tags)
|
||||
pm.Tags = curatedTags(m.Tags)
|
||||
pm.Icon = extractModelIcon(pm)
|
||||
|
||||
if pm.ReadmeContent != "" {
|
||||
pm.ReadmeContent = extractDescription(pm.ReadmeContent)
|
||||
pm.ReadmeContentPreview = truncateString(pm.ReadmeContent, 200)
|
||||
}
|
||||
|
||||
fmt.Printf(" License: %s, Tags: %v, Icon: %s\n", pm.License, pm.Tags, pm.Icon)
|
||||
processed = append(processed, pm)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user