mark2web/vendor/github.com/extemporalgenome/slug/slug.go
Sebastian Frank a781485c0a
Some checks failed
continuous-integration/drone/push Build is failing
no git submodules
2022-02-28 10:28:34 +01:00

111 lines
2.7 KiB
Go

// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package slug transforms strings into a normalized form well suited for use in URLs.
package slug
import (
"golang.org/x/text/unicode/norm"
"encoding/hex"
"unicode"
"unicode/utf8"
)
var lat = []*unicode.RangeTable{unicode.Letter, unicode.Number}
var nop = []*unicode.RangeTable{unicode.Mark, unicode.Sk, unicode.Lm}
// Slug replaces each run of characters which are not unicode letters or
// numbers with a single hyphen, except for leading or trailing runs. Letters
// will be stripped of diacritical marks and lowercased. Letter or number
// codepoints that do not have combining marks or a lower-cased variant will
// be passed through unaltered.
func Slug(s string) string {
buf := make([]rune, 0, len(s))
dash := false
for _, r := range norm.NFKD.String(s) {
switch {
// unicode 'letters' like mandarin characters pass through
case unicode.IsOneOf(lat, r):
buf = append(buf, unicode.ToLower(r))
dash = true
case unicode.IsOneOf(nop, r):
// skip
case dash:
buf = append(buf, '-')
dash = false
}
}
if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
buf = buf[:i]
}
return string(buf)
}
// SlugAscii is identical to Slug, except that runs of one or more unicode
// letters or numbers that still fall outside the ASCII range will have their
// UTF-8 representation hex encoded and delimited by hyphens. As with Slug, in
// no case will hyphens appear at either end of the returned string.
func SlugAscii(s string) string {
const m = utf8.UTFMax
var (
ib [m * 3]byte
ob []byte
buf = make([]byte, 0, len(s))
dash = false
latin = true
)
for _, r := range norm.NFKD.String(s) {
switch {
case unicode.IsOneOf(lat, r):
r = unicode.ToLower(r)
n := utf8.EncodeRune(ib[:m], r)
if r >= 128 {
if latin && dash {
buf = append(buf, '-')
}
n = hex.Encode(ib[m:], ib[:n])
ob = ib[m : m+n]
latin = false
} else {
if !latin {
buf = append(buf, '-')
}
ob = ib[:n]
latin = true
}
dash = true
buf = append(buf, ob...)
case unicode.IsOneOf(nop, r):
// skip
case dash:
buf = append(buf, '-')
dash = false
latin = true
}
}
if i := len(buf) - 1; i >= 0 && buf[i] == '-' {
buf = buf[:i]
}
return string(buf)
}
// IsSlugAscii returns true only if SlugAscii(s) == s.
func IsSlugAscii(s string) bool {
dash := true
for _, r := range s {
switch {
case r == '-':
if dash {
return false
}
dash = true
case 'a' <= r && r <= 'z', '0' <= r && r <= '9':
dash = false
default:
return false
}
}
return !dash
}