githaven/build/generate-emoji.go
isla w f41ad344cb
Update Emoji dataset to Unicode 14 (#22342)
Gitea emoji dataset was out of date because it gets manually built and
hasn't been rebuilt since it was added. This means Gitea doesn't
recognize some newer emoji or changes to existing ones.

After changing the max unicode version to 14 I just ran: `go run
build/generate-emoji.go`

This should address the initial issue seen in #22153 where Gitea doesn't
recognize a standard alias used elsewhere when importing content.

14 is the latest supported version from the upstream source as 15 is not
widely supported (in their opinion) yet
2023-01-04 11:52:48 -06:00

220 lines
4.5 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// Copyright 2015 Kenneth Shaw
// SPDX-License-Identifier: MIT
//go:build ignore
package main
import (
"flag"
"fmt"
"go/format"
"io"
"log"
"net/http"
"os"
"regexp"
"sort"
"strconv"
"strings"
"unicode/utf8"
"code.gitea.io/gitea/modules/json"
)
const (
gemojiURL = "https://raw.githubusercontent.com/github/gemoji/master/db/emoji.json"
maxUnicodeVersion = 14
)
var flagOut = flag.String("o", "modules/emoji/emoji_data.go", "out")
// Gemoji is a set of emoji data.
type Gemoji []Emoji
// Emoji represents a single emoji and associated data.
type Emoji struct {
Emoji string `json:"emoji"`
Description string `json:"description,omitempty"`
Aliases []string `json:"aliases"`
UnicodeVersion string `json:"unicode_version,omitempty"`
SkinTones bool `json:"skin_tones,omitempty"`
}
// Don't include some fields in JSON
func (e Emoji) MarshalJSON() ([]byte, error) {
type emoji Emoji
x := emoji(e)
x.UnicodeVersion = ""
x.Description = ""
x.SkinTones = false
return json.Marshal(x)
}
func main() {
var err error
flag.Parse()
// generate data
buf, err := generate()
if err != nil {
log.Fatal(err)
}
// write
err = os.WriteFile(*flagOut, buf, 0o644)
if err != nil {
log.Fatal(err)
}
}
var replacer = strings.NewReplacer(
"main.Gemoji", "Gemoji",
"main.Emoji", "\n",
"}}", "},\n}",
", Description:", ", ",
", Aliases:", ", ",
", UnicodeVersion:", ", ",
", SkinTones:", ", ",
)
var emojiRE = regexp.MustCompile(`\{Emoji:"([^"]*)"`)
func generate() ([]byte, error) {
var err error
// load gemoji data
res, err := http.Get(gemojiURL)
if err != nil {
return nil, err
}
defer res.Body.Close()
// read all
body, err := io.ReadAll(res.Body)
if err != nil {
return nil, err
}
// unmarshal
var data Gemoji
err = json.Unmarshal(body, &data)
if err != nil {
return nil, err
}
skinTones := make(map[string]string)
skinTones["\U0001f3fb"] = "Light Skin Tone"
skinTones["\U0001f3fc"] = "Medium-Light Skin Tone"
skinTones["\U0001f3fd"] = "Medium Skin Tone"
skinTones["\U0001f3fe"] = "Medium-Dark Skin Tone"
skinTones["\U0001f3ff"] = "Dark Skin Tone"
var tmp Gemoji
// filter out emoji that require greater than max unicode version
for i := range data {
val, _ := strconv.ParseFloat(data[i].UnicodeVersion, 64)
if int(val) <= maxUnicodeVersion {
tmp = append(tmp, data[i])
}
}
data = tmp
sort.Slice(data, func(i, j int) bool {
return data[i].Aliases[0] < data[j].Aliases[0]
})
aliasMap := make(map[string]int, len(data))
for i, e := range data {
if e.Emoji == "" || len(e.Aliases) == 0 {
continue
}
for _, a := range e.Aliases {
if a == "" {
continue
}
aliasMap[a] = i
}
}
// gitea customizations
i, ok := aliasMap["tada"]
if ok {
data[i].Aliases = append(data[i].Aliases, "hooray")
}
i, ok = aliasMap["laughing"]
if ok {
data[i].Aliases = append(data[i].Aliases, "laugh")
}
// write a JSON file to use with tribute (write before adding skin tones since we can't support them there yet)
file, _ := json.Marshal(data)
_ = os.WriteFile("assets/emoji.json", file, 0o644)
// Add skin tones to emoji that support it
var (
s []string
newEmoji string
newDescription string
newData Emoji
)
for i := range data {
if data[i].SkinTones {
for k, v := range skinTones {
s = strings.Split(data[i].Emoji, "")
if utf8.RuneCountInString(data[i].Emoji) == 1 {
s = append(s, k)
} else {
// insert into slice after first element because all emoji that support skin tones
// have that modifier placed at this spot
s = append(s, "")
copy(s[2:], s[1:])
s[1] = k
}
newEmoji = strings.Join(s, "")
newDescription = data[i].Description + ": " + v
newAlias := data[i].Aliases[0] + "_" + strings.ReplaceAll(v, " ", "_")
newData = Emoji{newEmoji, newDescription, []string{newAlias}, "12.0", false}
data = append(data, newData)
}
}
}
// add header
str := replacer.Replace(fmt.Sprintf(hdr, gemojiURL, data))
// change the format of the unicode string
str = emojiRE.ReplaceAllStringFunc(str, func(s string) string {
var err error
s, err = strconv.Unquote(s[len("{Emoji:"):])
if err != nil {
panic(err)
}
return "{" + strconv.QuoteToASCII(s)
})
// format
return format.Source([]byte(str))
}
const hdr = `
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package emoji
// Code generated by build/generate-emoji.go. DO NOT EDIT.
// Sourced from %s
var GemojiData = %#v
`