// Package org is an Org mode syntax processor.
//
// It parses plain text into an AST and can export it as HTML or pretty printed Org mode syntax.
// Further export formats can be defined using the Writer interface.
//
// You probably want to start with something like this:
//   input := strings.NewReader("Your Org mode input")
//   html, err := org.New().Parse(input, "./").Write(org.NewHTMLWriter())
//   if err != nil {
//       log.Fatalf("Something went wrong: %s", err)
//   }
//   log.Print(html)
package org

import (
	"bufio"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"strings"
)
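
// Configuration holds the settings used during parsing; New returns a Configuration with sane defaults.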
type Configuration struct {
	MaxEmphasisNewLines int                                   // Maximum number of newlines inside an emphasis. See org-emphasis-regexp-components newline.
	AutoLink            bool                                  // Try to convert text passages that look like hyperlinks into hyperlinks.
	DefaultSettings     map[string]string                     // Default values for settings that are overridden by setting the same key in BufferSettings.
	Log                 *log.Logger                           // Log is used to print warnings during parsing.
	ReadFile            func(filename string) ([]byte, error) // ReadFile is used to read e.g. #+INCLUDE files.
}

// Document contains the parsing results and a pointer to the Configuration.
type Document struct {
	*Configuration
	Path           string // Path of the file containing the parse input - used to resolve relative paths during parsing (e.g. INCLUDE).
	tokens         []token
	Nodes          []Node
	NamedNodes     map[string]Node
	Outline        Outline           // Outline is a Table Of Contents for the document and contains all sections (headline + content).
	BufferSettings map[string]string // BufferSettings contains all settings that were parsed from keywords.
	Error          error
}

// Node represents a parsed node of the document.
type Node interface {
	String() string // String returns the pretty printed Org mode string for the node (see OrgWriter).
}

type lexFn = func(line string) (t token, ok bool)
type parseFn = func(*Document, int, stopFn) (int, Node)
type stopFn = func(*Document, int) bool

type token struct {
	kind    string
	lvl     int
	content string
	matches []string
}

var lexFns = []lexFn{
	lexHeadline,
	lexDrawer,
	lexBlock,
	lexList,
	lexTable,
	lexHorizontalRule,
	lexKeywordOrComment,
	lexFootnoteDefinition,
	lexExample,
	lexText,
}

var nilToken = token{"nil", -1, "", nil}
var orgWriter = NewOrgWriter()

// New returns a new Configuration with (hopefully) sane defaults.
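// The returned Configuration can be adjusted before parsing, for example (a usage sketch):
//   doc := org.New().Silent().Parse(strings.NewReader("* Hello"), "./")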
func New() *Configuration {
	return &Configuration{
		AutoLink:            true,
		MaxEmphasisNewLines: 1,
		DefaultSettings: map[string]string{
			"TODO":         "TODO | DONE",
			"EXCLUDE_TAGS": "noexport",
			"OPTIONS":      "toc:t <:t e:t f:t pri:t todo:t tags:t",
		},
		Log:      log.New(os.Stderr, "go-org: ", 0),
		ReadFile: ioutil.ReadFile,
	}
}

// String returns the pretty printed Org mode string for the given nodes (see OrgWriter).
func String(nodes []Node) string { return orgWriter.nodesAsString(nodes...) }

// Write is called after Parse with an instance of the Writer interface to export a parsed Document into another format.
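// For example, to pretty print the document as Org mode text rather than HTML
// (a sketch along the lines of the package example, with input being any io.Reader):
//   text, err := org.New().Parse(input, "./").Write(org.NewOrgWriter())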
func (d *Document) Write(w Writer) (out string, err error) {
	defer func() {
		if recovered := recover(); recovered != nil {
			err = fmt.Errorf("could not write output: %s", recovered)
		}
	}()
	if d.Error != nil {
		return "", d.Error
	} else if d.Nodes == nil {
		return "", fmt.Errorf("could not write output: parse was not called")
	}
	w.Before(d)
	WriteNodes(w, d.Nodes...)
	w.After(d)
	return w.String(), err
}

// Parse parses the input into an AST (and some other helpful fields like Outline).
// To allow method chaining, errors are stored in document.Error rather than being returned.
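// Callers that do not call Write should check that field themselves, for example (a sketch):
//   if doc := org.New().Parse(input, "./"); doc.Error != nil { /* handle the error */ }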
func (c *Configuration) Parse(input io.Reader, path string) (d *Document) {
	outlineSection := &Section{}
	d = &Document{
		Configuration:  c,
		Outline:        Outline{outlineSection, outlineSection, 0},
		BufferSettings: map[string]string{},
		NamedNodes:     map[string]Node{},
		Path:           path,
	}
	defer func() {
		if recovered := recover(); recovered != nil {
			d.Error = fmt.Errorf("could not parse input: %v", recovered)
		}
	}()
	if d.tokens != nil {
		d.Error = fmt.Errorf("parse was called multiple times")
	}
	d.tokenize(input)
	_, nodes := d.parseMany(0, func(d *Document, i int) bool { return i >= len(d.tokens) })
	d.Nodes = nodes
	return d
}

// Silent disables all logging of warnings during parsing.
func (c *Configuration) Silent() *Configuration {
	c.Log = log.New(ioutil.Discard, "", 0)
	return c
}
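
// tokenize reads the input line by line and lexes each line into a token.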
func (d *Document) tokenize(input io.Reader) {
	d.tokens = []token{}
	scanner := bufio.NewScanner(input)
	for scanner.Scan() {
		d.tokens = append(d.tokens, tokenize(scanner.Text()))
	}
	if err := scanner.Err(); err != nil {
		d.Error = fmt.Errorf("could not tokenize input: %s", err)
	}
}

// Get returns the value for key from BufferSettings, falling back to DefaultSettings if the key is not set there.
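// For example, Get("TODO") returns the default "TODO | DONE" unless the buffer
// overrides it (e.g. via a #+TODO: keyword).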
func (d *Document) Get(key string) string {
	if v, ok := d.BufferSettings[key]; ok {
		return v
	}
	if v, ok := d.DefaultSettings[key]; ok {
		return v
	}
	return ""
}

// GetOption returns the value associated with the export option key.
// Currently supported options:
// - < (export timestamps)
// - e (export org entities)
// - f (export footnotes)
// - toc (export table of contents)
// - todo (export headline todo status)
// - pri (export headline priority)
// - tags (export headline tags)
// See https://orgmode.org/manual/Export-settings.html for more information.
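// For example, with a buffer containing #+OPTIONS: toc:nil f:t (a hypothetical
// buffer), GetOption("toc") is false and GetOption("f") is true.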
func (d *Document) GetOption(key string) bool {
	get := func(settings map[string]string) string {
		for _, field := range strings.Fields(settings["OPTIONS"]) {
			if strings.HasPrefix(field, key+":") {
				return field[len(key)+1:]
			}
		}
		return ""
	}
	value := get(d.BufferSettings)
	if value == "" {
		value = get(d.DefaultSettings)
	}
	switch value {
	case "t":
		return true
	case "nil":
		return false
	default:
		d.Log.Printf("Bad value for export option %s (%s)", key, value)
		return false
	}
}
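
// parseOne parses the token at index i into a Node, dispatching on the token kind.
// Tokens that no parse function consumes are downgraded to plain text and parsed again.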
func (d *Document) parseOne(i int, stop stopFn) (consumed int, node Node) {
	switch d.tokens[i].kind {
	case "unorderedList", "orderedList":
		consumed, node = d.parseList(i, stop)
	case "tableRow", "tableSeparator":
		consumed, node = d.parseTable(i, stop)
	case "beginBlock":
		consumed, node = d.parseBlock(i, stop)
	case "beginDrawer":
		consumed, node = d.parseDrawer(i, stop)
	case "text":
		consumed, node = d.parseParagraph(i, stop)
	case "example":
		consumed, node = d.parseExample(i, stop)
	case "horizontalRule":
		consumed, node = d.parseHorizontalRule(i, stop)
	case "comment":
		consumed, node = d.parseComment(i, stop)
	case "keyword":
		consumed, node = d.parseKeyword(i, stop)
	case "headline":
		consumed, node = d.parseHeadline(i, stop)
	case "footnoteDefinition":
		consumed, node = d.parseFootnoteDefinition(i, stop)
	}

	if consumed != 0 {
		return consumed, node
	}
	d.Log.Printf("Could not parse token %#v: Falling back to treating it as plain text.", d.tokens[i])
	m := plainTextRegexp.FindStringSubmatch(d.tokens[i].matches[0])
	d.tokens[i] = token{"text", len(m[1]), m[2], m}
	return d.parseOne(i, stop)
}

func (d *Document) parseMany(i int, stop stopFn) (int, []Node) {
	start, nodes := i, []Node{}
	for i < len(d.tokens) && !stop(d, i) {
		consumed, node := d.parseOne(i, stop)
		i += consumed
		nodes = append(nodes, node)
	}
	return i - start, nodes
}

func (d *Document) addHeadline(headline *Headline) int {
	current := &Section{Headline: headline}
	d.Outline.last.add(current)
	d.Outline.count++
	d.Outline.last = current
	return d.Outline.count
}

func tokenize(line string) token {
	for _, lexFn := range lexFns {
		if token, ok := lexFn(line); ok {
			return token
		}
	}
	panic(fmt.Sprintf("could not lex line: %s", line))
}