840 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			840 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
| package porterstemmer
 | |
| 
 | |
| import (
 | |
| 	//	"log"
 | |
| 	"unicode"
 | |
| )
 | |
| 
 | |
// isConsonant reports whether the rune at index i of s is treated as a
// consonant.
//
// The runes 'a', 'e', 'i', 'o' and 'u' are vowels. A 'y' is a consonant when
// it is the first rune of s; anywhere else it is a consonant exactly when the
// rune before it is not one. Every other rune is a consonant; upper-case
// letters get no special treatment here, so 'A' counts as a consonant.
//
// i must be a valid index into s, otherwise the index expression panics.
func isConsonant(s []rune, i int) bool {
	switch s[i] {
	case 'a', 'e', 'i', 'o', 'u':
		return false
	case 'y':
		if i == 0 {
			return true
		}
		// A non-initial 'y' takes the opposite class of its predecessor.
		return !isConsonant(s, i-1)
	}
	return true
}
 | |
| 
 | |
| func measure(s []rune) uint {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 	result := uint(0)
 | |
| 	i := 0
 | |
| 
 | |
| 	// Short Circuit.
 | |
| 	if 0 == lenS {
 | |
| 		/////////// RETURN
 | |
| 		return result
 | |
| 	}
 | |
| 
 | |
| 	// Ignore (potential) consonant sequence at the beginning of word.
 | |
| 	for isConsonant(s, i) {
 | |
| 
 | |
| 		//DEBUG
 | |
| 		//log.Printf("[measure([%s])] Eat Consonant [%d] -> [%s]", string(s), i, string(s[i]))
 | |
| 
 | |
| 		i++
 | |
| 		if i >= lenS {
 | |
| 			/////////////// RETURN
 | |
| 			return result
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// For each pair of a vowel sequence followed by a consonant sequence, increment result.
 | |
| Outer:
 | |
| 	for i < lenS {
 | |
| 
 | |
| 		for !isConsonant(s, i) {
 | |
| 
 | |
| 			//DEBUG
 | |
| 			//log.Printf("[measure([%s])] VOWEL [%d] -> [%s]", string(s), i, string(s[i]))
 | |
| 
 | |
| 			i++
 | |
| 			if i >= lenS {
 | |
| 				/////////// BREAK
 | |
| 				break Outer
 | |
| 			}
 | |
| 		}
 | |
| 		for isConsonant(s, i) {
 | |
| 
 | |
| 			//DEBUG
 | |
| 			//log.Printf("[measure([%s])] CONSONANT [%d] -> [%s]", string(s), i, string(s[i]))
 | |
| 
 | |
| 			i++
 | |
| 			if i >= lenS {
 | |
| 				result++
 | |
| 				/////////// BREAK
 | |
| 				break Outer
 | |
| 			}
 | |
| 		}
 | |
| 		result++
 | |
| 	}
 | |
| 
 | |
| 	// Return
 | |
| 	return result
 | |
| }
 | |
| 
 | |
// hasSuffix reports whether s ends with suffix and is strictly longer than
// it.
//
// The "strictly longer" requirement means a slice that consists of nothing
// but the suffix does not match (hasSuffix("ss", "ss") is false), so a match
// always leaves at least one rune in front of the suffix.
//
// An empty suffix matches any non-empty s; it used to trigger an
// index-out-of-range panic.
func hasSuffix(s, suffix []rune) bool {
	offset := len(s) - len(suffix)
	if offset <= 0 {
		return false
	}

	// Compare from the end: the last rune is the most likely to differ, so
	// mismatches are usually detected on the first comparison.
	for i := len(suffix) - 1; i >= 0; i-- {
		if s[offset+i] != suffix[i] {
			return false
		}
	}

	return true
}
 | |
| 
 | |
| func containsVowel(s []rune) bool {
 | |
| 
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	for i := 0; i < lenS; i++ {
 | |
| 
 | |
| 		if !isConsonant(s, i) {
 | |
| 			/////////// RETURN
 | |
| 			return true
 | |
| 		}
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 	return false
 | |
| }
 | |
| 
 | |
| func hasRepeatDoubleConsonantSuffix(s []rune) bool {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	result := false
 | |
| 
 | |
| 	// Do it!
 | |
| 	if 2 > lenS {
 | |
| 		result = false
 | |
| 	} else if s[lenS-1] == s[lenS-2] && isConsonant(s, lenS-1) { // Will using isConsonant() cause a problem with "YY"?
 | |
| 		result = true
 | |
| 	} else {
 | |
| 		result = false
 | |
| 	}
 | |
| 
 | |
| 	// Return,
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func hasConsonantVowelConsonantSuffix(s []rune) bool {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	result := false
 | |
| 
 | |
| 	// Do it!
 | |
| 	if 3 > lenS {
 | |
| 		result = false
 | |
| 	} else if isConsonant(s, lenS-3) && !isConsonant(s, lenS-2) && isConsonant(s, lenS-1) {
 | |
| 		result = true
 | |
| 	} else {
 | |
| 		result = false
 | |
| 	}
 | |
| 
 | |
| 	// Return
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step1a(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	var result []rune = s
 | |
| 
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	// Do it!
 | |
| 	if suffix := []rune("sses"); hasSuffix(s, suffix) {
 | |
| 
 | |
| 		lenTrim := 2
 | |
| 
 | |
| 		subSlice := s[:lenS-lenTrim]
 | |
| 
 | |
| 		result = subSlice
 | |
| 	} else if suffix := []rune("ies"); hasSuffix(s, suffix) {
 | |
| 		lenTrim := 2
 | |
| 
 | |
| 		subSlice := s[:lenS-lenTrim]
 | |
| 
 | |
| 		result = subSlice
 | |
| 	} else if suffix := []rune("ss"); hasSuffix(s, suffix) {
 | |
| 
 | |
| 		result = s
 | |
| 	} else if suffix := []rune("s"); hasSuffix(s, suffix) {
 | |
| 
 | |
| 		lenSuffix := 1
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		result = subSlice
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step1b(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	var result []rune = s
 | |
| 
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	// Do it!
 | |
| 	if suffix := []rune("eed"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 0 < m {
 | |
| 			lenTrim := 1
 | |
| 
 | |
| 			result = s[:lenS-lenTrim]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ed"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		if containsVowel(subSlice) {
 | |
| 
 | |
| 			if suffix2 := []rune("at"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 			} else if suffix2 := []rune("bl"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 			} else if suffix2 := []rune("iz"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 			} else if c := subSlice[len(subSlice)-1]; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) {
 | |
| 				lenTrim := 1
 | |
| 
 | |
| 				lenSubSlice := len(subSlice)
 | |
| 
 | |
| 				result = subSlice[:lenSubSlice-lenTrim]
 | |
| 			} else if c := subSlice[len(subSlice)-1]; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 
 | |
| 				result[len(result)-1] = 'e'
 | |
| 			} else {
 | |
| 				result = subSlice
 | |
| 			}
 | |
| 
 | |
| 		}
 | |
| 	} else if suffix := []rune("ing"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		if containsVowel(subSlice) {
 | |
| 
 | |
| 			if suffix2 := []rune("at"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 
 | |
| 				result[len(result)-1] = 'e'
 | |
| 			} else if suffix2 := []rune("bl"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 
 | |
| 				result[len(result)-1] = 'e'
 | |
| 			} else if suffix2 := []rune("iz"); hasSuffix(subSlice, suffix2) {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 
 | |
| 				result[len(result)-1] = 'e'
 | |
| 			} else if c := subSlice[len(subSlice)-1]; 'l' != c && 's' != c && 'z' != c && hasRepeatDoubleConsonantSuffix(subSlice) {
 | |
| 				lenTrim := 1
 | |
| 
 | |
| 				lenSubSlice := len(subSlice)
 | |
| 
 | |
| 				result = subSlice[:lenSubSlice-lenTrim]
 | |
| 			} else if c := subSlice[len(subSlice)-1]; 1 == measure(subSlice) && hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c {
 | |
| 				lenTrim := -1
 | |
| 
 | |
| 				result = s[:lenS-lenSuffix-lenTrim]
 | |
| 
 | |
| 				result[len(result)-1] = 'e'
 | |
| 			} else {
 | |
| 				result = subSlice
 | |
| 			}
 | |
| 
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step1c(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if 2 > lenS {
 | |
| 		/////////// RETURN
 | |
| 		return result
 | |
| 	}
 | |
| 
 | |
| 	if 'y' == s[lenS-1] && containsVowel(s[:lenS-1]) {
 | |
| 
 | |
| 		result[lenS-1] = 'i'
 | |
| 
 | |
| 	} else if 'Y' == s[lenS-1] && containsVowel(s[:lenS-1]) {
 | |
| 
 | |
| 		result[lenS-1] = 'I'
 | |
| 
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step2(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if suffix := []rune("ational"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-5] = 'e'
 | |
| 			result = result[:lenS-4]
 | |
| 		}
 | |
| 	} else if suffix := []rune("tional"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = result[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("enci"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-1] = 'e'
 | |
| 		}
 | |
| 	} else if suffix := []rune("anci"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-1] = 'e'
 | |
| 		}
 | |
| 	} else if suffix := []rune("izer"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-1]
 | |
| 		}
 | |
| 	} else if suffix := []rune("bli"); hasSuffix(s, suffix) { // --DEPARTURE--
 | |
| 		//		} else if suffix := []rune("abli") ; hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-1] = 'e'
 | |
| 		}
 | |
| 	} else if suffix := []rune("alli"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("entli"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("eli"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ousli"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ization"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-5] = 'e'
 | |
| 
 | |
| 			result = s[:lenS-4]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ation"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-3] = 'e'
 | |
| 
 | |
| 			result = s[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ator"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-2] = 'e'
 | |
| 
 | |
| 			result = s[:lenS-1]
 | |
| 		}
 | |
| 	} else if suffix := []rune("alism"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("iveness"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-4]
 | |
| 		}
 | |
| 	} else if suffix := []rune("fulness"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-4]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ousness"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-4]
 | |
| 		}
 | |
| 	} else if suffix := []rune("aliti"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result = s[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("iviti"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-3] = 'e'
 | |
| 
 | |
| 			result = result[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("biliti"); hasSuffix(s, suffix) {
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			result[lenS-5] = 'l'
 | |
| 			result[lenS-4] = 'e'
 | |
| 
 | |
| 			result = result[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("logi"); hasSuffix(s, suffix) { // --DEPARTURE--
 | |
| 		if 0 < measure(s[:lenS-len(suffix)]) {
 | |
| 			lenTrim := 1
 | |
| 
 | |
| 			result = s[:lenS-lenTrim]
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step3(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if suffix := []rune("icate"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		if 0 < measure(s[:lenS-lenSuffix]) {
 | |
| 			result = result[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ative"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 0 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("alize"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		if 0 < measure(s[:lenS-lenSuffix]) {
 | |
| 			result = result[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("iciti"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		if 0 < measure(s[:lenS-lenSuffix]) {
 | |
| 			result = result[:lenS-3]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ical"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		if 0 < measure(s[:lenS-lenSuffix]) {
 | |
| 			result = result[:lenS-2]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ful"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 0 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ness"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 0 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step4(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if suffix := []rune("al"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = result[:lenS-lenSuffix]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ance"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = result[:lenS-lenSuffix]
 | |
| 		}
 | |
| 	} else if suffix := []rune("ence"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = result[:lenS-lenSuffix]
 | |
| 		}
 | |
| 	} else if suffix := []rune("er"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ic"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("able"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ible"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ant"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ement"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ment"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ent"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ion"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		c := subSlice[len(subSlice)-1]
 | |
| 
 | |
| 		if 1 < m && ('s' == c || 't' == c) {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ou"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ism"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ate"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("iti"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ous"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ive"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	} else if suffix := []rune("ize"); hasSuffix(s, suffix) {
 | |
| 		lenSuffix := len(suffix)
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step5a(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if 'e' == s[lenS-1] {
 | |
| 		lenSuffix := 1
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		} else if 1 == m {
 | |
| 			if c := subSlice[len(subSlice)-1]; !(hasConsonantVowelConsonantSuffix(subSlice) && 'w' != c && 'x' != c && 'y' != c) {
 | |
| 				result = subSlice
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func step5b(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 	result := s
 | |
| 
 | |
| 	// Do it!
 | |
| 	if 2 < lenS && 'l' == s[lenS-2] && 'l' == s[lenS-1] {
 | |
| 
 | |
| 		lenSuffix := 1
 | |
| 
 | |
| 		subSlice := s[:lenS-lenSuffix]
 | |
| 
 | |
| 		m := measure(subSlice)
 | |
| 
 | |
| 		if 1 < m {
 | |
| 			result = subSlice
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func StemString(s string) string {
 | |
| 
 | |
| 	// Convert string to []rune
 | |
| 	runeArr := []rune(s)
 | |
| 
 | |
| 	// Stem.
 | |
| 	runeArr = Stem(runeArr)
 | |
| 
 | |
| 	// Convert []rune to string
 | |
| 	str := string(runeArr)
 | |
| 
 | |
| 	// Return.
 | |
| 	return str
 | |
| }
 | |
| 
 | |
| func Stem(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	// Short circuit.
 | |
| 	if 0 == lenS {
 | |
| 		/////////// RETURN
 | |
| 		return s
 | |
| 	}
 | |
| 
 | |
| 	// Make all runes lowercase.
 | |
| 	for i := 0; i < lenS; i++ {
 | |
| 		s[i] = unicode.ToLower(s[i])
 | |
| 	}
 | |
| 
 | |
| 	// Stem
 | |
| 	result := StemWithoutLowerCasing(s)
 | |
| 
 | |
| 	// Return.
 | |
| 	return result
 | |
| }
 | |
| 
 | |
| func StemWithoutLowerCasing(s []rune) []rune {
 | |
| 
 | |
| 	// Initialize.
 | |
| 	lenS := len(s)
 | |
| 
 | |
| 	// Words that are of length 2 or less is already stemmed.
 | |
| 	// Don't do anything.
 | |
| 	if 2 >= lenS {
 | |
| 		/////////// RETURN
 | |
| 		return s
 | |
| 	}
 | |
| 
 | |
| 	// Stem
 | |
| 	s = step1a(s)
 | |
| 	s = step1b(s)
 | |
| 	s = step1c(s)
 | |
| 	s = step2(s)
 | |
| 	s = step3(s)
 | |
| 	s = step4(s)
 | |
| 	s = step5a(s)
 | |
| 	s = step5b(s)
 | |
| 
 | |
| 	// Return.
 | |
| 	return s
 | |
| }
 |