package utils import ( "math" "strings" "unicode" ) // CalculateStringSimilarity calculates similarity between two strings using Levenshtein distance func CalculateStringSimilarity(s1, s2 string) float64 { // Normalize strings s1 = normalizeString(s1) s2 = normalizeString(s2) if s1 == s2 { return 1.0 } if len(s1) == 0 || len(s2) == 0 { return 0.0 } // Calculate Levenshtein distance distance := levenshteinDistance(s1, s2) maxLen := math.Max(float64(len(s1)), float64(len(s2))) similarity := 1.0 - (float64(distance) / maxLen) return math.Max(0, similarity) } // levenshteinDistance calculates the Levenshtein distance between two strings func levenshteinDistance(s1, s2 string) int { len1 := len(s1) len2 := len(s2) // Create a 2D slice for dynamic programming dp := make([][]int, len1+1) for i := range dp { dp[i] = make([]int, len2+1) } // Initialize first row and column for i := 0; i <= len1; i++ { dp[i][0] = i } for j := 0; j <= len2; j++ { dp[0][j] = j } // Fill the dp table for i := 1; i <= len1; i++ { for j := 1; j <= len2; j++ { cost := 0 if s1[i-1] != s2[j-1] { cost = 1 } dp[i][j] = min3( dp[i-1][j]+1, // deletion dp[i][j-1]+1, // insertion dp[i-1][j-1]+cost, // substitution ) } } return dp[len1][len2] } // ExtractKeywords extracts keywords from a string func ExtractKeywords(text string) []string { // Normalize and split text text = normalizeString(text) words := strings.Fields(text) // Filter stopwords and short words var keywords []string stopwords := getStopwords() for _, word := range words { if len(word) > 2 && !contains(stopwords, word) { keywords = append(keywords, word) } } return keywords } // FindMatchedKeywords finds common keywords between two lists func FindMatchedKeywords(keywords1, keywords2 []string) []string { var matched []string for _, k1 := range keywords1 { for _, k2 := range keywords2 { if strings.EqualFold(k1, k2) || CalculateStringSimilarity(k1, k2) > 0.8 { if !contains(matched, k1) { matched = append(matched, k1) } break } } } return matched } // normalizeString normalizes a string (lowercase, remove extra spaces) func normalizeString(s string) string { // Convert to lowercase s = strings.ToLower(s) // Remove punctuation and extra spaces var result strings.Builder for _, r := range s { if unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsSpace(r) { result.WriteRune(r) } else { result.WriteRune(' ') } } // Remove multiple spaces s = strings.Join(strings.Fields(result.String()), " ") return strings.TrimSpace(s) } // getStopwords returns common Indonesian stopwords func getStopwords() []string { return []string{ "dan", "atau", "dengan", "untuk", "dari", "ke", "di", "yang", "ini", "itu", "ada", "adalah", "akan", "telah", "sudah", "pada", "oleh", "sebagai", "dalam", "juga", "saya", "kamu", "dia", "kita", "mereka", "kami", "the", "a", "an", "of", "to", "in", "for", "on", "at", "by", "with", "from", } } // contains checks if a slice contains a string func contains(slice []string, str string) bool { for _, s := range slice { if strings.EqualFold(s, str) { return true } } return false } // min3 returns the minimum of three integers func min3(a, b, c int) int { if a < b { if a < c { return a } return c } if b < c { return b } return c }