package atom
import (
"fmt"
"html"
"mime"
"regexp"
"strings"
"github.com/google/uuid"
"golang.org/x/text/language"
)
// isValidIRI checks whether an IRI is valid or not. It returns a bool.
// https://www.w3.org/2011/04/XMLSchema/TypeLibrary-IRI-RFC3987.xsd
func isValidIRI(iri string) bool {
pattern := `((([A-Za-z])[A-Za-z0-9+\-\.]*):((//(((([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------!$&'()*+,;=:]|(%[0-9A-Fa-f][0-9A-Fa-f]))*@))?((\[((((([0-9A-Fa-f]{0,4}:)){6}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(::(([0-9A-Fa-f]{0,4}:)){5}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(([0-9A-Fa-f]{0,4})?::(([0-9A-Fa-f]{0,4}:)){4}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:))?[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){3}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,2}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){2}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,3}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4}:(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,4}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,5}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4})|((((([0-9A-Fa-f]{0,4}:)){0,6}[0-9A-Fa-f]{0,4}))?::))|(v[0-9A-Fa-f]+\.[A-Za-z0-9\-\._~!$&'()*+,;=:]+))\])|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))|(([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=]))*)((:[0-9]*))?)((/(([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|(/(((([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*))?)|((([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|)((\?(([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|[---/?])*))?((#((([A-Za-z0-9\-\._~ -豈-﷏ﷰ-𐀀-𠀀-𰀀------------]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|/|\?))*))?)`
return regexp.MustCompile(pattern).MatchString(iri)
}
// isCorrectlyEscaped checks whether a string is correctly escaped as per
// RFC4287. It returns a bool.
func isCorrectlyEscaped(text string) bool {
relevantEntities := []string{"&", "<", ">", """, "'"}
for _, entity := range relevantEntities {
if strings.Contains(text, entity) {
return false
}
}
return true
}
// isCompositeMediaType checks whether a string is a composite media type. It
// returns a bool.
func isCompositeMediaType(m string) bool {
mediaType, _, err := mime.ParseMediaType(m)
if err != nil {
return false
}
return strings.HasPrefix(mediaType, "multipart/") || strings.HasPrefix(mediaType, "message/")
}
// isXMLMediaType checks whether a string is an xml media type. It returns a
// bool.
func isXMLMediaType(m string) bool {
mediaType, _, err := mime.ParseMediaType(m)
if err != nil {
return false
}
return strings.HasSuffix(mediaType, "/xml") || strings.HasSuffix(mediaType, "+xml")
}
// isValidMediaType checks whether a string is a valid media type. It returns a
// bool.
func isValidMediaType(m string) bool {
mediaType, _, err := mime.ParseMediaType(m)
if err != nil {
return false
}
typeParts := strings.Split(mediaType, "/")
if len(typeParts) != 2 || typeParts[0] == "" || typeParts[1] == "" {
return false
}
return true
}
// isValidLanguageTag checks whether a LanguageTag is valid. It returns a bool.
func isValidLanguageTag(languageTag string) bool {
_, err := language.Parse(languageTag)
return err == nil
}
// isValidAttribute checks whether an Attribute is valid. It returns a bool.
func isValidAttribute(attribute string) bool {
regex := regexp.MustCompile(`^[a-zA-Z0-9_]+="[^"]*"$`)
return regex.MatchString(attribute)
}
// NewURN generates an new valid IRI based on a UUID. It returns an IRI.
func NewURN() string {
return fmt.Sprint("urn:uuid:", uuid.New())
}
// Unescape unescapes a string. It returns an IRI.
func Unescape(s string) string {
return html.UnescapeString(s)
}