Change URI to IRI after finding validation pattern

This commit is contained in:
Jason Streifling 2024-10-16 17:33:25 +02:00
parent b76e529ca3
commit 082e71a698
10 changed files with 41 additions and 32 deletions

35
atom.go
View File

@ -2,7 +2,6 @@ package atomfeed
import ( import (
"mime" "mime"
"net/url"
"regexp" "regexp"
"strings" "strings"
) )
@ -11,21 +10,31 @@ type (
EmailAddress string EmailAddress string
LanguageTag string LanguageTag string
MediaType string MediaType string
URI string // URI string
IRI string
) )
func isValidURL(uri URI) bool { // func isValidURL(uri URI) bool {
_, err := url.ParseRequestURI(string(uri)) // _, err := url.ParseRequestURI(string(uri))
return err == nil // return err == nil
} // }
//
// func isValidURN(uri URI) bool {
// pattern := `\A(?i:urn:(?!urn:)(?<nid>[a-z0-9][a-z0-9-]{1,31}):(?<nss>(?:[-a-z0-9()+,.:=@;$_!*'&~\/]|%[0-9a-f]{2})+)(?:\?\+(?<rcomponent>.*?))?(?:\?=(?<qcomponent>.*?))?(?:#(?<fcomponent>.*?))?)\z`
// return regexp.MustCompile(pattern).MatchString(string(uri))
// }
//
// // isValidURI checks whether an URI is valid or not.
// func isValidURI(uri URI) bool {
// return isValidURL(uri) || isValidURN(uri)
// }
func isValidURN(uri URI) bool { // isValidIRI checks whether an IRI is valid or not.
pattern := `\A(?i:urn:(?!urn:)(?<nid>[a-z0-9][a-z0-9-]{1,31}):(?<nss>(?:[-a-z0-9()+,.:=@;$_!*'&~\/]|%[0-9a-f]{2})+)(?:\?\+(?<rcomponent>.*?))?(?:\?=(?<qcomponent>.*?))?(?:#(?<fcomponent>.*?))?)\z` // The used pattern stems from
return regexp.MustCompile(pattern).MatchString(string(uri)) // https://www.w3.org/2011/04/XMLSchema/TypeLibrary-IRI-RFC3987.xsd
} func isValidIRI(iri IRI) bool {
pattern := `((([A-Za-z])[A-Za-z0-9+\-\.]*):((//(((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽!$&'()*+,;=:]|(%[0-9A-Fa-f][0-9A-Fa-f]))*@))?((\[((((([0-9A-Fa-f]{0,4}:)){6}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(::(([0-9A-Fa-f]{0,4}:)){5}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(([0-9A-Fa-f]{0,4})?::(([0-9A-Fa-f]{0,4}:)){4}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:))?[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){3}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,2}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){2}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,3}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4}:(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,4}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,5}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4})|((((([0-9A-Fa-f]{0,4}:)){0,6}[0-9A-Fa-f]{0,4}))?::))|(v[0-9A-Fa-f]+\.[A-Za-z0-9\-\._~!$&'()*+,;=:]+))\])|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))|(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=]))*)((:[0-9]*))?)((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|(/(((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*))?)|((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|)((\?(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|[-󰀀-󿿽􀀀-􏿽/?])*))?((#((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|/|\?))*))?)`
func isValidURI(uri URI) bool { return regexp.MustCompile(pattern).MatchString(string(iri))
return isValidURL(uri) || isValidURN(uri)
} }
func isCorrectlyEscaped(text string) bool { func isCorrectlyEscaped(text string) bool {

View File

@ -10,7 +10,7 @@ type Category struct {
*CommonAttributes *CommonAttributes
Content Content `xml:"content"` // undefinedContent in RFC4287 Content Content `xml:"content"` // undefinedContent in RFC4287
Term string `xml:"term,attr"` Term string `xml:"term,attr"`
Scheme URI `xml:"scheme,attr,omitempty"` Scheme IRI `xml:"scheme,attr,omitempty"`
Label string `xml:"label,attr,omitempty"` Label string `xml:"label,attr,omitempty"`
} }
@ -33,7 +33,7 @@ func (c *Category) Check() error {
} }
if c.Scheme != "" { if c.Scheme != "" {
if !isValidURI(c.Scheme) { if !isValidIRI(c.Scheme) {
return fmt.Errorf("scheme attribute %v of category not correctly formatted", c.Scheme) return fmt.Errorf("scheme attribute %v of category not correctly formatted", c.Scheme)
} }
} }

View File

@ -3,7 +3,7 @@ package atomfeed
import "fmt" import "fmt"
type CommonAttributes struct { type CommonAttributes struct {
Base URI `xml:"base,attr,omitempty"` Base IRI `xml:"base,attr,omitempty"`
Lang LanguageTag `xml:"lang,attr,omitempty"` Lang LanguageTag `xml:"lang,attr,omitempty"`
UndefinedAttributes []*ExtensionAttribute `xml:",any"` UndefinedAttributes []*ExtensionAttribute `xml:",any"`
} }

View File

@ -8,7 +8,7 @@ import (
type Generator struct { type Generator struct {
*CommonAttributes *CommonAttributes
URI URI `xml:"uri,attr,omitempty"` URI IRI `xml:"uri,attr,omitempty"`
Version string `xml:"version,attr,omitempty"` Version string `xml:"version,attr,omitempty"`
Text string `xml:"text"` Text string `xml:"text"`
} }
@ -19,7 +19,7 @@ func NewGenerator(text string) *Generator {
func (g *Generator) Check() error { func (g *Generator) Check() error {
if g.URI != "" { if g.URI != "" {
if !isValidURI(g.URI) { if !isValidIRI(g.URI) {
return fmt.Errorf("uri attribute %v of generator not correctly formatted", g.URI) return fmt.Errorf("uri attribute %v of generator not correctly formatted", g.URI)
} }
} }

View File

@ -7,18 +7,18 @@ import (
type Icon struct { type Icon struct {
*CommonAttributes *CommonAttributes
URI URI `xml:"uri"` URI IRI `xml:"uri"`
} }
func NewIcon(uri string) *Icon { func NewIcon(uri string) *Icon {
return &Icon{URI: URI(uri)} return &Icon{URI: IRI(uri)}
} }
func (i *Icon) Check() error { func (i *Icon) Check() error {
if i.URI == "" { if i.URI == "" {
return errors.New("uri element of icon empty") return errors.New("uri element of icon empty")
} else { } else {
if !isValidURI(i.URI) { if !isValidIRI(i.URI) {
return fmt.Errorf("uri attribute %v of icon not correctly formatted", i.URI) return fmt.Errorf("uri attribute %v of icon not correctly formatted", i.URI)
} }
} }

6
id.go
View File

@ -9,18 +9,18 @@ import (
type ID struct { type ID struct {
*CommonAttributes *CommonAttributes
URI URI `xml:"uri"` URI IRI `xml:"uri"`
} }
func NewID() *ID { func NewID() *ID {
return &ID{URI: URI(fmt.Sprint("urn:uuid:", uuid.New()))} return &ID{URI: IRI(fmt.Sprint("urn:uuid:", uuid.New()))}
} }
func (i *ID) Check() error { func (i *ID) Check() error {
if i.URI == "" { if i.URI == "" {
return errors.New("uri element of id empty") return errors.New("uri element of id empty")
} else { } else {
if !isValidURI(i.URI) { if !isValidIRI(i.URI) {
return fmt.Errorf("uri element %v of id not correctly formatted", i.URI) return fmt.Errorf("uri element %v of id not correctly formatted", i.URI)
} }
} }

View File

@ -9,7 +9,7 @@ type Link struct {
*CommonAttributes *CommonAttributes
Title Text `xml:"title,attr,omitempty"` Title Text `xml:"title,attr,omitempty"`
Content Content `xml:"content"` // undefinedContent in RFC4287 Content Content `xml:"content"` // undefinedContent in RFC4287
Href URI `xml:"href,attr"` Href IRI `xml:"href,attr"`
Rel string `xml:"rel,attr,omitempty"` Rel string `xml:"rel,attr,omitempty"`
Type MediaType `xml:"type,attr,omitempty"` Type MediaType `xml:"type,attr,omitempty"`
HrefLang LanguageTag `xml:"hreflang,attr,omitempty"` HrefLang LanguageTag `xml:"hreflang,attr,omitempty"`
@ -22,7 +22,7 @@ func NewLink(href string) (*Link, error) {
return nil, fmt.Errorf("error creating content element: %v", err) return nil, fmt.Errorf("error creating content element: %v", err)
} }
return &Link{Href: URI(href), Content: content}, nil return &Link{Href: IRI(href), Content: content}, nil
} }
func (l *Link) Check() error { func (l *Link) Check() error {

View File

@ -4,7 +4,7 @@ import "errors"
type Logo struct { type Logo struct {
*CommonAttributes *CommonAttributes
URI URI `xml:"uri"` URI IRI `xml:"uri"`
} }
func (l *Logo) Check() error { func (l *Logo) Check() error {

View File

@ -10,7 +10,7 @@ import (
type OutOfLineContent struct { type OutOfLineContent struct {
*CommonAttributes *CommonAttributes
Type MediaType `xml:"type,attr,omitempty"` Type MediaType `xml:"type,attr,omitempty"`
SRC URI `xml:"src,attr"` SRC IRI `xml:"src,attr"`
} }
func newOutOfLineContent(mediaType string, content any) (*OutOfLineContent, error) { func newOutOfLineContent(mediaType string, content any) (*OutOfLineContent, error) {
@ -22,11 +22,11 @@ func newOutOfLineContent(mediaType string, content any) (*OutOfLineContent, erro
return nil, fmt.Errorf("content type %T incompatible with out of line content", content) return nil, fmt.Errorf("content type %T incompatible with out of line content", content)
} }
if !isValidURI(content.(URI)) { if !isValidIRI(content.(IRI)) {
return nil, errors.New("content not a valid uri") return nil, errors.New("content not a valid uri")
} }
return &OutOfLineContent{Type: MediaType(mediaType), SRC: content.(URI)}, nil return &OutOfLineContent{Type: MediaType(mediaType), SRC: content.(IRI)}, nil
} }
func (o *OutOfLineContent) isContent() bool { return true } func (o *OutOfLineContent) isContent() bool { return true }

View File

@ -10,7 +10,7 @@ import (
type Person struct { type Person struct {
*CommonAttributes *CommonAttributes
Name string `xml:"name"` Name string `xml:"name"`
URI URI `xml:"uri,omitempty"` URI IRI `xml:"uri,omitempty"`
Email EmailAddress `xml:"email,omitempty"` Email EmailAddress `xml:"email,omitempty"`
Extensions []*ExtensionElement `xml:",any,omitempty"` Extensions []*ExtensionElement `xml:",any,omitempty"`
} }
@ -29,7 +29,7 @@ func (p *Person) Check() error {
} }
if p.URI != "" { if p.URI != "" {
if !isValidURI(p.URI) { if !isValidIRI(p.URI) {
return fmt.Errorf("uri element of person %v not correctly formatted", p.Name) return fmt.Errorf("uri element of person %v not correctly formatted", p.Name)
} }
} }