From 082e71a698c2b32cc0af2d62e870798b40d25e94 Mon Sep 17 00:00:00 2001 From: Jason Streifling Date: Wed, 16 Oct 2024 17:33:25 +0200 Subject: [PATCH] Change URI to IRI after finding validation pattern --- atom.go | 35 ++++++++++++++++++++++------------- category.go | 4 ++-- commonAttributes.go | 2 +- generator.go | 4 ++-- icon.go | 6 +++--- id.go | 6 +++--- link.go | 4 ++-- logo.go | 2 +- outOfLineContent.go | 6 +++--- person.go | 4 ++-- 10 files changed, 41 insertions(+), 32 deletions(-) diff --git a/atom.go b/atom.go index 5b471c6..850bbc8 100644 --- a/atom.go +++ b/atom.go @@ -2,7 +2,6 @@ package atomfeed import ( "mime" - "net/url" "regexp" "strings" ) @@ -11,21 +10,31 @@ type ( EmailAddress string LanguageTag string MediaType string - URI string + // URI string + IRI string ) -func isValidURL(uri URI) bool { - _, err := url.ParseRequestURI(string(uri)) - return err == nil -} +// func isValidURL(uri URI) bool { +// _, err := url.ParseRequestURI(string(uri)) +// return err == nil +// } +// +// func isValidURN(uri URI) bool { +// pattern := `\A(?i:urn:(?!urn:)(?[a-z0-9][a-z0-9-]{1,31}):(?(?:[-a-z0-9()+,.:=@;$_!*'&~\/]|%[0-9a-f]{2})+)(?:\?\+(?.*?))?(?:\?=(?.*?))?(?:#(?.*?))?)\z` +// return regexp.MustCompile(pattern).MatchString(string(uri)) +// } +// +// // isValidURI checks whether an URI is valid or not. +// func isValidURI(uri URI) bool { +// return isValidURL(uri) || isValidURN(uri) +// } -func isValidURN(uri URI) bool { - pattern := `\A(?i:urn:(?!urn:)(?[a-z0-9][a-z0-9-]{1,31}):(?(?:[-a-z0-9()+,.:=@;$_!*'&~\/]|%[0-9a-f]{2})+)(?:\?\+(?.*?))?(?:\?=(?.*?))?(?:#(?.*?))?)\z` - return regexp.MustCompile(pattern).MatchString(string(uri)) -} - -func isValidURI(uri URI) bool { - return isValidURL(uri) || isValidURN(uri) +// isValidIRI checks whether an IRI is valid or not. +// The used pattern stems from +// https://www.w3.org/2011/04/XMLSchema/TypeLibrary-IRI-RFC3987.xsd +func isValidIRI(iri IRI) bool { + pattern := `((([A-Za-z])[A-Za-z0-9+\-\.]*):((//(((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽!$&'()*+,;=:]|(%[0-9A-Fa-f][0-9A-Fa-f]))*@))?((\[((((([0-9A-Fa-f]{0,4}:)){6}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(::(([0-9A-Fa-f]{0,4}:)){5}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|(([0-9A-Fa-f]{0,4})?::(([0-9A-Fa-f]{0,4}:)){4}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:))?[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){3}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,2}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:)){2}(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,3}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4}:(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,4}[0-9A-Fa-f]{0,4}))?::(([0-9A-Fa-f]{0,4}:[0-9A-Fa-f]{0,4})|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))))|((((([0-9A-Fa-f]{0,4}:)){0,5}[0-9A-Fa-f]{0,4}))?::[0-9A-Fa-f]{0,4})|((((([0-9A-Fa-f]{0,4}:)){0,6}[0-9A-Fa-f]{0,4}))?::))|(v[0-9A-Fa-f]+\.[A-Za-z0-9\-\._~!$&'()*+,;=:]+))\])|(([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5]))\.([0-9]|([1-9][0-9])|(1([0-9]){2})|(2[0-4][0-9])|(25[0-5])))|(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=]))*)((:[0-9]*))?)((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|(/(((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*))?)|((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))+((/(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@]))*))*)|)((\?(([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|[-󰀀-󿿽􀀀-􏿽/?])*))?((#((([A-Za-z0-9\-\._~ -퟿豈-﷏ﷰ-￯𐀀-🿽𠀀-𯿽𰀀-𿿽񀀀-񏿽񐀀-񟿽񠀀-񯿽񰀀-񿿽򀀀-򏿽򐀀-򟿽򠀀-򯿽򰀀-򿿽󀀀-󏿽󐀀-󟿽󡀀-󯿽]|(%[0-9A-Fa-f][0-9A-Fa-f])|[!$&'()*+,;=:@])|/|\?))*))?)` + return regexp.MustCompile(pattern).MatchString(string(iri)) } func isCorrectlyEscaped(text string) bool { diff --git a/category.go b/category.go index 0cad056..e4ad587 100644 --- a/category.go +++ b/category.go @@ -10,7 +10,7 @@ type Category struct { *CommonAttributes Content Content `xml:"content"` // undefinedContent in RFC4287 Term string `xml:"term,attr"` - Scheme URI `xml:"scheme,attr,omitempty"` + Scheme IRI `xml:"scheme,attr,omitempty"` Label string `xml:"label,attr,omitempty"` } @@ -33,7 +33,7 @@ func (c *Category) Check() error { } if c.Scheme != "" { - if !isValidURI(c.Scheme) { + if !isValidIRI(c.Scheme) { return fmt.Errorf("scheme attribute %v of category not correctly formatted", c.Scheme) } } diff --git a/commonAttributes.go b/commonAttributes.go index 1e4737b..696e512 100644 --- a/commonAttributes.go +++ b/commonAttributes.go @@ -3,7 +3,7 @@ package atomfeed import "fmt" type CommonAttributes struct { - Base URI `xml:"base,attr,omitempty"` + Base IRI `xml:"base,attr,omitempty"` Lang LanguageTag `xml:"lang,attr,omitempty"` UndefinedAttributes []*ExtensionAttribute `xml:",any"` } diff --git a/generator.go b/generator.go index 6867b58..9c27841 100644 --- a/generator.go +++ b/generator.go @@ -8,7 +8,7 @@ import ( type Generator struct { *CommonAttributes - URI URI `xml:"uri,attr,omitempty"` + URI IRI `xml:"uri,attr,omitempty"` Version string `xml:"version,attr,omitempty"` Text string `xml:"text"` } @@ -19,7 +19,7 @@ func NewGenerator(text string) *Generator { func (g *Generator) Check() error { if g.URI != "" { - if !isValidURI(g.URI) { + if !isValidIRI(g.URI) { return fmt.Errorf("uri attribute %v of generator not correctly formatted", g.URI) } } diff --git a/icon.go b/icon.go index 89957e1..e444b46 100644 --- a/icon.go +++ b/icon.go @@ -7,18 +7,18 @@ import ( type Icon struct { *CommonAttributes - URI URI `xml:"uri"` + URI IRI `xml:"uri"` } func NewIcon(uri string) *Icon { - return &Icon{URI: URI(uri)} + return &Icon{URI: IRI(uri)} } func (i *Icon) Check() error { if i.URI == "" { return errors.New("uri element of icon empty") } else { - if !isValidURI(i.URI) { + if !isValidIRI(i.URI) { return fmt.Errorf("uri attribute %v of icon not correctly formatted", i.URI) } } diff --git a/id.go b/id.go index 9f69714..64a996b 100644 --- a/id.go +++ b/id.go @@ -9,18 +9,18 @@ import ( type ID struct { *CommonAttributes - URI URI `xml:"uri"` + URI IRI `xml:"uri"` } func NewID() *ID { - return &ID{URI: URI(fmt.Sprint("urn:uuid:", uuid.New()))} + return &ID{URI: IRI(fmt.Sprint("urn:uuid:", uuid.New()))} } func (i *ID) Check() error { if i.URI == "" { return errors.New("uri element of id empty") } else { - if !isValidURI(i.URI) { + if !isValidIRI(i.URI) { return fmt.Errorf("uri element %v of id not correctly formatted", i.URI) } } diff --git a/link.go b/link.go index 02868f8..8ff5381 100644 --- a/link.go +++ b/link.go @@ -9,7 +9,7 @@ type Link struct { *CommonAttributes Title Text `xml:"title,attr,omitempty"` Content Content `xml:"content"` // undefinedContent in RFC4287 - Href URI `xml:"href,attr"` + Href IRI `xml:"href,attr"` Rel string `xml:"rel,attr,omitempty"` Type MediaType `xml:"type,attr,omitempty"` HrefLang LanguageTag `xml:"hreflang,attr,omitempty"` @@ -22,7 +22,7 @@ func NewLink(href string) (*Link, error) { return nil, fmt.Errorf("error creating content element: %v", err) } - return &Link{Href: URI(href), Content: content}, nil + return &Link{Href: IRI(href), Content: content}, nil } func (l *Link) Check() error { diff --git a/logo.go b/logo.go index d794d45..5c0fe63 100644 --- a/logo.go +++ b/logo.go @@ -4,7 +4,7 @@ import "errors" type Logo struct { *CommonAttributes - URI URI `xml:"uri"` + URI IRI `xml:"uri"` } func (l *Logo) Check() error { diff --git a/outOfLineContent.go b/outOfLineContent.go index b8ab7d7..3a466c9 100644 --- a/outOfLineContent.go +++ b/outOfLineContent.go @@ -10,7 +10,7 @@ import ( type OutOfLineContent struct { *CommonAttributes Type MediaType `xml:"type,attr,omitempty"` - SRC URI `xml:"src,attr"` + SRC IRI `xml:"src,attr"` } func newOutOfLineContent(mediaType string, content any) (*OutOfLineContent, error) { @@ -22,11 +22,11 @@ func newOutOfLineContent(mediaType string, content any) (*OutOfLineContent, erro return nil, fmt.Errorf("content type %T incompatible with out of line content", content) } - if !isValidURI(content.(URI)) { + if !isValidIRI(content.(IRI)) { return nil, errors.New("content not a valid uri") } - return &OutOfLineContent{Type: MediaType(mediaType), SRC: content.(URI)}, nil + return &OutOfLineContent{Type: MediaType(mediaType), SRC: content.(IRI)}, nil } func (o *OutOfLineContent) isContent() bool { return true } diff --git a/person.go b/person.go index 27818d9..0a433a0 100644 --- a/person.go +++ b/person.go @@ -10,7 +10,7 @@ import ( type Person struct { *CommonAttributes Name string `xml:"name"` - URI URI `xml:"uri,omitempty"` + URI IRI `xml:"uri,omitempty"` Email EmailAddress `xml:"email,omitempty"` Extensions []*ExtensionElement `xml:",any,omitempty"` } @@ -29,7 +29,7 @@ func (p *Person) Check() error { } if p.URI != "" { - if !isValidURI(p.URI) { + if !isValidIRI(p.URI) { return fmt.Errorf("uri element of person %v not correctly formatted", p.Name) } }