commit ff99a09b4721f41da8a084b9717a05ba3b785067
parent 736fb0f46d52949d7582f86198a328743798a6f0
Author: Sebastiano Tronto <sebastiano@tronto.net>
Date: Sun, 9 Jul 2023 08:25:35 +0200
Attempt to write my own url regex
Diffstat:
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/urlgrep b/urlgrep
@@ -2,7 +2,12 @@
# Find all url in stdin, print them newline-separated to stdout
-reg='(((http|https|ftp)|mailto)[.:][^ >"\t]*|www\.[-a-z0-9.]+)[^ .,;\t>">\):]'
+# Old regex (PCRE), kept for later review:
+# reg='(((http|https|ftp)|mailto)[.:][^ >"\t]*|www\.[-a-z0-9.]+)[^ .,;\t>">\):]'
+# grep -Po "$reg"
-grep -Po "$reg"
+protocols='http|https|ftp|gemini|mailto'  # URL schemes accepted in front of the ':'
+valid_chars="][a-zA-Z0-9_~/?#@!\$%&'()*+=.,;:-"  # ']' first and '-' last so both are literal in [...]; '%' keeps percent-encoded URLs whole
+regex="(($protocols):|www\.)[$valid_chars]+"  # a scheme plus ':' (or a bare 'www.' prefix) followed by a run of URL characters
+grep -E -o "$regex"  # 'grep -E' replaces the obsolescent 'egrep' alias; -o prints only the matched URLs, one per line