first commit
This commit is contained in:
commit
276bb92d08
10 changed files with 294 additions and 0 deletions
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
_build
|
||||||
42
.ocamlformat
Normal file
42
.ocamlformat
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
version=0.20.1
|
||||||
|
assignment-operator=end-line
|
||||||
|
break-cases=fit
|
||||||
|
break-fun-decl=wrap
|
||||||
|
break-fun-sig=wrap
|
||||||
|
break-infix=wrap
|
||||||
|
break-infix-before-func=false
|
||||||
|
break-separators=before
|
||||||
|
break-sequences=true
|
||||||
|
cases-exp-indent=2
|
||||||
|
cases-matching-exp-indent=normal
|
||||||
|
doc-comments=before
|
||||||
|
doc-comments-padding=2
|
||||||
|
doc-comments-tag-only=default
|
||||||
|
dock-collection-brackets=false
|
||||||
|
exp-grouping=preserve
|
||||||
|
field-space=loose
|
||||||
|
if-then-else=compact
|
||||||
|
indicate-multiline-delimiters=space
|
||||||
|
indicate-nested-or-patterns=unsafe-no
|
||||||
|
infix-precedence=indent
|
||||||
|
leading-nested-match-parens=false
|
||||||
|
let-and=sparse
|
||||||
|
let-binding-spacing=compact
|
||||||
|
let-module=compact
|
||||||
|
margin=80
|
||||||
|
max-indent=68
|
||||||
|
module-item-spacing=sparse
|
||||||
|
ocp-indent-compat=false
|
||||||
|
parens-ite=false
|
||||||
|
parens-tuple=always
|
||||||
|
parse-docstrings=true
|
||||||
|
sequence-blank-line=preserve-one
|
||||||
|
sequence-style=terminator
|
||||||
|
single-case=compact
|
||||||
|
space-around-arrays=true
|
||||||
|
space-around-lists=true
|
||||||
|
space-around-records=true
|
||||||
|
space-around-variants=true
|
||||||
|
type-decl=sparse
|
||||||
|
wrap-comments=false
|
||||||
|
wrap-fun-args=true
|
||||||
8
LICENSE.md
Normal file
8
LICENSE.md
Normal file
|
|
@ -0,0 +1,8 @@
|
||||||
|
The ISC License (ISC)
|
||||||
|
=====================
|
||||||
|
|
||||||
|
Copyright © 2022, TODO
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
23
README.md
Normal file
23
README.md
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
# clear_url
|
||||||
|
|
||||||
|
clear_url is an [OCaml] library to remove tracking elements from URLs.
|
||||||
|
|
||||||
|
It is based on [ClearURLs](https://gitlab.com/KevinRoebert/ClearUrls)
|
||||||
|
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```ocaml
|
||||||
|
let dirty_url = "https://example.com?utm_source=newsletter1&utm_medium=email&utm_campaign=sale"
|
||||||
|
let () = Format.printf (Clear_url.clean dirty_url)
|
||||||
|
```
|
||||||
|
|
||||||
|
## About
|
||||||
|
|
||||||
|
- [LICENSE]
|
||||||
|
|
||||||
|
[example]: ./example
|
||||||
|
[LICENSE]: ./LICENSE.md
|
||||||
|
[opam file]: ./clear_url.opam
|
||||||
|
|
||||||
|
[OCaml]: https://ocaml.org
|
||||||
31
clear_url.opam
Normal file
31
clear_url.opam
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
# This file is generated by dune, edit dune-project instead
|
||||||
|
opam-version: "2.0"
|
||||||
|
synopsis: "OCaml library to remove tracking in URLs"
|
||||||
|
description: "Use `Clear_url.clean` to remove tracking elements from URLs."
|
||||||
|
authors: ["swrup <swrup@protonmail.com>"]
|
||||||
|
license: "ISC"
|
||||||
|
tags: ["clear_url" "clean" "url" "tracker" "web3"]
|
||||||
|
homepage: "https://git.zapashcanon.fr/swrup/clear-url"
|
||||||
|
bug-reports: "https://git.zapashcanon.fr/swrup/ocaml-clear-url/issues"
|
||||||
|
depends: [
|
||||||
|
"dune" {>= "2.9"}
|
||||||
|
"ocaml" {>= "4.08"}
|
||||||
|
"odoc" {with-doc}
|
||||||
|
]
|
||||||
|
build: [
|
||||||
|
["dune" "subst"] {dev}
|
||||||
|
[
|
||||||
|
"dune"
|
||||||
|
"build"
|
||||||
|
"-p"
|
||||||
|
name
|
||||||
|
"-j"
|
||||||
|
jobs
|
||||||
|
"--promote-install-files=false"
|
||||||
|
"@install"
|
||||||
|
"@runtest" {with-test}
|
||||||
|
"@doc" {with-doc}
|
||||||
|
]
|
||||||
|
["dune" "install" "-p" name "--create-install-files" name]
|
||||||
|
]
|
||||||
|
dev-repo: "git+https://git.zapashcanon.fr/swrup/clear-url.git"
|
||||||
29
dune-project
Normal file
29
dune-project
Normal file
|
|
@ -0,0 +1,29 @@
|
||||||
|
(lang dune 2.9)
|
||||||
|
|
||||||
|
(implicit_transitive_deps false)
|
||||||
|
|
||||||
|
(name clear_url)
|
||||||
|
|
||||||
|
(license ISC)
|
||||||
|
|
||||||
|
(authors "swrup <swrup@protonmail.com>")
|
||||||
|
|
||||||
|
(source
|
||||||
|
(uri git+https://git.zapashcanon.fr/swrup/clear-url.git))
|
||||||
|
|
||||||
|
(homepage https://git.zapashcanon.fr/swrup/clear-url)
|
||||||
|
|
||||||
|
(bug_reports https://git.zapashcanon.fr/swrup/ocaml-clear-url/issues)
|
||||||
|
|
||||||
|
(generate_opam_files true)
|
||||||
|
|
||||||
|
(package
|
||||||
|
(name clear_url)
|
||||||
|
(synopsis "OCaml library to remove tracking in URLs")
|
||||||
|
(description
|
||||||
|
"Use `Clear_url.clean` to remove tracking elements from URLs.")
|
||||||
|
(tags
|
||||||
|
(clear_url clean url tracker web3))
|
||||||
|
(depends
|
||||||
|
(ocaml
|
||||||
|
(>= 4.08))))
|
||||||
131
src/clear_url.ml
Normal file
131
src/clear_url.ml
Normal file
|
|
@ -0,0 +1,131 @@
|
||||||
|
open Yojson.Basic.Util
|
||||||
|
|
||||||
|
let file =
|
||||||
|
match Fpath.of_string "./src/rules.json" with
|
||||||
|
| Ok f -> f
|
||||||
|
| Error (`Msg s) -> failwith s
|
||||||
|
|
||||||
|
let parsed =
|
||||||
|
match Bos.OS.File.read file with
|
||||||
|
| Error (`Msg s) -> failwith s
|
||||||
|
| Ok s -> Yojson.Safe.to_basic (Yojson.Safe.from_string s)
|
||||||
|
|
||||||
|
type regexps =
|
||||||
|
{ urlpattern : Re.re
|
||||||
|
; rawrules : Re.re array
|
||||||
|
; rules : Re.re array
|
||||||
|
; exceptions : Re.re array
|
||||||
|
; redirections : Re.re array
|
||||||
|
}
|
||||||
|
|
||||||
|
let values = parsed |> member "providers" |> values |> Array.of_list
|
||||||
|
|
||||||
|
let regexps_array =
|
||||||
|
Array.map
|
||||||
|
(fun value ->
|
||||||
|
let urlpattern =
|
||||||
|
value |> member "urlPattern" |> to_string |> Re.Perl.re |> Re.compile
|
||||||
|
in
|
||||||
|
|
||||||
|
let get_array key =
|
||||||
|
value
|
||||||
|
|> (fun el ->
|
||||||
|
(*we can't use `filter_member "rules"` here because empty list
|
||||||
|
* where removed from the file ..*)
|
||||||
|
match member key el with
|
||||||
|
| `Null -> (*it was an empty list *) []
|
||||||
|
| json -> to_list json )
|
||||||
|
|> filter_string |> Array.of_list
|
||||||
|
in
|
||||||
|
let compile_array l =
|
||||||
|
l |> Array.map (fun s -> Re.compile @@ Re.Perl.re s)
|
||||||
|
in
|
||||||
|
|
||||||
|
(* referralMarketing should be in rules *)
|
||||||
|
let rules =
|
||||||
|
Array.append (get_array "rules") (get_array "referralMarketing")
|
||||||
|
|> Array.map (Format.sprintf "(?:&|[/?#&])(?:%s=[^&]*)")
|
||||||
|
|> compile_array
|
||||||
|
in
|
||||||
|
let rawrules = get_array "rawRules" |> compile_array in
|
||||||
|
let exceptions = get_array "exceptions" |> compile_array in
|
||||||
|
let redirections = get_array "redirections" |> compile_array in
|
||||||
|
|
||||||
|
{ urlpattern; rawrules; rules; exceptions; redirections } )
|
||||||
|
values
|
||||||
|
|
||||||
|
let split_queries_regex = Re.compile (Re.Perl.re "[^\\/|\\?|&]+=[^\\/|\\?|&]+")
|
||||||
|
|
||||||
|
(* see https://docs.clearurls.xyz/1.23.0/specs/rules/ *)
|
||||||
|
let clean url =
|
||||||
|
(* filter by url pattern *)
|
||||||
|
let regexps_list =
|
||||||
|
Array.fold_left
|
||||||
|
(fun acc regexps ->
|
||||||
|
if Re.matches regexps.urlpattern url <> [] then regexps :: acc else acc
|
||||||
|
)
|
||||||
|
[] regexps_array
|
||||||
|
in
|
||||||
|
|
||||||
|
let is_exception =
|
||||||
|
List.exists
|
||||||
|
(fun t ->
|
||||||
|
Array.exists (fun except -> Re.matches except url <> []) t.exceptions )
|
||||||
|
regexps_list
|
||||||
|
in
|
||||||
|
if is_exception then url
|
||||||
|
else
|
||||||
|
(*TODO implement redirection, is it always percent-encoded ?? *)
|
||||||
|
|
||||||
|
(* apply rawrules to whole url *)
|
||||||
|
let url =
|
||||||
|
List.fold_left
|
||||||
|
(fun acc t ->
|
||||||
|
Array.fold_left
|
||||||
|
(fun acc rawrule -> Re.replace_string ~all:true rawrule ~by:"" acc)
|
||||||
|
acc t.rawrules )
|
||||||
|
url regexps_list
|
||||||
|
in
|
||||||
|
|
||||||
|
let uri = Uri.of_string url in
|
||||||
|
match Uri.verbatim_query uri with
|
||||||
|
| None -> url
|
||||||
|
| Some query ->
|
||||||
|
let query = "?" ^ query in
|
||||||
|
let fragment =
|
||||||
|
Option.fold ~none:"" ~some:(fun s -> "#" ^ s) (Uri.fragment uri)
|
||||||
|
in
|
||||||
|
(* let query = "?" ^ Re.replace_string get_query_regex ~by:"" url in *)
|
||||||
|
let base =
|
||||||
|
String.sub url 0
|
||||||
|
(String.length url - String.length query - String.length fragment)
|
||||||
|
in
|
||||||
|
|
||||||
|
(* apply rules to query *)
|
||||||
|
let apply_rules s =
|
||||||
|
List.fold_left
|
||||||
|
(fun s t ->
|
||||||
|
Array.fold_left
|
||||||
|
(fun acc rule -> Re.replace_string ~all:true rule ~by:"" acc)
|
||||||
|
s t.rules )
|
||||||
|
s regexps_list
|
||||||
|
in
|
||||||
|
|
||||||
|
let clean_query = apply_rules query in
|
||||||
|
|
||||||
|
(* TODO fragment might contains tracking fields as well (really?) *)
|
||||||
|
let query_list = Re.matches split_queries_regex clean_query in
|
||||||
|
|
||||||
|
let pp_field_list =
|
||||||
|
Format.pp_print_list
|
||||||
|
~pp_sep:(fun fmt () -> Format.pp_print_string fmt "&")
|
||||||
|
Format.pp_print_string
|
||||||
|
in
|
||||||
|
let pp_query fmt query_list =
|
||||||
|
match query_list with
|
||||||
|
| [] -> Format.fprintf fmt ""
|
||||||
|
| query_list -> Format.fprintf fmt "?%a" pp_field_list query_list
|
||||||
|
in
|
||||||
|
Format.asprintf "%s%a%s" base pp_query query_list fragment
|
||||||
|
|
||||||
|
(* TODO implement link unshortening like https://github.com/AmanoTeam/Unalix/blob/master/unalix/core/url_unshort.py *)
|
||||||
18
src/dune
Normal file
18
src/dune
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
(library
|
||||||
|
(name clear_url)
|
||||||
|
(modules clear_url)
|
||||||
|
(libraries bos fpath re uri yojson))
|
||||||
|
|
||||||
|
(executable
|
||||||
|
(name updaterules)
|
||||||
|
(modules updaterules)
|
||||||
|
(libraries lwt lwt.unix cohttp-lwt cohttp-lwt-unix uri yojson))
|
||||||
|
|
||||||
|
(rule
|
||||||
|
(target rules.json)
|
||||||
|
(deps updaterules.exe)
|
||||||
|
(mode promote)
|
||||||
|
(action
|
||||||
|
(with-stdout-to
|
||||||
|
rules.json
|
||||||
|
(run ./updaterules.exe))))
|
||||||
1
src/rules.json
Normal file
1
src/rules.json
Normal file
File diff suppressed because one or more lines are too long
10
src/updaterules.ml
Normal file
10
src/updaterules.ml
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
open Lwt.Syntax
|
||||||
|
|
||||||
|
let url = "https://rules2.clearurls.xyz/data.minify.json"
|
||||||
|
|
||||||
|
let program =
|
||||||
|
let* _, body = Cohttp_lwt_unix.Client.get (url |> Uri.of_string) in
|
||||||
|
let* body = Cohttp_lwt.Body.to_string body in
|
||||||
|
Lwt_io.write_line Lwt_io.stdout body
|
||||||
|
|
||||||
|
let () = Lwt_main.run program
|
||||||
Loading…
Add table
Add a link
Reference in a new issue