summaryrefslogtreecommitdiff
path: root/cookiebanners/script.sh
blob: eebf03a1ca2cfff26749a87dee9fefc89b10653d (plain)
  1. #!/bin/sh
  2. # SPDX-License-Identifier: GPL-3.0-or-later
  3. # SPDX-FileCopyrightText: 2024 Jonas Smedegaard <dr@jones.dk>
# iterate through the host list: load the web page for each host, pause, and gather data
  5. #
  6. # Requires sway, firefox-esr, jq
  7. # Recommends (for postprocessing): heif-examples, poppler-utils
  8. # break on error
  9. set -eu
  10. # top 1000 domains
  11. # * go to <https://dataforseo.com/free-seo-stats/top-1000-websites>
  12. # * select "Denmark"
  13. # * download
  14. json_file="ranked_domains.json"
  15. # count Firefox windows
  16. firefox_windows() {
  17. swaymsg -t get_tree | jq '[recurse(.nodes[]? | .nodes[]?)] | map(select(.app_id == "firefox-esr")) | length'
  18. }
  19. # count already opened Firefox windows
  20. baseline_windows=$(firefox_windows)
  21. # iterate through top domains and collect data about each
  22. jq -c '.[]' "$json_file" | while read -r item; do
  23. domain=$(echo "$item" | jq -r '.domain')
  24. pos=$(echo "$item" | jq -r '.position')
  25. # skip if screenshot already exist for this domain
  26. [ ! -e "$pos.png" ] || continue
  27. # load front page of www host at domain into Firefox
  28. firefox --new-window "https://www.$domain"
  29. # wait until firefox window is closed
  30. while true; do
  31. sleep 1
  32. if [ "$(firefox_windows)" -eq "$baseline_windows" ]; then
  33. break
  34. fi
  35. done
  36. # collect PNG screenshot
  37. # * use tool grimshot
  38. find ~/ -mindepth 1 -maxdepth 1 -name '*.png' -exec mv '{}' "$pos.png" ';'
  39. # collect HAR network timing data
  40. # * Open debugger window: F12
  41. # * Select pane "Network"
  42. # * From rightmost pane, select "Save all as HAR"
  43. find ~/data -mindepth 1 -maxdepth 1 -name '*.har' -exec mv '{}' "$pos.har" ';'
  44. # collect data: PNG screenshot, HAR network timing and PDF screenshot
  45. # * use plugin <https://addons.mozilla.org/da/firefox/addon/save-pdf/>
  46. find ~/data -mindepth 1 -maxdepth 1 -name '*' -exec mv '{}' "$pos.pdf" ';'
  47. done
  48. # Post-processing to extract text strings and compress images:
  49. #find -name '*.pdf' -type f -exec pdftotext -raw '{}' \;
  50. #find -name '*.png' -type f -exec heif-enc -A '{}' \;