blob: eebf03a1ca2cfff26749a87dee9fefc89b10653d (
plain)
- #!/bin/sh
- # SPDX-License-Identifier: GPL-3.0-or-later
- # SPDX-FileCopyrightText: 2024 Jonas Smedegaard <dr@jones.dk>
- # Iterate through a host list: load the web page for each host, pause, and gather data
- #
- # Requires sway, firefox-esr, jq
- # Recommends (for postprocessing): heif-examples, poppler-utils
- # break on error
- set -eu
- # top 1000 domains
- # * go to <https://dataforseo.com/free-seo-stats/top-1000-websites>
- # * select "Denmark"
- # * download
- json_file="ranked_domains.json"
- # count Firefox windows
- #
- # Prints the number of nodes in the sway layout tree whose app_id is
- # "firefox-esr".
- #
- # The tree is walked one level at a time through both "nodes" (tiled
- # containers) and "floating_nodes", so that windows are counted at any
- # nesting depth and regardless of whether they are floating.
- firefox_windows() {
-   swaymsg -t get_tree \
-     | jq '[recurse(.nodes[]?, .floating_nodes[]?) | select(.app_id == "firefox-esr")] | length'
- }
- # count already opened Firefox windows
- baseline_windows=$(firefox_windows)
- # iterate through top domains and collect data about each
- jq -c '.[]' "$json_file" | while read -r item; do
- domain=$(echo "$item" | jq -r '.domain')
- pos=$(echo "$item" | jq -r '.position')
- # skip if screenshot already exist for this domain
- [ ! -e "$pos.png" ] || continue
- # load front page of www host at domain into Firefox
- firefox --new-window "https://www.$domain"
- # wait until firefox window is closed
- while true; do
- sleep 1
- if [ "$(firefox_windows)" -eq "$baseline_windows" ]; then
- break
- fi
- done
- # collect PNG screenshot
- # * use tool grimshot
- find ~/ -mindepth 1 -maxdepth 1 -name '*.png' -exec mv '{}' "$pos.png" ';'
- # collect HAR network timing data
- # * Open debugger window: F12
- # * Select pane "Network"
- # * From rightmost pane, select "Save all as HAR"
- find ~/data -mindepth 1 -maxdepth 1 -name '*.har' -exec mv '{}' "$pos.har" ';'
- # collect data: PNG screenshot, HAR network timing and PDF screenshot
- # * use plugin <https://addons.mozilla.org/da/firefox/addon/save-pdf/>
- find ~/data -mindepth 1 -maxdepth 1 -name '*' -exec mv '{}' "$pos.pdf" ';'
- done
- # Post-processing to extract text strings and compress images:
- #find -name '*.pdf' -type f -exec pdftotext -raw '{}' \;
- #find -name '*.png' -type f -exec heif-enc -A '{}' \;
|