mirror of https://github.com/nmasur/dotfiles
synced 2026-02-05 20:29:46 +00:00
enable daily browser summary on darwin
@@ -0,0 +1,112 @@
import requests
from bs4 import BeautifulSoup
import json
import os
import sys
from datetime import datetime
from urllib.parse import urlparse


def find_urls(data):
    # Recursively walk the snapshot JSON, collecting every string
    # value stored under a 'url' key at any depth.
    urls = []
    if isinstance(data, dict):
        for key, value in data.items():
            if key == 'url' and isinstance(value, str):
                urls.append(value)
            else:
                urls.extend(find_urls(value))
    elif isinstance(data, list):
        for item in data:
            urls.extend(find_urls(item))
    return urls


def main():
    # Snapshot directory may be passed as the first argument;
    # default to the current directory.
    if len(sys.argv) > 1:
        base_dir = sys.argv[1]
        if not os.path.isdir(base_dir):
            print(f"Error: Directory '{base_dir}' not found.")
            sys.exit(1)
    else:
        base_dir = '.'

    today = datetime.now().strftime('%Y.%m.%d')
    output_filename = 'todays_urls.md'
    output_filepath = os.path.join(base_dir, output_filename)
    url_titles = {}

    print(f"Searching for files in '{base_dir}' "
          f"starting with 'snapshot-{today}-'")

    with open(output_filepath, 'w') as md_file:
        md_file.write(f'# URLs from Sidebery Snapshots for '
                      f'{today.replace(".", "-")}\n\n')

        files_processed = 0
        for filename in sorted(os.listdir(base_dir)):
            if (filename.startswith(f'snapshot-{today}-')
                    and filename.endswith('.json')):
                files_processed += 1
                print(f"Processing file: "
                      f"{os.path.join(base_dir, filename)}")

                # Extract and format date and time from filename
                # Example: snapshot-2026.01.25-13.19.29.json
                clean_filename = (filename.replace('snapshot-', '')
                                  .replace('.json', ''))
                date_time_parts = clean_filename.split('-', 1)

                formatted_date = date_time_parts[0].replace('.', '-')
                formatted_time = date_time_parts[1].replace('.', ':')

                datetime_str = f"{formatted_date} {formatted_time}"

                md_file.write(f'## {datetime_str}\n\n')

                with open(os.path.join(base_dir, filename), 'r') as json_file:
                    try:
                        data = json.load(json_file)
                        urls = find_urls(data)
                        print(f" Found {len(urls)} URLs")
                        for url in urls:
                            if url not in url_titles:
                                try:
                                    # Get title of URL, caching the result so
                                    # each URL is only fetched once
                                    res = requests.get(
                                        url,
                                        timeout=10,
                                        allow_redirects=True
                                    )
                                    soup = BeautifulSoup(
                                        res.text,
                                        'html.parser'
                                    )
                                    if soup.title and soup.title.string:
                                        title = soup.title.string.strip()
                                    else:
                                        domain = urlparse(url).netloc
                                        title = domain if domain else url
                                    url_titles[url] = title
                                except requests.exceptions.InvalidSchema:
                                    # Skip non-HTTP URLs (about:, moz-extension:, ...)
                                    continue
                                except Exception:
                                    # Fall back to the domain when the page
                                    # can't be fetched or parsed
                                    domain = urlparse(url).netloc
                                    title = domain if domain else url
                                    url_titles[url] = title
                            if url in url_titles:
                                title = url_titles[url]
                                md_file.write(f'- [{title}]({url})\n')
                        md_file.write('\n')
                    except json.JSONDecodeError:
                        print(f" Error decoding JSON in {filename}")
                        md_file.write('- Error decoding JSON\n\n')

        if files_processed == 0:
            print("No files found for today.")

    print(f"Processing complete. Output written to {output_filepath}")


if __name__ == '__main__':
    main()
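The script takes the snapshot directory as an optional first argument and writes todays_urls.md into it. For reference, a minimal sketch of what find_urls does with a Sidebery-style snapshot; the nested sample data below is invented for illustration, since real snapshots nest tab objects inside panels and containers:

# Invented sample mimicking the nested shape of a snapshot file;
# find_urls collects 'url' values regardless of nesting depth.
sample = {
    'containers': [
        {'tabs': [{'url': 'https://example.com', 'title': 'Example'}]},
        {'tabs': [{'url': 'https://nixos.org'}]},
    ]
}
assert find_urls(sample) == ['https://example.com', 'https://nixos.org']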