#!/usr/bin/env python3
from http.server import HTTPServer, SimpleHTTPRequestHandler
from urllib.parse import urlparse, parse_qs
import urllib.request
import os
import re
# Hostnames the proxy may fetch from when whitelisting is enforced.
# The whitelist check in Handler.do_GET (commented out in the handler as
# written) tests whether the requested hostname ends with one of these entries.
ALLOWED_DOMAINS = [
    # Wikipedia
    'en.wikipedia.org',
    'wikipedia.org',
    # example.com
    'example.com',
    # GitHub
    'github.com',
    'www.github.com',
]
class Handler(SimpleHTTPRequestHandler):
def do_GET(self):
if self.path.startswith('/proxy?'):
qs = urlparse(self.path).query
params = parse_qs(qs)
if 'url' not in params:
self.send_error(400)
return
url = params['url'][0]
# Security: Validate URL
try:
parsed = urlparse(url)
# Block dangerous schemes
if parsed.scheme not in ('http', 'https'):
self.send_error(403, 'Invalid URL scheme')
return
# Block localhost/private IPs
hostname = parsed.hostname or ''
if hostname in ('localhost', '127.0.0.1', '::1') or hostname.startswith('192.168.') or hostname.startswith('10.'):
self.send_error(403, 'Local network access blocked')
return
# Check whitelist (optional - currently disabled, so all domains are allowed; uncomment the lines below to enforce it)
# if not any(hostname.endswith(domain) for domain in ALLOWED_DOMAINS):
# self.send_error(403, 'Domain not whitelisted')
# return
except Exception as e:
self.send_error(400, 'Invalid URL')
return
try:
req = urllib.request.Request(url, headers={
'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X)',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5'
})
response = urllib.request.urlopen(req, timeout=30)
data = response.read(1000000).decode('utf-8', errors='ignore')
# Add base tag for relative URLs
base_tag = f'