获取网站favicon图标程序
in PHP with 0 comment

php.jpg 分享一个提取网站 favicon 图标的程序,只需一个 PHP 文件就能搞定。(话说使用 ChatGPT 效率就是高,一会儿就写出来了,嘻嘻~)

<?php

/**
 * FaviconFetcher - fetches and serves the favicon of a given URL.
 *
 * Version: 1.0.9
 * Author: xxy
 * Date: 2025-01-25
 *
 * The class downloads the target page, looks for a <link rel="icon"> element,
 * resolves its href to an absolute URL, downloads the icon, validates its MIME
 * type and streams it to the client. Results are cached on disk per host; a
 * default icon is served on any failure.
 *
 * NOTE(review): only http/https targets are allowed, but requests to internal
 * or loopback addresses are not blocked here. If this endpoint is exposed to
 * untrusted users, add an address allow/deny check before fetching.
 */
class FaviconFetcher
{
    /** Conventional favicon location used when the page declares none. */
    private const DEFAULT_ICON_PATH = '/favicon.ico';

    /** Upper bound on redirects followed for a single request. */
    private const MAX_REDIRECTS = 5;

    private string $url; // Target URL whose favicon is fetched
    private string $cacheDir; // Cache directory path (no trailing slash)
    private string $defaultIconPath; // Path of the fallback icon
    private int $cacheExpirationTime; // Cache lifetime in seconds
    private bool $verifyTls; // Whether TLS certificates are verified
    private bool $debug = false; // Whether debug logging is enabled

    /**
     * @param string $url                 Absolute http(s) URL of the target page.
     * @param string $cacheDir            Directory for cached icons; created if missing.
     * @param string $defaultIconPath     Readable file served when no icon can be fetched.
     * @param int    $cacheExpirationTime Cache lifetime in seconds.
     * @param bool   $verifyTls           Verify TLS certificates of remote hosts. Pass false
     *                                    only when icons from hosts with broken certificates
     *                                    must still be fetched.
     *
     * @throws InvalidArgumentException If the URL is malformed or not http/https.
     * @throws RuntimeException         If the cache directory cannot be created or the
     *                                  default icon is missing or unreadable.
     */
    public function __construct(
        string $url,
        string $cacheDir,
        string $defaultIconPath,
        int $cacheExpirationTime = 86400,
        bool $verifyTls = true
    ) {
        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            throw new InvalidArgumentException('提供的 URL 格式无效。');
        }

        // The URL is user-controlled and fetched server-side: refuse file://, ftp://, etc.
        $scheme = strtolower((string)parse_url($url, PHP_URL_SCHEME));
        if (!in_array($scheme, ['http', 'https'], true)) {
            throw new InvalidArgumentException('仅支持 http 和 https 协议的 URL。');
        }

        $this->url = $url;
        $this->cacheDir = rtrim($cacheDir, '/');
        $this->defaultIconPath = $defaultIconPath;
        $this->cacheExpirationTime = $cacheExpirationTime;
        $this->verifyTls = $verifyTls;

        // The second is_dir() check tolerates a concurrent request creating the directory first.
        if (!is_dir($this->cacheDir) && !mkdir($this->cacheDir, 0755, true) && !is_dir($this->cacheDir)) {
            throw new RuntimeException('无法创建缓存目录。');
        }

        if (!file_exists($this->defaultIconPath) || !is_readable($this->defaultIconPath)) {
            throw new RuntimeException('默认图标文件不存在或不可读。');
        }
    }

    /**
     * Serves the favicon for the configured URL and terminates the script.
     *
     * Order of attempts: disk cache, then a fresh download. Any failure along
     * the way results in the default icon being served instead.
     */
    public function fetch(): void
    {
        try {
            // Defensive only: the constructor already rejects an empty URL.
            if (empty($this->url)) {
                $this->log('未提供 URL,正在输出默认图标。');
                $this->outputDefaultImage();
                return;
            }

            if ($this->checkCache()) {
                return;
            }

            $iconUrl = $this->resolveIconUrl($this->getFavoriteIcon());
            $this->log('已解析图标 URL: ' . $iconUrl);

            if (!$this->validateFileSize($iconUrl)) {
                throw new RuntimeException('图标文件过大。');
            }

            $this->outputImage($iconUrl);
        } catch (Throwable $e) {
            $this->log('发生错误:' . $e->getMessage());
            $this->outputDefaultImage();
        }
    }

    /**
     * Appends a message to debug.log (relative to the working directory)
     * when debugging is enabled.
     */
    private function log(string $message): void
    {
        if ($this->debug) {
            file_put_contents('debug.log', "[DEBUG] " . $message . PHP_EOL, FILE_APPEND);
        }
    }

    /**
     * Sends the default icon and terminates the script.
     */
    private function outputDefaultImage(): void
    {
        $this->log('正在输出默认图标。');
        header('Content-type: image/x-icon');
        readfile($this->defaultIconPath);
        exit;
    }

    /**
     * Serves the icon from the disk cache when a fresh, valid entry exists.
     *
     * A cache file is "<unix timestamp>\n<raw icon bytes>". On a hit the icon
     * is sent and the script terminates, so this method only ever returns
     * false to its caller (miss, expired, or invalid entry).
     */
    private function checkCache(): bool
    {
        $cacheFile = $this->getCacheFileName();

        if (!is_file($cacheFile)) {
            return false;
        }

        $content = file_get_contents($cacheFile);
        if ($content === false) {
            // The file vanished or became unreadable between the check and the read.
            return false;
        }

        $lines = explode("\n", $content, 2);
        $lastModifiedTime = (int)$lines[0];
        $iconContent = $lines[1] ?? '';

        $remainingCacheTime = $this->cacheExpirationTime - (time() - $lastModifiedTime);
        if ($remainingCacheTime <= 0) {
            $this->removeCacheFile($cacheFile);
            return false;
        }

        $fileType = $this->getFileType($iconContent);
        if (!$this->validateFileType($fileType)) {
            $this->removeCacheFile($cacheFile);
            return false;
        }

        // Advertise only the time the cache entry has left, not the full lifetime.
        header('X-Icon-Cache: hit');
        $this->sendImage($iconContent, $fileType, $remainingCacheTime);
        exit;
    }

    /**
     * Returns the href of the first icon <link> found on the target page.
     *
     * Matches any <link> whose rel attribute contains the token "icon",
     * case-insensitively (covers "icon", "shortcut icon", "ICON"), but not
     * hyphenated rels such as "apple-touch-icon" or "mask-icon".
     *
     * @return string The raw href, or "/favicon.ico" when none is declared.
     *
     * @throws RuntimeException If the page cannot be downloaded.
     */
    private function getFavoriteIcon(): string
    {
        $content = $this->getUrlContent($this->url);
        if (empty($content)) {
            return self::DEFAULT_ICON_PATH;
        }

        // Real-world HTML is rarely valid; collect parser errors quietly instead of using "@".
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($content, LIBXML_NONET);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        if (!$loaded) {
            return self::DEFAULT_ICON_PATH;
        }

        $nodes = (new DOMXPath($dom))->query(
            '//link[contains(concat(" ", normalize-space(translate(@rel, '
            . '"ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")), " "), " icon ")]'
        );
        if ($nodes === false) {
            return self::DEFAULT_ICON_PATH;
        }

        foreach ($nodes as $node) {
            if (!$node instanceof DOMElement) {
                continue;
            }
            $href = trim($node->getAttribute('href'));
            if ($href !== '') {
                return $href;
            }
        }

        return self::DEFAULT_ICON_PATH;
    }

    /**
     * Resolves an icon href found on the page to an absolute http(s) URL.
     *
     * - "//host/x"      : protocol-relative, inherits the page's scheme.
     * - "http(s)://..." : already absolute, returned unchanged.
     * - "/x"            : resolved against the page's origin.
     * - "x" or "../x"   : resolved against the directory of the page's path
     *                     (dot segments are left for the HTTP client to normalise).
     * - any other scheme (data:, javascript:, ...) cannot be fetched over
     *   HTTP and falls back to "/favicon.ico" on the page's origin.
     *
     * @throws RuntimeException If the page URL has no host component.
     */
    private function resolveIconUrl(string $icon): string
    {
        $icon = trim($icon);
        if ($icon === '') {
            $icon = self::DEFAULT_ICON_PATH;
        }

        $parts = parse_url($this->url) ?: [];
        $scheme = $parts['scheme'] ?? 'https';

        if (strpos($icon, '//') === 0) {
            return $scheme . ':' . $icon;
        }

        if (preg_match('#^https?://#i', $icon) === 1) {
            return $icon;
        }

        if (preg_match('#^[a-z][a-z0-9+.\-]*:#i', $icon) === 1) {
            $icon = self::DEFAULT_ICON_PATH;
        }

        $host = $parts['host'] ?? '';
        if ($host === '') {
            throw new RuntimeException('无法解析 URL 的域名。');
        }
        $origin = $scheme . '://' . $host . (isset($parts['port']) ? ':' . $parts['port'] : '');

        if ($icon[0] === '/') {
            return $origin . $icon;
        }

        // Relative reference: keep the page path up to and including its last slash.
        $path = $parts['path'] ?? '/';
        $lastSlash = strrpos($path, '/');
        $directory = $lastSlash === false ? '/' : substr($path, 0, $lastSlash + 1);

        return $origin . $directory . $icon;
    }

    /**
     * Checks the size the server announces for $url via a HEAD request.
     *
     * An unknown size (no Content-Length, HEAD rejected, request failed) is
     * accepted here; the download itself is still capped by getUrlContent().
     *
     * @return bool False only when the announced size exceeds $maxSize.
     */
    private function validateFileSize(string $url, int $maxSize = 1048576): bool
    {
        $ch = $this->createCurlHandle($url, 3, true);
        curl_setopt($ch, CURLOPT_NOBODY, true);

        $succeeded = curl_exec($ch) !== false;
        // cURL reports -1 when the final response carried no Content-Length.
        $announcedSize = $succeeded ? (float)curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD) : -1.0;
        curl_close($ch);

        return $announcedSize <= $maxSize;
    }

    /**
     * Downloads $url and returns the response body.
     *
     * @param string $url             Absolute http(s) URL.
     * @param int    $timeout         Total request timeout in seconds.
     * @param bool   $followRedirects Whether HTTP redirects are followed.
     * @param int    $maxSize         Abort once more than this many bytes were received.
     *
     * @throws RuntimeException On transfer errors or when the body exceeds $maxSize.
     */
    private function getUrlContent(string $url, int $timeout = 3, bool $followRedirects = true, int $maxSize = 1048576): string
    {
        $ch = $this->createCurlHandle($url, $timeout, $followRedirects);

        $tooLarge = false;
        curl_setopt_array($ch, [
            CURLOPT_NOPROGRESS => false,
            // Returning non-zero makes cURL abort the transfer.
            CURLOPT_PROGRESSFUNCTION => static function ($handle, $downloadTotal, $downloaded) use ($maxSize, &$tooLarge): int {
                if ($downloaded > $maxSize) {
                    $tooLarge = true;
                    return 1;
                }
                return 0;
            },
        ]);

        $output = curl_exec($ch);
        $errorNumber = curl_errno($ch);
        $errorMessage = curl_error($ch);
        curl_close($ch);

        if ($tooLarge) {
            throw new RuntimeException('文件过大,下载失败。');
        }
        if ($output === false || $errorNumber !== 0) {
            throw new RuntimeException('Curl 错误:' . $errorMessage);
        }

        return (string)$output;
    }

    /**
     * Creates a cURL handle with the options shared by all outgoing requests.
     *
     * @return resource|\CurlHandle
     *
     * @throws RuntimeException If cURL cannot be initialised.
     */
    private function createCurlHandle(string $url, int $timeout, bool $followRedirects)
    {
        $ch = curl_init();
        if ($ch === false) {
            throw new RuntimeException('Curl 错误:无法初始化。');
        }

        curl_setopt_array($ch, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => $followRedirects,
            CURLOPT_MAXREDIRS => self::MAX_REDIRECTS,
            CURLOPT_TIMEOUT => $timeout,
            // Never let a user-supplied URL or a redirect reach file://, gopher://, etc.
            CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
            CURLOPT_SSL_VERIFYPEER => $this->verifyTls,
            CURLOPT_SSL_VERIFYHOST => $this->verifyTls ? 2 : 0,
        ]);

        return $ch;
    }

    /**
     * Downloads the icon at $url, caches it, sends it and terminates the script.
     *
     * @throws RuntimeException If the download fails or the content is not a supported image.
     */
    private function outputImage(string $url): void
    {
        $content = $this->getUrlContent($url);
        $fileType = $this->getFileType($content);

        if (!$this->validateFileType($fileType)) {
            throw new RuntimeException('无效的文件类型,无法作为图标。');
        }

        $this->writeCache($content);
        $this->sendImage($content, $fileType, $this->cacheExpirationTime);
        exit;
    }

    /**
     * Emits caching and content headers followed by the icon bytes.
     *
     * @param int $maxAge Number of seconds clients may cache the response.
     */
    private function sendImage(string $content, string $fileType, int $maxAge): void
    {
        header('Cache-Control: public, max-age=' . $maxAge . ', immutable');
        header('Expires: ' . gmdate('D, d M Y H:i:s', time() + $maxAge) . ' GMT');
        header('Content-type: ' . $fileType);
        // The bytes come from a third party: stop browsers from reinterpreting them and
        // keep scripts inside an SVG inert if the response is opened as a document.
        header('X-Content-Type-Options: nosniff');
        header("Content-Security-Policy: default-src 'none'; style-src 'unsafe-inline'; sandbox");
        echo $content;
    }

    /**
     * Stores the icon in the cache, best effort.
     *
     * The entry is written to a temporary file and renamed into place so a
     * concurrent reader never sees a partially written cache file.
     */
    private function writeCache(string $content): void
    {
        $cacheFile = $this->getCacheFileName();
        $temporaryFile = $cacheFile . '.' . uniqid((string)getmypid(), true) . '.tmp';

        if (file_put_contents($temporaryFile, time() . "\n" . $content) === false) {
            $this->log('无法写入缓存文件。');
            return;
        }

        if (!rename($temporaryFile, $cacheFile)) {
            $this->log('无法更新缓存文件。');
            $this->removeCacheFile($temporaryFile);
        }
    }

    /**
     * Deletes a cache file if it still exists.
     */
    private function removeCacheFile(string $path): void
    {
        if (is_file($path)) {
            unlink($path);
        }
    }

    /**
     * Returns the cache file path for the target URL.
     *
     * The key is the URL's host (lower-cased, e.g. "note.dc24.top"), so all
     * pages of one host share a cache entry.
     */
    private function getCacheFileName(): string
    {
        $host = parse_url($this->url, PHP_URL_HOST);
        if (empty($host)) {
            $this->log('无法解析 URL 的域名,使用默认缓存文件名。');
            $host = 'default';
        }

        // Host names are case-insensitive; hashing keeps the file name path-safe.
        $safeHost = preg_replace('/[^a-zA-Z0-9\-\.]/', '_', strtolower($host));
        return $this->cacheDir . '/' . hash('sha256', $safeHost) . '.cache';
    }

    /**
     * Detects the MIME type of raw bytes by content sniffing.
     *
     * @return string The MIME type, or an empty string when detection fails.
     */
    private function getFileType(string $content): string
    {
        $mimeType = (new finfo(FILEINFO_MIME_TYPE))->buffer($content);
        return $mimeType === false ? '' : $mimeType;
    }

    /**
     * Tells whether a MIME type is an accepted icon format.
     */
    private function validateFileType(string $fileType): bool
    {
        $supportedFormats = [
            'image/x-icon',
            // Name reported for .ico files by current libmagic databases.
            'image/vnd.microsoft.icon',
            'image/png',
            'image/gif',
            'image/svg+xml',
            'image/jpeg',
            'image/webp',
        ];
        $this->log($fileType);
        return in_array($fileType, $supportedFormats, true);
    }
}

// Usage example: serve the favicon of the page given in the "url" query parameter.
// A non-string value (e.g. ?url[]=x arrives as an array) is treated as "no URL"
// so that it yields the default icon instead of an uncaught TypeError.
$rawUrl = $_GET['url'] ?? '';
$url = is_string($rawUrl) ? trim($rawUrl) : '';
$cacheDir = __DIR__ . '/cache';
$defaultIconPath = __DIR__ . '/default.ico';
$cacheExpirationTime = 86400 * 7; // Cache lifetime: 7 days

try {
    $faviconFetcher = new FaviconFetcher($url, $cacheDir, $defaultIconPath, $cacheExpirationTime);
    $faviconFetcher->fetch();
} catch (InvalidArgumentException $e) {
    // Invalid URL: serve the default icon directly.
    header('Content-type: image/x-icon');
    readfile($defaultIconPath);
} catch (RuntimeException $e) {
    // Server-side setup problem (cache directory or default icon): report it as such,
    // and still send the default icon when it is available.
    http_response_code(500);
    if (is_readable($defaultIconPath)) {
        header('Content-type: image/x-icon');
        readfile($defaultIconPath);
    }
}
回复