Files
MyHomepage/backend/Services/FaviconService.cs
T
g82tt 68be41e7a2 初始提交:浏览器首页 MyHomePage 全栈项目
# 项目概述
个人浏览器首页导航应用,支持书签分类管理、搜索引擎快捷搜索、
必应每日壁纸轮播、前后端分离部署,适配 1Panel 服务器(Docker 模式)。

# 技术栈
- 前端:Vue 3 + TypeScript + Vite + Pinia + Capacitor(Android 打包)
- 后端:.NET 8 + SqlSugar(多数据库) + SQLite/MySQL + Swashbuckle
- 部署:1Panel 应用商店自定义应用(Docker Compose 模式)

# 项目结构
- backend/    .NET 8 API 后端(8 个 Controller + 15 个 Service)
- frontend/   Vue 3 前端(19 个组件 + 9 个 API 模块 + 5 个 Store)
- docker/     Docker 部署文件(后端镜像 + Nginx 反代)
- docs/       部署手册(1Panel 实战版)
- scripts/    E2E 测试脚本

# 已实现功能
- 书签管理:增删改查 + 树形分类 + 拖拽排序 + 主色自适应
- 搜索引擎:8 个内置引擎 + 自定义引擎 + favicon 自动抓取
- 必应壁纸:每日轮播 + 多分辨率自动选择 + 1.6MP 质量优先
- 全局设置:主题/行为/数据/工具 4 分类 + 跨设备同步
- 文件上传:图标/书签/通用(容器持久化 + 跨域 URL 拼接)
- 同步:基于变更日志的设备间数据同步
- 跨域部署:前后端分离 + runtime config.json 无需重新编译

# 进度记录
- 已完成 P0~P52 共 53 个开发节点(详细见 说明文档.md)
- 当前版本:v1.0 部署就绪

# 部署文档
- README.md:项目说明 + 快速开始
- 说明文档.md:完整开发进度(中文)
- docs/DEPLOY.md:1Panel 部署手册(Docker 模式)
2026-07-05 05:09:56 +08:00

455 lines
19 KiB
C#
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
using System.Net;
using System.Net.Sockets;
using System.Text.RegularExpressions;
using System.Web;
using Microsoft.Extensions.Caching.Memory;
using MyHomePage.Api.Common;
using MyHomePage.Api.Infrastructure.Configuration;
using Microsoft.Extensions.Options;
namespace MyHomePage.Api.Services;
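/// <summary>
/// Automatically fetches a website's favicon.
/// P31 main path: BookmarkService.Create/Update calls this service when no icon was specified:
/// 1. HTTP GET the target page (body capped at 1MB, User-Agent mimics a browser; the 5s timeout
///    is not set in this file and is presumably configured on the named HttpClient)
/// 2. Parse HTML &lt;link rel="icon"&gt; / apple-touch-icon / shortcut icon
/// 3. Pick the best icon by priority (apple-touch > largest sizes > /favicon.ico fallback)
/// 4. Download the icon image into the Upload/favicons/ directory
/// 5. Return a URL the frontend can access (stored in bookmark.IconUrl with iconType='favicon')
/// SSRF protection: private / local / link-local addresses are rejected.
/// On failure returns null (does not throw); the caller falls back to the default icon.
/// </summary>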
public class FaviconService
{
// ----- constants -----

/// <summary>Prefix prepended to every favicon cache key.</summary>
private const string CacheKeyPrefix = "favicon:";

/// <summary>Lifetime of a successfully fetched icon URL in the memory cache (24 hours).</summary>
private static readonly TimeSpan CacheTtl = TimeSpan.FromHours(24);

/// <summary>Browser-like User-Agent sent with outgoing requests, intended to avoid rejection by some sites.</summary>
private const string UserAgent =
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36";

/// <summary>Upper bound, in bytes, on a downloaded icon (5 MB).</summary>
private const long MaxIconBytes = 5L * 1024 * 1024;

/// <summary>
/// Name passed to the HttpClient factory. It is expected to match an
/// AddHttpClient(name) registration in Program.cs (not visible from this file).
/// </summary>
private const string HttpClientName = nameof(FaviconService);

// ----- injected dependencies -----

private readonly IHttpClientFactory _httpFactory;
private readonly IUploadService _upload;
private readonly IMemoryCache _cache;
private readonly UploadOptions _uploadOptions;
private readonly ILogger<FaviconService> _logger;

/// <summary>Creates the service from its injected dependencies.</summary>
/// <param name="httpFactory">Factory used to obtain the named HttpClient.</param>
/// <param name="upload">Upload service that persists downloaded icons.</param>
/// <param name="cache">Memory cache holding fetched icon URLs.</param>
/// <param name="uploadOptions">Upload options; only the unwrapped value is kept.</param>
/// <param name="logger">Logger for diagnostics.</param>
public FaviconService(
    IHttpClientFactory httpFactory,
    IUploadService upload,
    IMemoryCache cache,
    IOptions<UploadOptions> uploadOptions,
    ILogger<FaviconService> logger)
{
    _logger = logger;
    _uploadOptions = uploadOptions.Value;
    _cache = cache;
    _upload = upload;
    _httpFactory = httpFactory;
}
/// <summary>
/// Asks the factory for an HttpClient registered under <see cref="HttpClientName"/>.
/// Callers obtain a fresh client per request and dispose it when done.
/// </summary>
private HttpClient NewClient()
{
    return _httpFactory.CreateClient(HttpClientName);
}
/// <summary>
/// Fetches the favicon of <paramref name="pageUrl"/>, saves it through the upload service,
/// and returns the URL reported for the saved file.
/// </summary>
/// <param name="pageUrl">Absolute http/https page URL. Anything else yields <c>null</c>.</param>
/// <param name="ct">Cancellation token flowed to the fetch and download steps.</param>
/// <returns>
/// The saved icon URL, or <c>null</c> when any step fails. This method does not throw;
/// callers are expected to fall back to a default icon.
/// </returns>
public async Task<string?> FetchAndSaveAsync(string pageUrl, CancellationToken ct = default)
{
    if (string.IsNullOrWhiteSpace(pageUrl)) return null;
    if (!Uri.TryCreate(pageUrl, UriKind.Absolute, out var pageUri)) return null;
    if (pageUri.Scheme != Uri.UriSchemeHttp && pageUri.Scheme != Uri.UriSchemeHttps) return null;

    // Key on scheme + host + port + path. Keying on host + path alone makes
    // http://h:8080/ and https://h/ share one entry and serve each other's icon.
    var cacheKey = CacheKeyPrefix + pageUri.GetLeftPart(UriPartial.Path);
    if (_cache.TryGetValue<string?>(cacheKey, out var cached))
    {
        _logger.LogDebug("Favicon cache hit: {Url} → {Icon}", pageUrl, cached ?? "(null)");
        return cached;
    }

    try
    {
        // P51 (temporary): negative caching via CacheNull(cacheKey) is disabled on both
        // failure paths below so that a repeated request can pick up a fresh result.
        var iconUrl = await FetchIconUrlAsync(pageUri, ct);
        if (string.IsNullOrEmpty(iconUrl)) return null;

        var saved = await DownloadAndSaveAsync(iconUrl, pageUri, ct);
        if (saved == null) return null;

        _cache.Set(cacheKey, saved, CacheTtl);
        _logger.LogInformation("Favicon fetched: {Page} → {Icon}", pageUrl, saved);
        return saved;
    }
    catch (OperationCanceledException) when (ct.IsCancellationRequested)
    {
        // The caller gave up: not an error, but the no-throw contract still applies.
        _logger.LogDebug("Favicon fetch cancelled: {Url}", pageUrl);
        return null;
    }
    catch (Exception ex)
    {
        // P51: logged at Error (not Warning) together with the effective UploadOptions.Path,
        // to help diagnose container /uploads permission or path-override problems.
        _logger.LogError(ex,
            "Favicon fetch failed: {Url} | UploadOptions.Path='{OptPath}' (env={Env})",
            pageUrl, _uploadOptions.Path, Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT") ?? "(default)");
        return null;
    }
}
/// <summary>Stores a null entry under <paramref name="key"/> for 10 minutes (negative cache).</summary>
private void CacheNull(string key)
{
    var negativeTtl = TimeSpan.FromMinutes(10);
    _cache.Set(key, (string?)null, negativeTtl);
}
/// <summary>
/// Main flow: download the page HTML, parse its icon links, and choose the best icon URL.
/// </summary>
/// <param name="pageUri">Page whose icon is wanted.</param>
/// <param name="ct">Cancellation token passed to the HTML fetch.</param>
/// <returns>
/// <c>null</c> when no HTML could be obtained; <c>/favicon.ico</c> on the page's origin when the
/// HTML yields no candidates; otherwise the candidate with the highest priority, ties broken
/// by the highest score and then by document order.
/// </returns>
private async Task<string?> FetchIconUrlAsync(Uri pageUri, CancellationToken ct)
{
    var markup = await FetchHtmlAsync(pageUri, ct);
    if (string.IsNullOrEmpty(markup))
    {
        return null;
    }

    var candidates = ParseIconLinks(markup, pageUri);
    if (candidates.Count == 0)
    {
        // Nothing declared in the HTML: fall back to the conventional location.
        return new Uri(pageUri, "/favicon.ico").ToString();
    }

    // Single pass. Only a strictly better candidate replaces the current winner,
    // so the earliest candidate wins a tie.
    var winner = candidates[0];
    for (var i = 1; i < candidates.Count; i++)
    {
        var contender = candidates[i];
        var outranks = contender.Priority > winner.Priority
            || (contender.Priority == winner.Priority && contender.Score > winner.Score);
        if (outranks)
        {
            winner = contender;
        }
    }

    return winner.Url;
}
/// <summary>
/// Downloads the page at <paramref name="pageUri"/> and returns its body decoded as text.
/// At most 1MB of the body is read. Any request timeout comes from the named HttpClient's
/// configuration, which is not visible in this file.
/// </summary>
/// <param name="pageUri">Page to download.</param>
/// <param name="ct">Cancellation token for the request and the body read.</param>
/// <returns>
/// The decoded HTML, or <c>null</c> when the host fails the SSRF check, the response is not
/// a success status, or the declared Content-Length exceeds 1MB.
/// </returns>
private async Task<string?> FetchHtmlAsync(Uri pageUri, CancellationToken ct)
{
    const int maxHtmlBytes = 1024 * 1024;

    var blocked = await IsPrivateOrLocalhostAsync(pageUri, ct);
    if (blocked)
    {
        return null;
    }

    using var client = NewClient();
    using var request = new HttpRequestMessage(HttpMethod.Get, pageUri);
    request.Headers.Add("User-Agent", UserAgent);
    request.Headers.Add("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");

    using var response = await client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, ct);
    var contentHeaders = response.Content.Headers;

    // P33: verbose log so the operator can see what kind of response came back.
    _logger.LogInformation("Favicon fetch HTML: {Url} → {Status} {ContentType} ({Len} bytes)",
        pageUri, (int)response.StatusCode, contentHeaders.ContentType?.MediaType ?? "?",
        contentHeaders.ContentLength ?? -1);

    if (!response.IsSuccessStatusCode)
    {
        _logger.LogDebug("Favicon fetch: {Url} returned {Status}, skip", pageUri, response.StatusCode);
        return null;
    }

    // Reject bodies that announce themselves as larger than the cap.
    if (contentHeaders.ContentLength is long declared && declared > maxHtmlBytes)
    {
        return null;
    }

    await using var body = await response.Content.ReadAsStreamAsync(ct);
    var buffer = new byte[maxHtmlBytes];
    var filled = 0;
    while (filled < buffer.Length)
    {
        var chunk = await body.ReadAsync(buffer.AsMemory(filled, buffer.Length - filled), ct);
        if (chunk <= 0)
        {
            break;
        }
        filled += chunk;
    }

    // Decode with the charset from Content-Type; fall back to UTF-8 when it is unusable.
    var charset = contentHeaders.ContentType?.CharSet ?? "utf-8";
    string html;
    try
    {
        var encoding = System.Text.Encoding.GetEncoding(charset);
        html = encoding.GetString(buffer, 0, filled);
    }
    catch
    {
        html = System.Text.Encoding.UTF8.GetString(buffer, 0, filled);
    }

    // P33: log the length and whether an icon link is present at all, to tell
    // "really no icon" apart from "got some other page".
    string[] iconMarkers = { "rel=\"icon\"", "rel='icon'", "rel=\"alternate icon\"" };
    var mentionsIcon = Array.Exists(iconMarkers, m => html.Contains(m, StringComparison.OrdinalIgnoreCase));
    _logger.LogDebug("Favicon HTML scan: {Url} len={Len} contains-icon-link={Has}",
        pageUri, filled, mentionsIcon);

    if (!mentionsIcon)
    {
        // Show the first 200 characters so it is visible which page answered
        // (for example a reverse-proxy interstitial).
        var snippet = html.Length == 0 ? "(empty)" : html.Substring(0, Math.Min(200, html.Length));
        _logger.LogWarning("Favicon HTML has no <link rel=icon>: {Url} → first 200 chars: {Snippet}",
            pageUri, snippet);
    }

    return html;
}
/// <summary>Matches one whole &lt;link ...&gt; tag (self-closing or not); group 1 is its attribute text.</summary>
private static readonly Regex LinkTagPattern = new Regex(
    @"<link\b([^>]*?)/?>",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Matches a quoted attribute inside a link tag. The (?&lt;![-\w]) look-behind keeps
/// data-* attributes such as data-base-href / data-href from being read as href
/// (P33 fix: this used to truncate GitHub's favicon.svg link and produce a 404).
/// </summary>
private static readonly Regex LinkAttrPattern = new Regex(
    @"(?<![-\w])(rel|href|size|sizes|type|as)\s*=\s*[""']([^""']*)[""']",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>Matches &lt;meta property="og:image" content="..."&gt; with property first.</summary>
private static readonly Regex OgImagePattern = new Regex(
    @"<meta\b[^>]*?\bproperty\s*=\s*[""']og:image[""'][^>]*?\bcontent\s*=\s*[""']([^""']+)[""']",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>Same as <see cref="OgImagePattern"/> but with content before property.</summary>
private static readonly Regex OgImagePatternAlt = new Regex(
    @"<meta\b[^>]*?\bcontent\s*=\s*[""']([^""']+)[""'][^>]*?\bproperty\s*=\s*[""']og:image[""']",
    RegexOptions.IgnoreCase | RegexOptions.Compiled);

/// <summary>
/// Returns the largest dimension named in a link <c>sizes</c> attribute.
/// "any" maps to 512; a missing or unparsable value maps to 0.
/// </summary>
private static int ParseLargestIconSize(string? sizes)
{
    if (string.IsNullOrWhiteSpace(sizes)) return 0;
    if (sizes.Trim().Equals("any", StringComparison.OrdinalIgnoreCase)) return 512; // usually svg / high resolution

    var largest = 0;
    // Tokens are whitespace-separated and the separator may be "x" or "X".
    foreach (var token in sizes.Split((char[]?)null, StringSplitOptions.RemoveEmptyEntries))
    {
        var parts = token.Split(new[] { 'x', 'X' }, 2);
        if (parts.Length == 2
            && int.TryParse(parts[0], System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out var w)
            && int.TryParse(parts[1], System.Globalization.NumberStyles.None, System.Globalization.CultureInfo.InvariantCulture, out var h))
        {
            largest = Math.Max(largest, Math.Max(w, h));
        }
    }
    return largest;
}

/// <summary>
/// Extracts favicon candidates from <paramref name="html"/>.
/// Every &lt;link&gt; whose rel contains "icon" (except mask-icon) becomes a candidate, with
/// rel and href accepted in any order. The first &lt;meta property="og:image"&gt; is added
/// as a low-priority fallback.
/// Priorities: apple-touch 300, "icon" 200, other rel containing "icon" 150,
/// "shortcut icon" 100, og:image 30. Score is the largest declared size
/// (180 for an apple-touch icon that declares none).
/// </summary>
/// <param name="html">Page markup.</param>
/// <param name="baseUri">Base used to resolve relative icon URLs.</param>
/// <returns>Candidates in document order, with the og:image fallback last; may be empty.</returns>
private List<IconLink> ParseIconLinks(string html, Uri baseUri)
{
    var results = new List<IconLink>();

    // ===== Step 1: <link rel="..." href="..." [sizes] [type]> =====
    foreach (Match linkMatch in LinkTagPattern.Matches(html))
    {
        string? rel = null, href = null, sizes = null, type = null;
        foreach (Match attr in LinkAttrPattern.Matches(linkMatch.Groups[1].Value))
        {
            var value = attr.Groups[2].Value.Trim();
            switch (attr.Groups[1].Value.ToLowerInvariant())
            {
                case "rel": rel = value; break;
                // Attribute values are entity-encoded in HTML ("&amp;" in query strings).
                case "href": href = WebUtility.HtmlDecode(value).Trim(); break;
                case "sizes": sizes = value; break;
                case "type": type = value; break;
            }
        }

        if (string.IsNullOrEmpty(rel) || string.IsNullOrEmpty(href)) continue;
        if (href.StartsWith("data:", StringComparison.OrdinalIgnoreCase)) continue;

        var relLower = rel.ToLowerInvariant();
        if (!relLower.Contains("icon")) continue;
        if (relLower == "mask-icon") continue; // Safari pinned-tab mask, not an image

        // Skip declared non-image types (rare, but cheap to guard against).
        if (!string.IsNullOrEmpty(type)
            && !type.StartsWith("image/", StringComparison.OrdinalIgnoreCase)
            && !type.Contains("icon", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        var maxSize = ParseLargestIconSize(sizes);

        if (!Uri.TryCreate(baseUri, href, out var absoluteUri)) continue;
        if (absoluteUri.Scheme != Uri.UriSchemeHttp && absoluteUri.Scheme != Uri.UriSchemeHttps) continue;

        // relLower is known to contain "icon" here, so the last arm covers
        // alternate icon / fluid-icon / any other icon-like rel.
        var isAppleTouch = relLower.Contains("apple-touch");
        int priority;
        if (isAppleTouch) priority = 300;
        else if (relLower == "icon") priority = 200;
        else if (relLower == "shortcut icon") priority = 100;
        else priority = 150;

        var score = isAppleTouch && maxSize <= 0 ? 180 : maxSize;

        _logger.LogDebug("Favicon link candidate: rel={Rel} href={Href} sizes={Sizes} → priority={P} score={S}",
            relLower, absoluteUri, sizes ?? "-", priority, score);
        results.Add(new IconLink
        {
            Url = absoluteUri.ToString(),
            Priority = priority,
            Score = score
        });
    }

    // ===== Step 2: fallback <meta property="og:image" content="..."> =====
    var ogMatch = OgImagePattern.Match(html);
    if (!ogMatch.Success) ogMatch = OgImagePatternAlt.Match(html);
    var ogImage = ogMatch.Success ? WebUtility.HtmlDecode(ogMatch.Groups[1].Value).Trim() : null;

    if (!string.IsNullOrEmpty(ogImage) && Uri.TryCreate(baseUri, ogImage, out var ogUri)
        && (ogUri.Scheme == Uri.UriSchemeHttp || ogUri.Scheme == Uri.UriSchemeHttps))
    {
        _logger.LogDebug("Favicon og:image fallback: {Url}", ogUri);
        results.Add(new IconLink
        {
            Url = ogUri.ToString(),
            Priority = 30, // below every <link> candidate so it never beats a real favicon
            Score = 0
        });
    }

    return results;
}
/// <summary>
/// Downloads the icon at <paramref name="iconUrl"/> and hands it to the upload service
/// under the "favicons" sub-directory.
/// </summary>
/// <param name="iconUrl">Absolute http/https URL of the icon.</param>
/// <param name="pageUri">Page the icon belongs to; its scheme and host are sent as Referer.</param>
/// <param name="ct">Cancellation token for the request and the body read.</param>
/// <returns>
/// The URL reported by the upload service, or <c>null</c> when the URL is invalid, the host
/// fails the SSRF check, the response is not a success status, the content type is neither
/// image/* nor application/octet-stream, or the body is empty or reaches
/// <see cref="MaxIconBytes"/>.
/// </returns>
private async Task<string?> DownloadAndSaveAsync(string iconUrl, Uri pageUri, CancellationToken ct)
{
    if (!Uri.TryCreate(iconUrl, UriKind.Absolute, out var iconUri)) return null;
    if (iconUri.Scheme != Uri.UriSchemeHttp && iconUri.Scheme != Uri.UriSchemeHttps) return null;
    if (await IsPrivateOrLocalhostAsync(iconUri, ct)) return null;

    using var client = NewClient();
    using var req = new HttpRequestMessage(HttpMethod.Get, iconUri);
    req.Headers.Add("User-Agent", UserAgent);
    req.Headers.Add("Referer", pageUri.Scheme + "://" + pageUri.Host);
    using var resp = await client.SendAsync(req, HttpCompletionOption.ResponseHeadersRead, ct);
    if (!resp.IsSuccessStatusCode) return null;

    // Content-type check
    var contentType = resp.Content.Headers.ContentType?.MediaType ?? "";
    if (!contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase) &&
        !contentType.Equals("application/octet-stream", StringComparison.OrdinalIgnoreCase))
    {
        return null;
    }

    // Reject bodies that announce themselves as too large.
    var contentLength = resp.Content.Headers.ContentLength;
    if (contentLength.HasValue && contentLength.Value > MaxIconBytes) return null;

    await using var stream = await resp.Content.ReadAsStreamAsync(ct);
    // Buffer in memory so the size cap holds even without a Content-Length header.
    using var ms = new MemoryStream();
    var buffer = new byte[81920];
    long total = 0;
    while (total < MaxIconBytes)
    {
        var want = (int)Math.Min(buffer.Length, MaxIconBytes - total);
        // Pass ct so a stalled body read can actually be cancelled.
        var read = await stream.ReadAsync(buffer.AsMemory(0, want), ct);
        if (read <= 0) break;
        ms.Write(buffer, 0, read);
        total += read;
    }
    // Empty body, or the cap was reached (the body may have been truncated): give up.
    if (total == 0 || total >= MaxIconBytes) return null;
    ms.Position = 0;

    // File name: last segment of the icon URL path, or "favicon" when there is none.
    var fileName = Path.GetFileName(iconUri.AbsolutePath);
    if (string.IsNullOrEmpty(fileName)) fileName = "favicon";

    var result = await _upload.SaveStreamAsync(ms, fileName, contentType, subDir: "favicons");
    return result.Url;
}
/// <summary>
/// SSRF guard: resolves the URI's host and reports whether it must not be contacted.
/// </summary>
/// <param name="uri">Target of the outgoing request.</param>
/// <param name="ct">Cancellation token for the DNS lookup.</param>
/// <returns>
/// <c>true</c> when the host is "localhost" (or a *.localhost name), when DNS resolution fails
/// or returns no addresses, or when any resolved address is private/local per
/// <see cref="IsPrivateOrLocalIp"/>; otherwise <c>false</c>. Fails closed: any exception
/// yields <c>true</c>.
/// </returns>
private async Task<bool> IsPrivateOrLocalhostAsync(Uri uri, CancellationToken ct)
{
    try
    {
        // DnsSafeHost is the form intended for DNS resolution (no IPv6 brackets).
        var host = uri.DnsSafeHost;
        if (string.IsNullOrEmpty(host)) return true;

        // Literal localhost names. This check used to be gated on UriHostNameType.Basic,
        // which is not the type reported for DNS names, so it never ran.
        if (host.Equals("localhost", StringComparison.OrdinalIgnoreCase)
            || host.EndsWith(".localhost", StringComparison.OrdinalIgnoreCase))
        {
            return true;
        }

        var addresses = await Dns.GetHostAddressesAsync(host, ct);

        // Nothing to validate means nothing is known to be safe.
        if (addresses.Length == 0) return true;

        foreach (var ip in addresses)
        {
            if (IsPrivateOrLocalIp(ip)) return true;
        }
        return false;
    }
    catch (Exception ex)
    {
        // Resolution failure (or cancellation) is treated as unsafe.
        _logger.LogDebug(ex, "Favicon SSRF check could not resolve host of {Url}; treating as unsafe", uri);
        return true;
    }
}
/// <summary>
/// Reports whether <paramref name="ip"/> lies in a range that outgoing fetches must not reach.
/// </summary>
/// <param name="ip">Address to classify. IPv4-mapped IPv6 addresses are judged by their embedded IPv4 address.</param>
/// <returns>
/// <c>true</c> for loopback; IPv4 0.0.0.0/8, 10.0.0.0/8, 100.64.0.0/10, 169.254.0.0/16,
/// 172.16.0.0/12, 192.168.0.0/16 and 224.0.0.0 upward (multicast / reserved / broadcast);
/// IPv6 unspecified (::), link-local, site-local and unique-local (fc00::/7).
/// <c>false</c> otherwise.
/// </returns>
private static bool IsPrivateOrLocalIp(IPAddress ip)
{
    // ::ffff:a.b.c.d reaches the same host as a.b.c.d, so unwrap before classifying.
    if (ip.IsIPv4MappedToIPv6) ip = ip.MapToIPv4();

    if (IPAddress.IsLoopback(ip)) return true;

    if (ip.AddressFamily == AddressFamily.InterNetwork)
    {
        var b = ip.GetAddressBytes();
        return b[0] switch
        {
            0 => true,                          // 0.0.0.0/8 ("this network", includes 0.0.0.0)
            10 => true,                         // 10.0.0.0/8
            100 => b[1] is >= 64 and <= 127,    // 100.64.0.0/10 (carrier-grade NAT; some cloud metadata endpoints)
            169 => b[1] == 254,                 // 169.254.0.0/16 (link-local)
            172 => b[1] is >= 16 and <= 31,     // 172.16.0.0/12
            192 => b[1] == 168,                 // 192.168.0.0/16
            >= 224 => true,                     // multicast, reserved, broadcast
            _ => false
        };
    }

    if (ip.AddressFamily == AddressFamily.InterNetworkV6)
    {
        if (ip.Equals(IPAddress.IPv6Any)) return true;                 // ::
        if (ip.IsIPv6LinkLocal || ip.IsIPv6SiteLocal) return true;     // fe80::/10, fec0::/10
        var b = ip.GetAddressBytes();
        if ((b[0] & 0xFE) == 0xFC) return true;                        // fc00::/7 (unique local)
    }

    return false;
}
/// <summary>One favicon candidate discovered while parsing a page.</summary>
private class IconLink
{
    /// <summary>Ranking tier; a higher value is preferred.</summary>
    public int Priority { get; set; }

    /// <summary>Tie-breaker within a tier; a higher value is preferred.</summary>
    public int Score { get; set; }

    /// <summary>Absolute URL of the candidate icon; empty until assigned.</summary>
    public string Url { get; set; } = "";
}
}