优化sh

2025-06-12 18:36:48 +08:00
parent a8498ad24a
commit e0dbf304e2
2 changed files with 98 additions and 425 deletions
--- a/src/main.go
+++ b/src/main.go
@@ -209,61 +209,75 @@ func proxyWithRedirect(c *gin.Context, u string, redirectCount int) {
 		realHost = "https://" + realHost
 	}

-	// 检查是否为gzip压缩内容
-	isGzipCompressed := resp.Header.Get("Content-Encoding") == "gzip"
-	
-	// 使用智能处理器自动处理所有内容
-	processedBody, processedSize, err := ProcessSmart(resp.Body, isGzipCompressed, realHost)
-	if err != nil {
-		// 优雅降级 - 处理失败时使用直接代理模式
-		fmt.Printf("智能处理失败，回退到直接代理: %v\n", err)
+	// 🚀 高性能预筛选：仅对.sh文件进行智能处理
+	if strings.HasSuffix(strings.ToLower(u), ".sh") {
+		// 检查是否为gzip压缩内容
+		isGzipCompressed := resp.Header.Get("Content-Encoding") == "gzip"
 		
-		// 复制原始响应头
+		// 仅对shell脚本使用智能处理器
+		processedBody, processedSize, err := ProcessSmart(resp.Body, isGzipCompressed, realHost)
+		if err != nil {
+			// 优雅降级 - 处理失败时使用直接代理模式
+			fmt.Printf("智能处理失败，回退到直接代理: %v\n", err)
+			processedBody = resp.Body
+			processedSize = 0
+		}
+
+		// 智能设置响应头
+		if processedSize > 0 {
+			// 内容被处理过，清理压缩相关头，使用chunked传输
+			resp.Header.Del("Content-Length")
+			resp.Header.Del("Content-Encoding")
+			resp.Header.Set("Transfer-Encoding", "chunked")
+		}
+
+		// 复制其他响应头
 		for key, values := range resp.Header {
 			for _, value := range values {
 				c.Header(key, value)
 			}
 		}
-		
+
+		if location := resp.Header.Get("Location"); location != "" {
+			if checkURL(location) != nil {
+				c.Header("Location", "/"+location)
+			} else {
+				proxyWithRedirect(c, location, redirectCount+1)
+				return
+			}
+		}
+
 		c.Status(resp.StatusCode)
-		
-		// 直接转发原始内容
-		if _, err := io.Copy(c.Writer, resp.Body); err != nil {
-			fmt.Printf("直接代理模式复制内容失败: %v\n", err)
-		}
-		return
-	}

-	// 智能设置响应头
-	if processedSize > 0 {
-		// 内容被处理过，清理压缩相关头，使用chunked传输
-		resp.Header.Del("Content-Length")
-		resp.Header.Del("Content-Encoding") // 重要：清理压缩头，防止浏览器重复解压
-		resp.Header.Set("Transfer-Encoding", "chunked")
-	}
-
-	// 复制其他响应头
-	for key, values := range resp.Header {
-		for _, value := range values {
-			c.Header(key, value)
-		}
-	}
-
-	if location := resp.Header.Get("Location"); location != "" {
-		if checkURL(location) != nil {
-			c.Header("Location", "/"+location)
-		} else {
-			// 递归处理重定向，增加计数防止无限循环
-			proxyWithRedirect(c, location, redirectCount+1)
+		// 输出处理后的内容
+		if _, err := io.Copy(c.Writer, processedBody); err != nil {
 			return
 		}
-	}
+	} else {
+		// 🔥 非.sh文件：直接高性能流式代理，零内存消耗
+		// 复制所有响应头
+		for key, values := range resp.Header {
+			for _, value := range values {
+				c.Header(key, value)
+			}
+		}

-	c.Status(resp.StatusCode)
+		// 处理重定向
+		if location := resp.Header.Get("Location"); location != "" {
+			if checkURL(location) != nil {
+				c.Header("Location", "/"+location)
+			} else {
+				proxyWithRedirect(c, location, redirectCount+1)
+				return
+			}
+		}

-	// 输出处理后的内容
-	if _, err := io.Copy(c.Writer, processedBody); err != nil {
-		return
+		c.Status(resp.StatusCode)
+
+		// 直接流式转发，零内存拷贝
+		if _, err := io.Copy(c.Writer, resp.Body); err != nil {
+			fmt.Printf("直接代理失败: %v\n", err)
+		}
 	}
 }

--- a/src/proxysh.go
+++ b/src/proxysh.go
@@ -3,229 +3,52 @@ package main
 import (
 	"bytes"
 	"compress/gzip"
-	"crypto/md5"
 	"fmt"
 	"io"
 	"regexp"
 	"strings"
-	"sync"
-	"sync/atomic"
-	"time"
 )

-const (
-	// 性能优化常量
-	SMART_BUFFER_SIZE = 32 * 1024      // 32KB 缓冲区
-	CACHE_DURATION   = 20 * time.Minute // 20分钟缓存
-	MAX_CACHE_ITEMS  = 500              // 最大缓存条目
-	
-	// 处理限制
-	MAX_CONTENT_SIZE = 30 * 1024 * 1024 // 30MB文件大小限制
-	PROCESS_TIMEOUT  = 5 * time.Second   // 5秒处理超时
-)
+// GitHub URL正则表达式
+var githubRegex = regexp.MustCompile(`https?://(?:github\.com|raw\.githubusercontent\.com|raw\.github\.com|gist\.githubusercontent\.com|gist\.github\.com|api\.github\.com)[^\s'"]+`)

-// SmartProcessor 智能处理器 - 核心引擎
-type SmartProcessor struct {
-	// 编译优化
-	githubRegex   *regexp.Regexp      // GitHub URL正则
-	shellPatterns []string            // Shell特征模式
-	
-	// 高性能缓存
-	cache         sync.Map            // 线程安全缓存
-	bufferPool    sync.Pool           // 缓冲池
-	
-	// 原子统计
-	totalRequests int64               // 总请求数
-	cacheHits     int64               // 缓存命中
-	processed     int64               // 实际处理
-	
-	// 控制开关
-	enabled       int32               // 启用状态
-}
-
-// CacheItem 缓存项 - 内存对齐
-type CacheItem struct {
-	content   string                 // 处理后内容
-	timestamp int64                  // 创建时间
-	hits      int32                  // 访问次数
-}
-
-// 全局单例
-var (
-	smartProcessor *SmartProcessor
-	smartOnce      sync.Once
-	cleanupOnce    sync.Once
-)
-
-// GetSmartProcessor 获取智能处理器实例
-func GetSmartProcessor() *SmartProcessor {
-	smartOnce.Do(func() {
-		smartProcessor = &SmartProcessor{
-			githubRegex: regexp.MustCompile(`https?://(?:github\.com|raw\.githubusercontent\.com|raw\.github\.com|gist\.githubusercontent\.com|gist\.github\.com|api\.github\.com)[^\s'"]+`),
-			shellPatterns: []string{
-				"#!/bin/bash", "#!/bin/sh", "#!/usr/bin/env bash",
-				"curl ", "wget ", "git clone", "docker ", "export ",
-			},
-			enabled: 1, // 默认启用
-		}
-		
-		// 初始化缓冲池
-		smartProcessor.bufferPool.New = func() interface{} {
-			return make([]byte, SMART_BUFFER_SIZE)
-		}
-		
-		// 启动后台清理
-		cleanupOnce.Do(func() {
-			go smartProcessor.backgroundCleanup()
-		})
-	})
-	return smartProcessor
-}
-
-// ProcessSmart 智能处理函数
+// ProcessSmart Shell脚本智能处理函数
 func ProcessSmart(input io.ReadCloser, isCompressed bool, host string) (io.Reader, int64, error) {
-	processor := GetSmartProcessor()
+	defer input.Close()
 	
-	// 快速检查是否启用
-	if atomic.LoadInt32(&processor.enabled) == 0 {
-		return input, 0, nil
-	}
-	
-	atomic.AddInt64(&processor.totalRequests, 1)
-	
-	// 快速读取内容
-	content, err := processor.readContent(input, isCompressed)
+	// 读取Shell脚本内容
+	content, err := readShellContent(input, isCompressed)
 	if err != nil {
-		// 优雅降级：读取错误时返回错误，让上层处理
 		return nil, 0, fmt.Errorf("内容读取失败: %v", err)
 	}
+	
 	if len(content) == 0 {
-		// 空内容，返回空读取器
 		return strings.NewReader(""), 0, nil
 	}
 	
-	contentSize := int64(len(content))
-	
-	// 大文件检查 - 自动支持大文件
-	if contentSize > MAX_CONTENT_SIZE {
-		return strings.NewReader(content), contentSize, nil // 直接返回，不处理
+	// Shell脚本大小检查 (10MB限制)
+	if len(content) > 10*1024*1024 {
+		return strings.NewReader(content), int64(len(content)), nil
 	}
 	
-	// 智能内容分析
-	if !processor.needsProcessing(content) {
-		return strings.NewReader(content), contentSize, nil
+	// 快速检查是否包含GitHub URL
+	if !strings.Contains(content, "github.com") && !strings.Contains(content, "githubusercontent.com") {
+		return strings.NewReader(content), int64(len(content)), nil
 	}
 	
-	// 缓存检查
-	cacheKey := processor.getCacheKey(content, host)
-	if cached := processor.getFromCache(cacheKey); cached != nil {
-		atomic.AddInt64(&processor.cacheHits, 1)
-		return strings.NewReader(cached.content), int64(len(cached.content)), nil
-	}
-	
-	// 智能处理
-	processed := processor.processContent(content, host)
-	atomic.AddInt64(&processor.processed, 1)
-	
-	// 存入缓存
-	processor.saveToCache(cacheKey, processed)
+	// 执行GitHub URL替换
+	processed := processGitHubURLs(content, host)
 	
 	return strings.NewReader(processed), int64(len(processed)), nil
 }

-
-// needsProcessing 智能判断是否需要处理
-func (sp *SmartProcessor) needsProcessing(content string) bool {
-	// 快速检查：是否包含GitHub URL
-	if !strings.Contains(content, "github.com") && 
-	   !strings.Contains(content, "githubusercontent.com") {
-		return false
-	}
-	
-	// 智能检查：是否为脚本类内容
-	return sp.isScriptContent(content)
-}
-
-// isScriptContent 智能脚本内容识别
-func (sp *SmartProcessor) isScriptContent(content string) bool {
-	// 空内容检查
-	if len(content) < 10 {
-		return false
-	}
-	
-	// 二进制文件快速排除
-	if sp.isBinaryContent(content) {
-		return false
-	}
-	
-	// 检查前500字符的特征
-	preview := content
-	if len(content) > 500 {
-		preview = content[:500]
-	}
-	
-	// Shell特征检查
-	for _, pattern := range sp.shellPatterns {
-		if strings.Contains(preview, pattern) {
-			return true
-		}
-	}
-	
-	// Dockerfile特征
-	if strings.Contains(preview, "FROM ") || strings.Contains(preview, "RUN ") {
-		return true
-	}
-	
-
-	
-	// 其他脚本特征
-	scriptIndicators := []string{
-		"#!/", "set -e", "function ", "echo ", "mkdir ", "chmod ",
-		"apt-get", "yum ", "brew ", "sudo ", "source ", ". /",
-	}
-	
-	for _, indicator := range scriptIndicators {
-		if strings.Contains(preview, indicator) {
-			return true
-		}
-	}
-	
-	return false
-}
-
-// isBinaryContent 二进制内容检测
-func (sp *SmartProcessor) isBinaryContent(content string) bool {
-	if len(content) < 100 {
-		return false
-	}
-	
-	// 检查前100字节中的空字节
-	for i := 0; i < 100 && i < len(content); i++ {
-		if content[i] == 0 {
-			return true
-		}
-	}
-	
-	return false
-}
-
-// readContent 高效内容读取
-func (sp *SmartProcessor) readContent(input io.ReadCloser, isCompressed bool) (string, error) {
-	defer input.Close()
-	
-	// 使用缓冲池
-	buffer := sp.bufferPool.Get().([]byte)
-	defer sp.bufferPool.Put(buffer)
-	
-	var result strings.Builder
-	result.Grow(SMART_BUFFER_SIZE) // 预分配
-	
+// readShellContent 读取Shell脚本内容
+func readShellContent(input io.ReadCloser, isCompressed bool) (string, error) {
 	var reader io.Reader = input
-	var gzReader *gzip.Reader
 	
-	// 智能压缩处理 - 防止双重解压
+	// 处理gzip压缩
 	if isCompressed {
-		// 先读取一小部分数据来检测是否真的是gzip格式
+		// 读取前2字节检查gzip魔数
 		peek := make([]byte, 2)
 		n, err := input.Read(peek)
 		if err != nil && err != io.EOF {
@@ -234,218 +57,54 @@ func (sp *SmartProcessor) readContent(input io.ReadCloser, isCompressed bool) (s
 		
 		// 检查gzip魔数 (0x1f, 0x8b)
 		if n >= 2 && peek[0] == 0x1f && peek[1] == 0x8b {
-			// 确认是gzip格式，创建MultiReader组合peek数据和剩余数据
+			// 合并peek数据和剩余流
 			combinedReader := io.MultiReader(bytes.NewReader(peek[:n]), input)
-			gzReader, err = gzip.NewReader(combinedReader)
+			gzReader, err := gzip.NewReader(combinedReader)
 			if err != nil {
 				return "", fmt.Errorf("gzip解压失败: %v", err)
 			}
 			defer gzReader.Close()
 			reader = gzReader
 		} else {
-			// 不是gzip格式或者HTTP客户端已经解压，使用原始数据
-			combinedReader := io.MultiReader(bytes.NewReader(peek[:n]), input)
-			reader = combinedReader
+			// 不是gzip格式，合并peek数据
+			reader = io.MultiReader(bytes.NewReader(peek[:n]), input)
 		}
 	}
 	
-	// 读取内容
-	for {
-		n, err := reader.Read(buffer)
-		if n > 0 {
-			result.Write(buffer[:n])
-		}
-		if err == io.EOF {
-			break
-		}
-		if err != nil {
-			return "", fmt.Errorf("读取内容失败: %v", err)
-		}
-		
-		// 大文件保护
-		if result.Len() > MAX_CONTENT_SIZE {
-			break
-		}
+	// 读取全部内容
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		return "", fmt.Errorf("读取内容失败: %v", err)
 	}
 	
-	return result.String(), nil
+	return string(data), nil
 }

-// processContent 智能内容处理
-func (sp *SmartProcessor) processContent(content, host string) string {
-	// 预分配结果缓冲区
-	var result bytes.Buffer
-	result.Grow(len(content) + len(content)/5) // 预留20%空间
-	
-	// 高效替换
-	matches := sp.githubRegex.FindAllStringIndex(content, -1)
-	if len(matches) == 0 {
-		return content // 无需处理
-	}
-	
-	lastIndex := 0
-	for _, match := range matches {
-		// 写入URL前的内容
-		result.WriteString(content[lastIndex:match[0]])
-		
-		// 处理URL
-		originalURL := content[match[0]:match[1]]
-		newURL := sp.transformURL(originalURL, host)
-		result.WriteString(newURL)
-		
-		lastIndex = match[1]
-	}
-	
-	// 写入剩余内容
-	result.WriteString(content[lastIndex:])
-	
-	return result.String()
+// processGitHubURLs 处理GitHub URL替换
+func processGitHubURLs(content, host string) string {
+	return githubRegex.ReplaceAllStringFunc(content, func(url string) string {
+		return transformURL(url, host)
+	})
 }

-// transformURL 智能URL转换
-func (sp *SmartProcessor) transformURL(url, host string) string {
+// transformURL URL转换函数
+func transformURL(url, host string) string {
 	// 避免重复处理
 	if strings.Contains(url, host) {
 		return url
 	}
 	
-	// 协议标准化
-	if !strings.HasPrefix(url, "https://") {
-		if strings.HasPrefix(url, "http://") {
-			url = "https://" + url[7:]
-		} else if !strings.HasPrefix(url, "//") {
-			url = "https://" + url
-		}
+	// 协议标准化为https
+	if strings.HasPrefix(url, "http://") {
+		url = "https" + url[4:]
+	} else if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "//") {
+		url = "https://" + url
 	}
 	
-	// Host清理
+	// 清理host格式
 	cleanHost := strings.TrimPrefix(host, "https://")
 	cleanHost = strings.TrimPrefix(cleanHost, "http://")
 	cleanHost = strings.TrimSuffix(cleanHost, "/")
 	
-	// 构建新URL
 	return cleanHost + "/" + url
-}
-
-// getCacheKey 生成缓存键
-func (sp *SmartProcessor) getCacheKey(content, host string) string {
-	// 使用MD5快速哈希 - 安全的字符串转换
-	hasher := md5.New()
-	hasher.Write([]byte(content))
-	hasher.Write([]byte(host))
-	return fmt.Sprintf("%x", hasher.Sum(nil))[:16]
-}
-
-// getFromCache 从缓存获取
-func (sp *SmartProcessor) getFromCache(key string) *CacheItem {
-	if value, ok := sp.cache.Load(key); ok {
-		item := value.(*CacheItem)
-		
-		// TTL检查
-		if time.Now().UnixNano()-item.timestamp > int64(CACHE_DURATION) {
-			sp.cache.Delete(key)
-			return nil
-		}
-		
-		atomic.AddInt32(&item.hits, 1)
-		return item
-	}
-	return nil
-}
-
-// saveToCache 保存到缓存
-func (sp *SmartProcessor) saveToCache(key, content string) {
-	item := &CacheItem{
-		content:   content,
-		timestamp: time.Now().UnixNano(),
-		hits:      0,
-	}
-	sp.cache.Store(key, item)
-}
-
-
-func (sp *SmartProcessor) backgroundCleanup() {
-	ticker := time.NewTicker(10 * time.Minute)
-	defer ticker.Stop()
-	
-	for range ticker.C {
-		sp.cleanExpiredCache()
-		sp.logStats()
-	}
-}
-
-// cleanExpiredCache 清理过期缓存
-func (sp *SmartProcessor) cleanExpiredCache() {
-	now := time.Now().UnixNano()
-	count := 0
-	
-	sp.cache.Range(func(key, value interface{}) bool {
-		count++
-		item := value.(*CacheItem)
-		
-		// 清理过期项
-		if now-item.timestamp > int64(CACHE_DURATION) {
-			sp.cache.Delete(key)
-			return true
-		}
-		
-		// 限制缓存大小（LRU）
-		if count > MAX_CACHE_ITEMS && item.hits < 2 {
-			sp.cache.Delete(key)
-		}
-		
-		return true
-	})
-}
-
-// 调试日志开关
-var smartDebugLog int32 = 1 // 1=开启, 0=关闭
-
-// logStats 统计日志
-func (sp *SmartProcessor) logStats() {
-	if atomic.LoadInt32(&smartDebugLog) == 0 {
-		return
-	}
-	
-	total := atomic.LoadInt64(&sp.totalRequests)
-	hits := atomic.LoadInt64(&sp.cacheHits)
-	processed := atomic.LoadInt64(&sp.processed)
-	
-	hitRate := float64(0)
-	if total > 0 {
-		hitRate = float64(hits) / float64(total) * 100
-	}
-	
-	fmt.Printf("智能处理器统计: 总请求=%d, 缓存命中=%d(%.1f%%), 实际处理=%d\n",
-		total, hits, hitRate, processed)
-}
-
-// Enable 启用处理器
-func (sp *SmartProcessor) Enable() {
-	atomic.StoreInt32(&sp.enabled, 1)
-}
-
-// Disable 禁用处理器  
-func (sp *SmartProcessor) Disable() {
-	atomic.StoreInt32(&sp.enabled, 0)
-}
-
-// IsEnabled 检查状态
-func (sp *SmartProcessor) IsEnabled() bool {
-	return atomic.LoadInt32(&sp.enabled) == 1
-}
-
-// ClearCache 清空缓存
-func (sp *SmartProcessor) ClearCache() {
-	sp.cache.Range(func(key, value interface{}) bool {
-		sp.cache.Delete(key)
-		return true
-	})
-}
-
-// GetStats 日志
-func (sp *SmartProcessor) GetStats() (total, hits, processed int64) {
-	return atomic.LoadInt64(&sp.totalRequests),
-		   atomic.LoadInt64(&sp.cacheHits),
-		   atomic.LoadInt64(&sp.processed)
 }