mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-31 11:28:24 +00:00 
			
		
		
		
	Detect encoding changes while parsing diff (#16330)
* Detect encoding changes while parsing diff
This commit is contained in:
		| @@ -32,6 +32,7 @@ import ( | ||||
|  | ||||
| 	"github.com/sergi/go-diff/diffmatchpatch" | ||||
| 	stdcharset "golang.org/x/net/html/charset" | ||||
| 	"golang.org/x/text/encoding" | ||||
| 	"golang.org/x/text/transform" | ||||
| ) | ||||
|  | ||||
| @@ -883,35 +884,46 @@ parsingLoop: | ||||
|  | ||||
| 	} | ||||
|  | ||||
| 	// FIXME: There are numerous issues with this: | ||||
| 	// TODO: There are numerous issues with this: | ||||
| 	// - we might want to consider detecting encoding while parsing but... | ||||
| 	// - we're likely to fail to get the correct encoding here anyway as we won't have enough information | ||||
| 	// - and this doesn't really account for changes in encoding | ||||
| 	var buf bytes.Buffer | ||||
| 	var diffLineTypeBuffers = make(map[DiffLineType]*bytes.Buffer, 3) | ||||
| 	var diffLineTypeDecoders = make(map[DiffLineType]*encoding.Decoder, 3) | ||||
| 	diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer) | ||||
| 	diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer) | ||||
| 	diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer) | ||||
| 	for _, f := range diff.Files { | ||||
| 		buf.Reset() | ||||
| 		for _, buffer := range diffLineTypeBuffers { | ||||
| 			buffer.Reset() | ||||
| 		} | ||||
| 		for _, sec := range f.Sections { | ||||
| 			for _, l := range sec.Lines { | ||||
| 				if l.Type == DiffLineSection { | ||||
| 					continue | ||||
| 				} | ||||
| 				buf.WriteString(l.Content[1:]) | ||||
| 				buf.WriteString("\n") | ||||
| 				diffLineTypeBuffers[l.Type].WriteString(l.Content[1:]) | ||||
| 				diffLineTypeBuffers[l.Type].WriteString("\n") | ||||
| 			} | ||||
| 		} | ||||
| 		charsetLabel, err := charset.DetectEncoding(buf.Bytes()) | ||||
| 		if charsetLabel != "UTF-8" && err == nil { | ||||
| 			encoding, _ := stdcharset.Lookup(charsetLabel) | ||||
| 			if encoding != nil { | ||||
| 				d := encoding.NewDecoder() | ||||
| 				for _, sec := range f.Sections { | ||||
| 					for _, l := range sec.Lines { | ||||
| 						if l.Type == DiffLineSection { | ||||
| 							continue | ||||
| 						} | ||||
| 						if c, _, err := transform.String(d, l.Content[1:]); err == nil { | ||||
| 							l.Content = l.Content[0:1] + c | ||||
| 						} | ||||
| 		for lineType, buffer := range diffLineTypeBuffers { | ||||
| 			diffLineTypeDecoders[lineType] = nil | ||||
| 			if buffer.Len() == 0 { | ||||
| 				continue | ||||
| 			} | ||||
| 			charsetLabel, err := charset.DetectEncoding(buffer.Bytes()) | ||||
| 			if charsetLabel != "UTF-8" && err == nil { | ||||
| 				encoding, _ := stdcharset.Lookup(charsetLabel) | ||||
| 				if encoding != nil { | ||||
| 					diffLineTypeDecoders[lineType] = encoding.NewDecoder() | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 		for _, sec := range f.Sections { | ||||
| 			for _, l := range sec.Lines { | ||||
| 				decoder := diffLineTypeDecoders[l.Type] | ||||
| 				if decoder != nil { | ||||
| 					if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { | ||||
| 						l.Content = l.Content[0:1] + c | ||||
| 					} | ||||
| 				} | ||||
| 			} | ||||
|   | ||||
		Reference in New Issue
	
	Block a user