mirror of
				https://github.com/go-gitea/gitea
				synced 2025-10-31 11:28:24 +00:00 
			
		
		
		
	* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
		
			
				
	
	
		
			183 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
			
		
		
	
	
			183 lines
		
	
	
		
			4.6 KiB
		
	
	
	
		
			Go
		
	
	
	
		
			Vendored
		
	
	
	
| package brotli
 | |
| 
 | |
| func utf8Position(last uint, c uint, clamp uint) uint {
 | |
| 	if c < 128 {
 | |
| 		return 0 /* Next one is the 'Byte 1' again. */
 | |
| 	} else if c >= 192 { /* Next one is the 'Byte 2' of utf-8 encoding. */
 | |
| 		return brotli_min_size_t(1, clamp)
 | |
| 	} else {
 | |
| 		/* Let's decide over the last byte if this ends the sequence. */
 | |
| 		if last < 0xE0 {
 | |
| 			return 0 /* Completed two or three byte coding. */ /* Next one is the 'Byte 3' of utf-8 encoding. */
 | |
| 		} else {
 | |
| 			return brotli_min_size_t(2, clamp)
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func decideMultiByteStatsLevel(pos uint, len uint, mask uint, data []byte) uint {
 | |
| 	var counts = [3]uint{0} /* should be 2, but 1 compresses better. */
 | |
| 	var max_utf8 uint = 1
 | |
| 	var last_c uint = 0
 | |
| 	var i uint
 | |
| 	for i = 0; i < len; i++ {
 | |
| 		var c uint = uint(data[(pos+i)&mask])
 | |
| 		counts[utf8Position(last_c, c, 2)]++
 | |
| 		last_c = c
 | |
| 	}
 | |
| 
 | |
| 	if counts[2] < 500 {
 | |
| 		max_utf8 = 1
 | |
| 	}
 | |
| 
 | |
| 	if counts[1]+counts[2] < 25 {
 | |
| 		max_utf8 = 0
 | |
| 	}
 | |
| 
 | |
| 	return max_utf8
 | |
| }
 | |
| 
 | |
| func estimateBitCostsForLiteralsUTF8(pos uint, len uint, mask uint, data []byte, cost []float32) {
 | |
| 	var max_utf8 uint = decideMultiByteStatsLevel(pos, uint(len), mask, data)
 | |
| 	/* Bootstrap histograms. */
 | |
| 	var histogram = [3][256]uint{[256]uint{0}}
 | |
| 	var window_half uint = 495
 | |
| 	var in_window uint = brotli_min_size_t(window_half, uint(len))
 | |
| 	var in_window_utf8 = [3]uint{0}
 | |
| 	/* max_utf8 is 0 (normal ASCII single byte modeling),
 | |
| 	   1 (for 2-byte UTF-8 modeling), or 2 (for 3-byte UTF-8 modeling). */
 | |
| 
 | |
| 	var i uint
 | |
| 	{
 | |
| 		var last_c uint = 0
 | |
| 		var utf8_pos uint = 0
 | |
| 		for i = 0; i < in_window; i++ {
 | |
| 			var c uint = uint(data[(pos+i)&mask])
 | |
| 			histogram[utf8_pos][c]++
 | |
| 			in_window_utf8[utf8_pos]++
 | |
| 			utf8_pos = utf8Position(last_c, c, max_utf8)
 | |
| 			last_c = c
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Compute bit costs with sliding window. */
 | |
| 	for i = 0; i < len; i++ {
 | |
| 		if i >= window_half {
 | |
| 			var c uint
 | |
| 			var last_c uint
 | |
| 			if i < window_half+1 {
 | |
| 				c = 0
 | |
| 			} else {
 | |
| 				c = uint(data[(pos+i-window_half-1)&mask])
 | |
| 			}
 | |
| 			if i < window_half+2 {
 | |
| 				last_c = 0
 | |
| 			} else {
 | |
| 				last_c = uint(data[(pos+i-window_half-2)&mask])
 | |
| 			}
 | |
| 			/* Remove a byte in the past. */
 | |
| 
 | |
| 			var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
 | |
| 			histogram[utf8_pos2][data[(pos+i-window_half)&mask]]--
 | |
| 			in_window_utf8[utf8_pos2]--
 | |
| 		}
 | |
| 
 | |
| 		if i+window_half < len {
 | |
| 			var c uint = uint(data[(pos+i+window_half-1)&mask])
 | |
| 			var last_c uint = uint(data[(pos+i+window_half-2)&mask])
 | |
| 			/* Add a byte in the future. */
 | |
| 
 | |
| 			var utf8_pos2 uint = utf8Position(last_c, c, max_utf8)
 | |
| 			histogram[utf8_pos2][data[(pos+i+window_half)&mask]]++
 | |
| 			in_window_utf8[utf8_pos2]++
 | |
| 		}
 | |
| 		{
 | |
| 			var c uint
 | |
| 			var last_c uint
 | |
| 			if i < 1 {
 | |
| 				c = 0
 | |
| 			} else {
 | |
| 				c = uint(data[(pos+i-1)&mask])
 | |
| 			}
 | |
| 			if i < 2 {
 | |
| 				last_c = 0
 | |
| 			} else {
 | |
| 				last_c = uint(data[(pos+i-2)&mask])
 | |
| 			}
 | |
| 			var utf8_pos uint = utf8Position(last_c, c, max_utf8)
 | |
| 			var masked_pos uint = (pos + i) & mask
 | |
| 			var histo uint = histogram[utf8_pos][data[masked_pos]]
 | |
| 			var lit_cost float64
 | |
| 			if histo == 0 {
 | |
| 				histo = 1
 | |
| 			}
 | |
| 
 | |
| 			lit_cost = fastLog2(in_window_utf8[utf8_pos]) - fastLog2(histo)
 | |
| 			lit_cost += 0.02905
 | |
| 			if lit_cost < 1.0 {
 | |
| 				lit_cost *= 0.5
 | |
| 				lit_cost += 0.5
 | |
| 			}
 | |
| 
 | |
| 			/* Make the first bytes more expensive -- seems to help, not sure why.
 | |
| 			   Perhaps because the entropy source is changing its properties
 | |
| 			   rapidly in the beginning of the file, perhaps because the beginning
 | |
| 			   of the data is a statistical "anomaly". */
 | |
| 			if i < 2000 {
 | |
| 				lit_cost += 0.7 - (float64(2000-i) / 2000.0 * 0.35)
 | |
| 			}
 | |
| 
 | |
| 			cost[i] = float32(lit_cost)
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
| func estimateBitCostsForLiterals(pos uint, len uint, mask uint, data []byte, cost []float32) {
 | |
| 	if isMostlyUTF8(data, pos, mask, uint(len), kMinUTF8Ratio) {
 | |
| 		estimateBitCostsForLiteralsUTF8(pos, uint(len), mask, data, cost)
 | |
| 		return
 | |
| 	} else {
 | |
| 		var histogram = [256]uint{0}
 | |
| 		var window_half uint = 2000
 | |
| 		var in_window uint = brotli_min_size_t(window_half, uint(len))
 | |
| 		var i uint
 | |
| 		/* Bootstrap histogram. */
 | |
| 		for i = 0; i < in_window; i++ {
 | |
| 			histogram[data[(pos+i)&mask]]++
 | |
| 		}
 | |
| 
 | |
| 		/* Compute bit costs with sliding window. */
 | |
| 		for i = 0; i < len; i++ {
 | |
| 			var histo uint
 | |
| 			if i >= window_half {
 | |
| 				/* Remove a byte in the past. */
 | |
| 				histogram[data[(pos+i-window_half)&mask]]--
 | |
| 
 | |
| 				in_window--
 | |
| 			}
 | |
| 
 | |
| 			if i+window_half < len {
 | |
| 				/* Add a byte in the future. */
 | |
| 				histogram[data[(pos+i+window_half)&mask]]++
 | |
| 
 | |
| 				in_window++
 | |
| 			}
 | |
| 
 | |
| 			histo = histogram[data[(pos+i)&mask]]
 | |
| 			if histo == 0 {
 | |
| 				histo = 1
 | |
| 			}
 | |
| 			{
 | |
| 				var lit_cost float64 = fastLog2(in_window) - fastLog2(histo)
 | |
| 				lit_cost += 0.029
 | |
| 				if lit_cost < 1.0 {
 | |
| 					lit_cost *= 0.5
 | |
| 					lit_cost += 0.5
 | |
| 				}
 | |
| 
 | |
| 				cost[i] = float32(lit_cost)
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| }
 |