1
1
mirror of https://github.com/go-gitea/gitea synced 2025-01-05 15:34:25 +00:00
gitea/services/gitdiff/csv.go
Richard Mahn 98f7013756
Prevent NPE in CSV diff rendering when column removed (#17018)
Fixes #16837 if a column is deleted.

We were clobbering the columns that were added by looping through the aline (base) and then when bline (head) was looped through, it clobbered what was in the "cells" array that is shown in the diff, and then left a nil cell because nothing was shifted.

This fix properly shifts the cells, and properly puts the b cell either at its location or after, according to what the aline placed in the cells.

This includes a test, adding a new test function since adding/removing cells works best with three columns, not two, which results in 4 columns of the resulting cells because it has a deleted column and an added column. If you try this locally, you can try those cases and others, such as adding a column.

There was no need to do anything special for the rows when `aline == 0 || bline == 0` so that was removed. This allows the same code to be used for removed or added lines, with the bcell text always being the RightCell, acell text being the LeftCell.

I still added the patch zeripath gave at https://github.com/go-gitea/gitea/issues/16837#issuecomment-913007382 so that just in case for some reason a cell is nil (which shouldn't happen now) it doesn't throw a 500 error, so the user can at least view the raw diff.

Also fixes in the [view.go](https://github.com/go-gitea/gitea/pull/17018/files#diff-43a7f4747c7ba8bff888c9be11affaafd595fd55d27f3333840eb19df9fad393L521) file how if a CSV file is empty (either created empty or if you edit it and remove all contents) it throws a huge 500 error when you then save it (when you view the file). Since we allow creating, saving and pushing empty files, we shouldn't throw an error on an empty CSV file, but just show its empty contents. This doesn't happen if it is a Markdown file or other type of file that is empty.
EDIT: Now handled in the markup/csv renderer code
2021-10-20 20:10:03 +01:00

471 lines
14 KiB
Go

// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gitdiff
import (
"encoding/csv"
"errors"
"io"
"code.gitea.io/gitea/modules/util"
)
const (
	// unmappedColumn is the value stored in a column mapping for a column
	// that has no counterpart on the other side of the diff.
	unmappedColumn = -1

	// maxRowsToInspect is the number of rows buffered per CSV file and the
	// upper bound on rows compared when matching columns by content.
	maxRowsToInspect int = 10

	// minRatioToMatch is the share of compared rows that must be equal
	// before two columns are treated as the same column.
	minRatioToMatch float32 = 0.8
)
// TableDiffCellType represents the type of a TableDiffCell.
type TableDiffCellType uint8
// TableDiffCellType possible values. Numbering starts at 1, so the zero value matches none of them.
const (
// TableDiffCellUnchanged marks a cell whose base and head values are equal and whose column did not move.
TableDiffCellUnchanged TableDiffCellType = iota + 1
// TableDiffCellChanged marks a cell whose base and head values differ while its column did not move.
TableDiffCellChanged
// TableDiffCellAdd marks a cell that only exists on the head side.
TableDiffCellAdd
// TableDiffCellDel marks a cell that only exists on the base side.
TableDiffCellDel
// TableDiffCellMovedUnchanged marks a cell with equal values whose column changed position.
TableDiffCellMovedUnchanged
// TableDiffCellMovedChanged marks a cell with differing values whose column changed position.
TableDiffCellMovedChanged
)
// TableDiffCell represents a cell of a TableDiffRow
type TableDiffCell struct {
// LeftCell is the cell text taken from the base CSV; it is left empty when both sides hold the same value.
LeftCell string
// RightCell is the cell text taken from the head CSV.
RightCell string
// Type classifies the cell (unchanged, changed, added, deleted or moved).
Type TableDiffCellType
}
// TableDiffRow represents a row of a TableDiffSection.
type TableDiffRow struct {
// RowIdx is the row number assigned while building the diff: a counter starting at 1 for
// single-sided diffs, and the head line number (0 if the row is absent in head) for two-sided diffs.
RowIdx int
// Cells are the cells of this row, one per column of the diff table.
Cells []*TableDiffCell
}
// TableDiffSection represents a section of a DiffFile.
type TableDiffSection struct {
// Rows are the diff rows of this section, in the order they were produced.
Rows []*TableDiffRow
}
// csvReader wraps a csv.Reader which buffers the first rows.
type csvReader struct {
// reader is the underlying CSV reader that rows are pulled from.
reader *csv.Reader
// buffer holds the rows read up front by createCsvReader; GetRow serves indexes inside it directly.
buffer [][]string
// line counts the rows consumed so far: createCsvReader sets it to the buffer size and
// GetRow increments it for every further row it reads.
line int
// eof is set by readNextRow once the underlying reader reports io.EOF.
eof bool
}
// ErrorUndefinedCell is returned by getCell when the requested column does not exist in the given row.
var ErrorUndefinedCell = errors.New("undefined cell")
// createCsvReader wraps reader in a csvReader and eagerly reads up to
// bufferRowCount rows into its buffer.
//
// The buffer always has bufferRowCount entries; entries beyond the end of the
// input stay nil. A read error other than end of input aborts creation and is
// returned to the caller.
func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
	buffered := &csvReader{reader: reader}
	buffered.buffer = make([][]string, bufferRowCount)

	for idx := range buffered.buffer {
		if buffered.eof {
			break
		}
		record, err := buffered.readNextRow()
		if err != nil {
			return nil, err
		}
		buffered.buffer[idx] = record
	}

	// The line counter is advanced past the whole buffer, even when the
	// input ended before the buffer was filled.
	buffered.line = bufferRowCount
	return buffered, nil
}
// GetRow returns the row with the given zero-based index.
//
// Indexes inside the buffer are answered from the buffer. Any other index is
// searched for by reading forward from the current position, so it can only
// be found if it has not been read past yet. Once the end of the input is
// reached, nil is returned without an error.
func (csv *csvReader) GetRow(row int) ([]string, error) {
	if row >= 0 && row < len(csv.buffer) {
		return csv.buffer[row], nil
	}

	for !csv.eof {
		fields, err := csv.readNextRow()
		if err != nil {
			return nil, err
		}
		if csv.eof {
			break
		}

		// current is the index of the row that was just read.
		current := csv.line
		csv.line++
		if current == row {
			return fields, nil
		}
	}
	return nil, nil
}
// readNextRow reads one record from the underlying reader.
//
// End of input is not reported as an error: the eof flag is set instead and
// every later call returns nil, nil. Any other read error is returned as-is.
func (csv *csvReader) readNextRow() ([]string, error) {
	if csv.eof {
		return nil, nil
	}

	record, err := csv.reader.Read()
	switch {
	case err == nil:
		return record, nil
	case err == io.EOF:
		csv.eof = true
		return record, nil
	default:
		return nil, err
	}
}
// CreateCsvDiff creates a tabular diff based on two CSV readers.
//
// With both readers present a full two-sided diff is built. With only
// baseReader every cell is reported as deleted; otherwise headReader is used
// and every cell is reported as added. At least one reader is expected to be
// non-nil: when both are nil, the nil headReader is still handed to
// createCsvDiffSingle.
func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
	switch {
	case baseReader != nil && headReader != nil:
		return createCsvDiff(diffFile, baseReader, headReader)
	case baseReader != nil:
		return createCsvDiffSingle(baseReader, TableDiffCellDel)
	default:
		return createCsvDiffSingle(headReader, TableDiffCellAdd)
	}
}
// createCsvDiffSingle creates a tabular diff from a single CSV reader, giving
// every cell the supplied celltype.
//
// For TableDiffCellDel the value is stored in LeftCell, for any other type in
// RightCell. Rows are numbered from 1. The result is always a single section;
// a read error other than end of input aborts the diff.
func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
	var rows []*TableDiffRow

	for rowIdx := 1; ; rowIdx++ {
		record, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}

		cells := make([]*TableDiffCell, 0, len(record))
		for _, value := range record {
			cell := &TableDiffCell{Type: celltype}
			if celltype == TableDiffCellDel {
				cell.LeftCell = value
			} else {
				cell.RightCell = value
			}
			cells = append(cells, cell)
		}
		rows = append(rows, &TableDiffRow{RowIdx: rowIdx, Cells: cells})
	}

	return []*TableDiffSection{{Rows: rows}}, nil
}
// createCsvDiff creates a tabular diff from a base and a head CSV reader.
// Columns of base and head are matched first (see getColumnMapping); then, for every section of
// diffFile, each pair of base/head line numbers is turned into a TableDiffRow whose cells are
// marked as deleted, added, changed, unchanged or moved. Sections that produce no rows are skipped.
// Any error from reading either CSV is returned unchanged.
func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
// Given the baseReader and headReader, we create a buffering CSV reader for each (baseCSVReader and
// headCSVReader respectively), each keeping up to maxRowsToInspect rows in memory
baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
if err != nil {
return nil, err
}
headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
if err != nil {
return nil, err
}
// Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
// Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
if len(a2bColMap) < len(b2aColMap) {
numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
}
// createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
// in the base or head respectively.
// Returns a TableDiffRow whose RowIdx is the `b` line number, or nil if both line numbers are 0.
createDiffTableRow := func(aLineNum int, bLineNum int) (*TableDiffRow, error) {
// diffTableCells is a row of the diff table. It will have cells for added, deleted, changed, and unchanged content, thus either
// the same size as the head table or bigger
diffTableCells := make([]*TableDiffCell, numDiffTableCols)
// bRow stays nil when the line does not exist in head
var bRow *[]string
if bLineNum > 0 {
row, err := headCSVReader.GetRow(bLineNum - 1)
if err != nil {
return nil, err
}
bRow = &row
}
// aRow stays nil when the line does not exist in base
var aRow *[]string
if aLineNum > 0 {
row, err := baseCSVReader.GetRow(aLineNum - 1)
if err != nil {
return nil, err
}
aRow = &row
}
if aRow == nil && bRow == nil {
// No content
return nil, nil
}
aIndex := 0 // tracks where we are in the a2bColMap
bIndex := 0 // tracks where we are in the b2aColMap
colsAdded := 0 // incremented whenever we found a column was added
colsDeleted := 0 // incremented whenever a column was deleted
// We loop until both the aIndex and bIndex have reached the end of their col map, at which point we are done
for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
// Starting from where aIndex is currently pointing, we see if the map is -1 (deleted) and if it is, create a column to note that, increment, and look at the next aIndex
for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
var aCell string
if aRow != nil {
// A missing cell (ErrorUndefinedCell) is tolerated and leaves aCell empty; any other error aborts
if cell, err := getCell(*aRow, aIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
aCell = cell
}
}
// Deleted columns are placed at the current head position, shifted by the columns deleted so far
diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
aIndex++
colsDeleted++
}
// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
// we can just increment aIndex until it points to a -1 column or reaches the end of a2bColMap
for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
aIndex++
}
// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if it is, create a column to note that, increment, and look at the next bIndex
for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
var bCell string
cellType := TableDiffCellAdd
if bRow != nil {
// A missing cell (ErrorUndefinedCell) is tolerated and leaves bCell empty; any other error aborts
if cell, err := getCell(*bRow, bIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
bCell = cell
}
} else {
// There is no head row for this line, so the cell of the added column is marked as deleted
cellType = TableDiffCellDel
}
diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
bIndex++
colsAdded++
}
// bIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
var diffTableCell TableDiffCell
var aCell *string
// get the aCell value if the aRow exists
if aRow != nil {
if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
aCell = &cell
diffTableCell.LeftCell = cell
}
} else {
// no base row: the whole line was added
diffTableCell.Type = TableDiffCellAdd
}
var bCell *string
// get the bCell value if the bRow exists
if bRow != nil {
if cell, err := getCell(*bRow, bIndex); err != nil {
if err != ErrorUndefinedCell {
return nil, err
}
} else {
bCell = &cell
diffTableCell.RightCell = cell
}
} else {
// no head row: the whole line was deleted
diffTableCell.Type = TableDiffCellDel
}
// if both a and b have a cell that exists, compare the values and determine if the column has moved
if aCell != nil && bCell != nil {
// The column counts as moved when its index in the diff table (bIndex+colsDeleted) differs from
// its base column index shifted by the number of columns added so far
moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
if *aCell != *bCell {
if moved {
diffTableCell.Type = TableDiffCellMovedChanged
} else {
diffTableCell.Type = TableDiffCellChanged
}
} else {
if moved {
diffTableCell.Type = TableDiffCellMovedUnchanged
} else {
diffTableCell.Type = TableDiffCellUnchanged
}
// Both sides are equal, so only RightCell keeps the value
diffTableCell.LeftCell = ""
}
}
// Add the diff column to the diff row
diffTableCells[bIndex+colsDeleted] = &diffTableCell
bIndex++
}
}
return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
}
// diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
var diffTableSections []*TableDiffSection
for i, section := range diffFile.Sections {
// Each section has multiple diffTableRows
var diffTableRows []*TableDiffRow
// Pair up the left/right line numbers of the section's diff lines (see tryMergeLines)
lines := tryMergeLines(section.Lines)
// Loop through the merged lines to get each row of the CSV diff table for this section
for j, line := range lines {
// When the very first diff line is not line 1 on both sides, a row for line 1 of both files is emitted first
// (presumably so the CSV header row always appears in the table - TODO confirm)
if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
diffTableRow, err := createDiffTableRow(1, 1)
if err != nil {
return nil, err
}
if diffTableRow != nil {
diffTableRows = append(diffTableRows, diffTableRow)
}
}
diffTableRow, err := createDiffTableRow(line[0], line[1])
if err != nil {
return nil, err
}
if diffTableRow != nil {
diffTableRows = append(diffTableRows, diffTableRow)
}
}
// Sections without any rows are not added to the result
if len(diffTableRows) > 0 {
diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
}
}
return diffTableSections, nil
}
// getColumnMapping creates the column mappings between base (a) and head (b).
//
// The first returned slice maps each base column index to its head column
// index, the second maps each head column index to its base column index;
// columns without a counterpart hold unmappedColumn. Columns are paired by
// equal text in the first row, each head column being used at most once, and
// the remaining ones are then offered to tryMapColumnsByContent in both
// directions.
func getColumnMapping(baseCSVReader *csvReader, headCSVReader *csvReader) ([]int, []int) {
	// Errors from GetRow are ignored: a failed read leaves the row nil,
	// which results in an empty mapping for that side.
	baseHeader, _ := baseCSVReader.GetRow(0)
	headHeader, _ := headCSVReader.GetRow(0)

	base2Head := make([]int, len(baseHeader))
	for col := range base2Head {
		base2Head[col] = unmappedColumn
	}
	head2Base := make([]int, len(headHeader))
	for col := range head2Base {
		head2Base[col] = unmappedColumn
	}

	// Pair every base column with the first still-unmapped head column
	// that carries the same first-row text.
	for baseCol, baseName := range baseHeader {
		for headCol, headName := range headHeader {
			if head2Base[headCol] != unmappedColumn || baseName != headName {
				continue
			}
			base2Head[baseCol] = headCol
			head2Base[headCol] = baseCol
			break
		}
	}

	tryMapColumnsByContent(baseCSVReader, base2Head, headCSVReader, head2Base)
	tryMapColumnsByContent(headCSVReader, head2Base, baseCSVReader, base2Head)

	return base2Head, head2Base
}
// tryMapColumnsByContent tries to map still-unmapped columns by comparing the
// content of the buffered rows that follow the first row.
//
// For every unmapped base column, the unmapped head columns are tried in
// order; the first one whose share of equal cells exceeds minRatioToMatch is
// recorded in both mappings.
//
// NOTE(review): the number of compared rows is derived from the buffer
// lengths, not from how many rows were actually read, so rows missing from a
// short file count as mismatches. With no rows to compare the ratio is NaN,
// which never exceeds the threshold, so nothing is mapped.
func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
	for baseCol := range base2HeadColMap {
		for headCol := 0; headCol < len(head2BaseColMap) && base2HeadColMap[baseCol] == unmappedColumn; headCol++ {
			if head2BaseColMap[headCol] != unmappedColumn {
				continue
			}

			// Rows available on both sides, not counting the first row.
			shared := util.Min(len(baseCSVReader.buffer), len(headCSVReader.buffer))
			rows := util.Min(maxRowsToInspect, util.Max(0, shared-1))

			matches := 0
			for rowIdx := 1; rowIdx <= rows; rowIdx++ {
				baseCell, baseErr := getCell(baseCSVReader.buffer[rowIdx], baseCol)
				headCell, headErr := getCell(headCSVReader.buffer[rowIdx], headCol)
				if baseErr == nil && headErr == nil && baseCell == headCell {
					matches++
				}
			}

			if float32(matches)/float32(rows) > minRatioToMatch {
				base2HeadColMap[baseCol] = headCol
				head2BaseColMap[headCol] = baseCol
			}
		}
	}
}
// getCell returns the value at the given column of row.
//
// It returns an empty string and ErrorUndefinedCell when column lies outside
// the row. That includes negative indexes such as unmappedColumn, which would
// otherwise cause an index-out-of-range panic.
func getCell(row []string, column int) (string, error) {
	if column < 0 || column >= len(row) {
		return "", ErrorUndefinedCell
	}
	return row[column], nil
}
// countUnmappedColumns returns how many entries of mapping are set to
// unmappedColumn, i.e. how many columns have no counterpart.
func countUnmappedColumns(mapping []int) int {
	unmapped := 0
	for _, target := range mapping {
		if target == unmappedColumn {
			unmapped++
		}
	}
	return unmapped
}
// tryMergeLines maps the separated line numbers of a git diff into
// [left, right] pairs. The result is assumed to be ordered.
//
// Section marker lines are skipped. A line without a left index that directly
// follows a run of entries without a right index is merged into that run: its
// right index fills the earliest entry of the run that is still missing one.
// Every other line becomes its own entry.
func tryMergeLines(lines []*DiffLine) [][2]int {
	pairs := make([][2]int, 0, len(lines))
	for _, line := range lines {
		if line.Type == DiffLineSection {
			continue
		}
		pairs = append(pairs, [2]int{line.LeftIdx, line.RightIdx})
	}

	merged := make([][2]int, 0, len(pairs))
	for _, pair := range pairs {
		last := len(merged) - 1
		if pair[0] == 0 && last >= 0 && merged[last][1] == 0 {
			// Walk back to the start of the trailing run of entries that
			// have no right index yet.
			first := last
			for first > 0 && merged[first-1][1] == 0 {
				first--
			}
			merged[first][1] = pair[1]
			continue
		}
		merged = append(merged, pair)
	}
	return merged
}