add csv parsing

This commit is contained in:
Pedro Pérez 2025-10-23 00:17:53 +02:00
parent 213c9480e7
commit 4eef117330
11 changed files with 360 additions and 2 deletions

View File

@ -2,10 +2,17 @@ FROM golang:1.25.2-alpine3.22 AS builder
WORKDIR /app WORKDIR /app
COPY go.mod ./ COPY go.mod go.sum ./
COPY server/ ./server/ COPY server/ ./server/
COPY internal/ ./internal/
COPY assets/ ./assets/
RUN go mod download RUN go mod download
RUN go test ./... -v
RUN rm -rf ./assets/
RUN go build -o /app/service_a ./server/main.go RUN go build -o /app/service_a ./server/main.go
FROM alpine:latest FROM alpine:latest

View File

@ -0,0 +1,2 @@
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
2025/10/12;Madrid;11,55;6,25;0;10
1 Fecha Ciudad Temperatura Máxima (C) Temperatura Mínima (C) Precipitación (mm) Nubosidad (%)
2 2025/10/12 Madrid 11,55 6,25 0 10

View File

@ -0,0 +1,2 @@
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
2025/10/12; Madrid;11,55;6,25;0;10
1 Fecha Ciudad Temperatura Máxima (C) Temperatura Mínima (C) Precipitación (mm) Nubosidad (%)
2 2025/10/12 Madrid 11,55 6,25 0 10

View File

@ -0,0 +1,2 @@
Fecha;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
2025/10/12;11,55;6,25;0;10
1 Fecha Temperatura Máxima (C) Temperatura Mínima (C) Precipitación (mm) Nubosidad (%)
2 2025/10/12 11,55 6,25 0 10

View File

@ -0,0 +1 @@
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
1 Fecha Ciudad Temperatura Máxima (C) Temperatura Mínima (C) Precipitación (mm) Nubosidad (%)

View File

@ -1,3 +1,11 @@
module servicea module servicea
go 1.25.2 go 1.25.2
require github.com/stretchr/testify v1.11.1
require (
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)

10
service_a/go.sum Normal file
View File

@ -0,0 +1,10 @@
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -0,0 +1,52 @@
package meteo
import (
"errors"
"time"
)
// H is a generic string-keyed map. The CSV parser uses it to hold a single
// row, keyed by the column names taken from the header line.
type H map[string]any
// MeteoData is one accepted weather observation for a city on a given day.
//
// NOTE(review): the `csv` struct tags are not read by the parser visible in
// this package, which looks columns up by their Spanish header text instead;
// confirm whether another consumer relies on them.
type MeteoData struct {
// Timestamp is the observation date, parsed from the "Fecha" column.
Timestamp time.Time `csv:"fecha"`
// Location is the city name, taken from the "Ciudad" column.
Location string `csv:"ciudad"`
// MaxTemp is the maximum temperature, from "Temperatura Máxima (C)".
MaxTemp float32 `csv:"temperatura maxima"`
// MinTemp is the minimum temperature, from "Temperatura Mínima (C)".
MinTemp float32 `csv:"temperatura minima"`
// Rainfall is the precipitation, from "Precipitación (mm)".
Rainfall float32 `csv:"precipitacion"`
// Cloudiness is the cloud cover, from "Nubosidad (%)".
Cloudiness float32 `csv:"nubosidad"`
}
// RejectedMeteoData describes an input row that could not be turned into a
// valid MeteoData.
type RejectedMeteoData struct {
// RowValue is the row as the parser saw it: its fields re-joined with ';'.
RowValue string
// Reason is the text of the error that caused the rejection.
Reason string
}
// FileStats accumulates the outcome of ingesting one file and is serialised
// to JSON with snake_case keys.
type FileStats struct {
// RowsInserted is incremented by the parser for every accepted row.
RowsInserted int `json:"rows_inserted"`
// RowsRejected is incremented by the parser for every rejected row.
RowsRejected int `json:"rows_rejected"`
// ElapsedMS is presumably the processing time in milliseconds; it is not
// written by the parser in this package — TODO confirm who sets it.
ElapsedMS int `json:"elapsed_ms"`
// FileChecksum is presumably a digest of the ingested file; it is not
// written by the parser in this package — TODO confirm who sets it.
FileChecksum string `json:"file_checksum"`
}
// Sentinel errors of the meteo package. Callers can match them with
// errors.Is; several are returned wrapped together with a lower-level cause.
var (
// General file/record failures.
ErrCannotParseFile = errors.New("cannot parse file")
ErrValidateRecord = errors.New("error validating record")
ErrRecordNotValid = errors.New("record not valid")
ErrInvalidDateFormat = errors.New("invalid date format")
// CSV-level read failures; these abort the whole parse.
ErrReadingCSVHeader = errors.New("error reading CSV header")
ErrReadingCSVRow = errors.New("error reading CSV row")
// A required column is absent from the row.
ErrMissingDateField = errors.New("missing date field")
ErrMissingCityField = errors.New("missing city field")
ErrMissingMaxTempField = errors.New("missing max temp field")
ErrMissingMinTempField = errors.New("missing min temp field")
ErrMissingRainfallField = errors.New("missing rainfall field")
ErrMissingCloudinessField = errors.New("missing cloudiness field")
// A numeric column is present but could not be parsed as a number.
ErrInvalidMaxTemp = errors.New("invalid max temp")
ErrInvalidMinTemp = errors.New("invalid min temp")
ErrInvalidRainfall = errors.New("invalid rainfall")
ErrInvalidCloudiness = errors.New("invalid cloudiness")
// A numeric value parsed correctly but lies outside the accepted range.
ErrMaxTempOutOfRange = errors.New("max temp out of range (must be <= 60°C)")
ErrMinTempOutOfRange = errors.New("min temp out of range (must be >= -20°C)")
ErrRainfallOutOfRange = errors.New("rainfall out of range (must be 0-500 mm)")
ErrCloudinessOutOfRange = errors.New("cloudiness out of range (must be 0-100%)")
)

View File

@ -0,0 +1,170 @@
package meteo
import (
	"encoding/csv"
	"fmt"
	"io"
	"math"
	"strconv"
	"strings"
	"time"
)
// validate checks that the record's measurements lie inside the accepted
// ranges and returns the sentinel error of the first bound that is violated,
// or nil when every value is acceptable.
//
// The limits mirror the ones quoted in the Err*OutOfRange messages: max
// temperature at most 60 °C, min temperature at least -20 °C, rainfall within
// 0-500 mm and cloudiness within 0-100 %.
func (mt *MeteoData) validate() error {
	const (
		maxTempLimit    = 60  // °C, upper bound for MaxTemp
		minTempLimit    = -20 // °C, lower bound for MinTemp
		rainfallLimit   = 500 // mm, upper bound for Rainfall
		cloudinessLimit = 100 // %, upper bound for Cloudiness
	)

	switch {
	case mt.MaxTemp > maxTempLimit:
		return ErrMaxTempOutOfRange
	case mt.MinTemp < minTempLimit:
		return ErrMinTempOutOfRange
	case mt.Rainfall < 0 || mt.Rainfall > rainfallLimit:
		return ErrRainfallOutOfRange
	case mt.Cloudiness < 0 || mt.Cloudiness > cloudinessLimit:
		return ErrCloudinessOutOfRange
	}
	return nil
}
// FileIngest is implemented by parsers that turn the contents of an uploaded
// file into meteo records.
type FileIngest interface {
	// Parse reads r and returns the accepted records and the rejected rows.
	// Implementations update the row counters in fs as they go; a non-nil
	// error means the input could not be read at all.
	Parse(r io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error)
}
// CSV is the FileIngest implementation for semicolon-separated CSV input.
// It carries no state, so the zero value is ready to use.
type CSV struct{}
// Compile-time check that *CSV satisfies FileIngest.
var _ FileIngest = (*CSV)(nil)
// Parse reads semicolon-separated meteo data from r and splits its rows into
// accepted and rejected records.
//
// The first record is treated as the header. Every following row is mapped
// onto the header by position, converted with normalize and range-checked
// with validate. A row that fails either step is reported as a
// RejectedMeteoData (its fields re-joined with ';' plus the error text) and
// parsing continues with the next row. fs.RowsInserted and fs.RowsRejected
// are incremented as rows are classified; a nil fs is tolerated.
//
// A non-nil error is returned only when the header or a row cannot be read at
// the CSV level. It wraps ErrReadingCSVHeader or ErrReadingCSVRow together
// with the underlying cause, both slices are nil, and fs may already reflect
// the rows processed before the failure.
func (c *CSV) Parse(r io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error) {
	if fs == nil {
		// Callers that do not need the counters may pass nil.
		fs = &FileStats{}
	}

	reader := csv.NewReader(r)
	reader.Comma = ';'
	reader.TrimLeadingSpace = true
	// The default (0) pins every row to the header's field count and fails
	// the whole file on the first mismatch. -1 lets short, long and
	// whitespace-only rows through so they are skipped or rejected one by one.
	reader.FieldsPerRecord = -1

	header, err := reader.Read()
	if err != nil {
		return nil, nil, fmt.Errorf("%w: %w", ErrReadingCSVHeader, err)
	}
	if len(header) > 0 {
		// A UTF-8 byte order mark would otherwise become part of the first
		// column name and make every row look like it lacks that column.
		header[0] = strings.TrimPrefix(header[0], "\ufeff")
	}

	var (
		accepted []MeteoData
		rejected []RejectedMeteoData
	)
	for {
		row, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, nil, fmt.Errorf("%w: %w", ErrReadingCSVRow, err)
		}
		// Skip rows that carry no data at all.
		if len(row) == 0 || (len(row) == 1 && row[0] == "") {
			continue
		}

		// Key each value by its column name; values beyond the header are
		// ignored because they have no name to be stored under.
		record := make(H, len(header))
		for i, value := range row {
			if i < len(header) {
				record[header[i]] = value
			}
		}

		meteoData, err := normalize(record)
		if err == nil {
			err = meteoData.validate()
		}
		if err != nil {
			fs.RowsRejected++
			rejected = append(rejected, RejectedMeteoData{
				RowValue: strings.Join(row, ";"),
				Reason:   err.Error(),
			})
			continue
		}

		accepted = append(accepted, *meteoData)
		fs.RowsInserted++
	}

	return accepted, rejected, nil
}
// normalize converts one raw CSV row, keyed by the Spanish column headers,
// into a MeteoData.
//
// Fields are checked in column order (date, city, max temp, min temp,
// rainfall, cloudiness) and the first problem is returned: an ErrMissing*
// sentinel when a column is absent, or ErrInvalidDateFormat / ErrInvalid*
// wrapped together with the parsing error when a value is malformed. The city
// is trimmed, and a city that is empty after trimming counts as missing. No
// range checks are applied here. On error the returned record is nil.
func normalize(record H) (*MeteoData, error) {
	dateStr, ok := record["Fecha"].(string)
	if !ok {
		return nil, ErrMissingDateField
	}
	timestamp, err := time.Parse("2006/01/02", dateStr)
	if err != nil {
		return nil, fmt.Errorf("%w: %w", ErrInvalidDateFormat, err)
	}

	location, ok := record["Ciudad"].(string)
	location = strings.TrimSpace(location)
	if !ok || location == "" {
		// A blank city identifies nothing, so treat it like an absent column.
		return nil, ErrMissingCityField
	}

	maxTemp, err := floatField(record, "Temperatura Máxima (C)", ErrMissingMaxTempField, ErrInvalidMaxTemp)
	if err != nil {
		return nil, err
	}
	minTemp, err := floatField(record, "Temperatura Mínima (C)", ErrMissingMinTempField, ErrInvalidMinTemp)
	if err != nil {
		return nil, err
	}
	rainfall, err := floatField(record, "Precipitación (mm)", ErrMissingRainfallField, ErrInvalidRainfall)
	if err != nil {
		return nil, err
	}
	cloudiness, err := floatField(record, "Nubosidad (%)", ErrMissingCloudinessField, ErrInvalidCloudiness)
	if err != nil {
		return nil, err
	}

	return &MeteoData{
		Timestamp:  timestamp,
		Location:   location,
		MaxTemp:    maxTemp,
		MinTemp:    minTemp,
		Rainfall:   rainfall,
		Cloudiness: cloudiness,
	}, nil
}

// floatField reads record[key] as a decimal number. It returns errMissing when
// the key is absent or not a string, and errInvalid wrapped together with the
// parsing error when the text is not a valid number.
func floatField(record H, key string, errMissing, errInvalid error) (float32, error) {
	raw, ok := record[key].(string)
	if !ok {
		return 0, errMissing
	}
	value, err := parseFloat(raw)
	if err != nil {
		return 0, fmt.Errorf("%w: %w", errInvalid, err)
	}
	return value, nil
}
// parseFloat converts a decimal string into a float32, accepting a comma as
// the decimal separator (e.g. "11,55") as well as a dot.
//
// Only the first comma is replaced, so input with several commas still fails
// to parse. Spellings that strconv accepts but that are not real measurements
// ("NaN", "Inf", "-Infinity", ...) are rejected with a *strconv.NumError
// wrapping strconv.ErrSyntax: NaN compares false against every bound and
// would otherwise pass any range validation applied afterwards.
func parseFloat(s string) (float32, error) {
	normalized := strings.Replace(s, ",", ".", 1)
	f, err := strconv.ParseFloat(normalized, 32)
	if err != nil {
		return 0, err
	}
	if math.IsNaN(f) || math.IsInf(f, 0) {
		return 0, &strconv.NumError{Func: "ParseFloat", Num: s, Err: strconv.ErrSyntax}
	}
	return float32(f), nil
}

View File

@ -0,0 +1,104 @@
package meteo_test
import (
"os"
"servicea/internal/domains/meteo"
"testing"
"time"
"github.com/stretchr/testify/assert"
)
// Test_CSV_ParseFile runs CSV.Parse against the fixture files under assets/
// and checks both the FileStats counters and the returned slices.
//
// Every assertion that guards a later dereference or index stops the subtest
// when it fails, so a broken fixture or parser is reported as a test failure
// rather than as a panic.
func Test_CSV_ParseFile(t *testing.T) {
	tests := []struct {
		name             string
		filePath         string
		expectedInserted int
		expectedRejected int
		validateInserted func(t *testing.T, inserted []meteo.MeteoData)
		validateRejected func(t *testing.T, rejected []meteo.RejectedMeteoData)
	}{
		{
			name:             "valid record",
			filePath:         "./../../../assets/test_1.csv",
			expectedInserted: 1,
			expectedRejected: 0,
			validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
				if !assert.Equal(t, 1, len(inserted)) {
					return
				}

				record := inserted[0]
				assert.Equal(t, time.Date(2025, 10, 12, 0, 0, 0, 0, time.UTC), record.Timestamp)
				assert.Equal(t, "Madrid", record.Location)
				assert.Equal(t, float32(11.55), record.MaxTemp)
				assert.Equal(t, float32(6.25), record.MinTemp)
				assert.Equal(t, float32(0), record.Rainfall)
				assert.Equal(t, float32(10), record.Cloudiness)
			},
			validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
				assert.Empty(t, rejected)
			},
		},
		{
			name:             "record with leading spaces",
			filePath:         "./../../../assets/test_2.csv",
			expectedInserted: 1,
			expectedRejected: 0,
			validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
				if !assert.Equal(t, 1, len(inserted)) {
					return
				}
				// TrimLeadingSpace should handle the spaces before Madrid
				assert.Equal(t, "Madrid", inserted[0].Location)
			},
			validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
				assert.Empty(t, rejected)
			},
		},
		{
			name:             "missing city column",
			filePath:         "./../../../assets/test_3.csv",
			expectedInserted: 0,
			expectedRejected: 1,
			validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
				assert.Empty(t, inserted)
			},
			validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
				if !assert.Equal(t, 1, len(rejected)) {
					return
				}
				assert.Contains(t, rejected[0].Reason, "missing city field")
				assert.Equal(t, "2025/10/12;11,55;6,25;0;10", rejected[0].RowValue)
			},
		},
		{
			name:             "empty file with only header",
			filePath:         "./../../../assets/test_4.csv",
			expectedInserted: 0,
			expectedRejected: 0,
			validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
				assert.Empty(t, inserted)
			},
			validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
				assert.Empty(t, rejected)
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			file, err := os.Open(tt.filePath)
			if !assert.NoError(t, err) {
				// Without the fixture there is nothing meaningful to parse.
				return
			}
			defer file.Close()

			csvIngest := &meteo.CSV{}
			fileStats := &meteo.FileStats{}

			inserted, rejected, err := csvIngest.Parse(file, fileStats)
			if !assert.NoError(t, err) {
				return
			}

			assert.Equal(t, tt.expectedInserted, fileStats.RowsInserted)
			assert.Equal(t, tt.expectedRejected, fileStats.RowsRejected)

			if tt.validateInserted != nil {
				tt.validateInserted(t, inserted)
			}
			if tt.validateRejected != nil {
				tt.validateRejected(t, rejected)
			}
		})
	}
}

View File

@ -1,7 +1,7 @@
create table public.locations create table public.locations
( (
id serial primary key, id serial primary key,
location_name varchar(255) not null location_name varchar(255) not null unique
); );