From 4eef117330399d37c21aa282375b410c52d4617e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20P=C3=A9rez?= Date: Thu, 23 Oct 2025 00:17:53 +0200 Subject: [PATCH] add csv parsing --- service_a/Dockerfile | 9 +- service_a/assets/test_1.csv | 2 + service_a/assets/test_2.csv | 2 + service_a/assets/test_3.csv | 2 + service_a/assets/test_4.csv | 1 + service_a/go.mod | 8 + service_a/go.sum | 10 ++ service_a/internal/domains/meteo/domain.go | 52 ++++++ service_a/internal/domains/meteo/file.go | 170 ++++++++++++++++++ service_a/internal/domains/meteo/file_test.go | 104 +++++++++++ .../database/migrations/001_data.up.sql | 2 +- 11 files changed, 360 insertions(+), 2 deletions(-) create mode 100644 service_a/assets/test_1.csv create mode 100644 service_a/assets/test_2.csv create mode 100644 service_a/assets/test_3.csv create mode 100644 service_a/assets/test_4.csv create mode 100644 service_a/go.sum create mode 100644 service_a/internal/domains/meteo/domain.go create mode 100644 service_a/internal/domains/meteo/file.go create mode 100644 service_a/internal/domains/meteo/file_test.go diff --git a/service_a/Dockerfile b/service_a/Dockerfile index 11a17ff..ed3374e 100644 --- a/service_a/Dockerfile +++ b/service_a/Dockerfile @@ -2,10 +2,17 @@ FROM golang:1.25.2-alpine3.22 AS builder WORKDIR /app -COPY go.mod ./ +COPY go.mod go.sum ./ COPY server/ ./server/ +COPY internal/ ./internal/ +COPY assets/ ./assets/ RUN go mod download + +RUN go test ./... 
-v + +RUN rm -rf ./assets/ + RUN go build -o /app/service_a ./server/main.go FROM alpine:latest diff --git a/service_a/assets/test_1.csv b/service_a/assets/test_1.csv new file mode 100644 index 0000000..857f7cf --- /dev/null +++ b/service_a/assets/test_1.csv @@ -0,0 +1,2 @@ +Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%) +2025/10/12;Madrid;11,55;6,25;0;10 diff --git a/service_a/assets/test_2.csv b/service_a/assets/test_2.csv new file mode 100644 index 0000000..65ce3f7 --- /dev/null +++ b/service_a/assets/test_2.csv @@ -0,0 +1,2 @@ +Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%) +2025/10/12; Madrid;11,55;6,25;0;10 diff --git a/service_a/assets/test_3.csv b/service_a/assets/test_3.csv new file mode 100644 index 0000000..9b6139f --- /dev/null +++ b/service_a/assets/test_3.csv @@ -0,0 +1,2 @@ +Fecha;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%) +2025/10/12;11,55;6,25;0;10 diff --git a/service_a/assets/test_4.csv b/service_a/assets/test_4.csv new file mode 100644 index 0000000..d105c5b --- /dev/null +++ b/service_a/assets/test_4.csv @@ -0,0 +1 @@ +Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%) diff --git a/service_a/go.mod b/service_a/go.mod index ade2ac6..24622c1 100644 --- a/service_a/go.mod +++ b/service_a/go.mod @@ -1,3 +1,11 @@ module servicea go 1.25.2 + +require github.com/stretchr/testify v1.11.1 + +require ( + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + gopkg.in/yaml.v3 v3.0.1 // indirect +) diff --git a/service_a/go.sum b/service_a/go.sum new file mode 100644 index 0000000..c4c1710 --- /dev/null +++ b/service_a/go.sum @@ -0,0 +1,10 @@ +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 
h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/service_a/internal/domains/meteo/domain.go b/service_a/internal/domains/meteo/domain.go new file mode 100644 index 0000000..1215674 --- /dev/null +++ b/service_a/internal/domains/meteo/domain.go @@ -0,0 +1,52 @@ +package meteo + +import ( + "errors" + "time" +) + +type H map[string]any + +type MeteoData struct { + Timestamp time.Time `csv:"fecha"` + Location string `csv:"ciudad"` + MaxTemp float32 `csv:"temperatura maxima"` + MinTemp float32 `csv:"temperatura minima"` + Rainfall float32 `csv:"precipitacion"` + Cloudiness float32 `csv:"nubosidad"` +} + +type RejectedMeteoData struct { + RowValue string + Reason string +} + +type FileStats struct { + RowsInserted int `json:"rows_inserted"` + RowsRejected int `json:"rows_rejected"` + ElapsedMS int `json:"elapsed_ms"` + FileChecksum string `json:"file_checksum"` +} + +var ( + ErrCannotParseFile = errors.New("cannot parse file") + ErrValidateRecord = errors.New("error validating record") + ErrRecordNotValid = errors.New("record not valid") + ErrInvalidDateFormat = errors.New("invalid date format") + ErrReadingCSVHeader = errors.New("error reading CSV header") + ErrReadingCSVRow = errors.New("error reading CSV row") + ErrMissingDateField = errors.New("missing date field") + ErrMissingCityField = errors.New("missing city field") + 
ErrMissingMaxTempField = errors.New("missing max temp field") + ErrMissingMinTempField = errors.New("missing min temp field") + ErrMissingRainfallField = errors.New("missing rainfall field") + ErrMissingCloudinessField = errors.New("missing cloudiness field") + ErrInvalidMaxTemp = errors.New("invalid max temp") + ErrInvalidMinTemp = errors.New("invalid min temp") + ErrInvalidRainfall = errors.New("invalid rainfall") + ErrInvalidCloudiness = errors.New("invalid cloudiness") + ErrMaxTempOutOfRange = errors.New("max temp out of range (must be <= 60°C)") + ErrMinTempOutOfRange = errors.New("min temp out of range (must be >= -20°C)") + ErrRainfallOutOfRange = errors.New("rainfall out of range (must be 0-500 mm)") + ErrCloudinessOutOfRange = errors.New("cloudiness out of range (must be 0-100%)") +) diff --git a/service_a/internal/domains/meteo/file.go b/service_a/internal/domains/meteo/file.go new file mode 100644 index 0000000..f70aeae --- /dev/null +++ b/service_a/internal/domains/meteo/file.go @@ -0,0 +1,170 @@ +package meteo + +import ( + "encoding/csv" + "fmt" + "io" + "strconv" + "strings" + "time" +) + +func (mt *MeteoData) validate() error { + if mt.MaxTemp > 60 { + return ErrMaxTempOutOfRange + } + + if mt.MinTemp < -20 { + return ErrMinTempOutOfRange + } + + if mt.Rainfall < 0 || mt.Rainfall > 500 { + return ErrRainfallOutOfRange + } + + if mt.Cloudiness < 0 || mt.Cloudiness > 100 { + return ErrCloudinessOutOfRange + } + + return nil +} + +type FileIngest interface { + Parse(io io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error) +} + +type CSV struct{} + +var _ FileIngest = (*CSV)(nil) + +func (c *CSV) Parse(r io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error) { + reader := csv.NewReader(r) + reader.Comma = ';' + reader.TrimLeadingSpace = true + + header, err := reader.Read() + if err != nil { + return nil, nil, fmt.Errorf("%w: %v", ErrReadingCSVHeader, err) + } + + var meteoDataList []MeteoData + var rejectedDataList 
[]RejectedMeteoData + + for { + row, err := reader.Read() + if err == io.EOF { + break + } + if err != nil { + return nil, nil, fmt.Errorf("%w: %v", ErrReadingCSVRow, err) + } + + if len(row) == 0 || (len(row) == 1 && row[0] == "") { + continue + } + + rowValue := strings.Join(row, ";") + + record := make(H) + for i, value := range row { + if i < len(header) { + record[header[i]] = value + } + } + + meteoData, err := normalize(record) + if err != nil { + fs.RowsRejected++ + rejectedDataList = append(rejectedDataList, RejectedMeteoData{ + RowValue: rowValue, + Reason: err.Error(), + }) + continue + } + + if err := meteoData.validate(); err != nil { + fs.RowsRejected++ + rejectedDataList = append(rejectedDataList, RejectedMeteoData{ + RowValue: rowValue, + Reason: err.Error(), + }) + continue + } + + meteoDataList = append(meteoDataList, *meteoData) + fs.RowsInserted++ + } + + return meteoDataList, rejectedDataList, nil +} + +func normalize(record H) (*MeteoData, error) { + meteoData := &MeteoData{} + + if dateStr, ok := record["Fecha"].(string); ok { + t, err := time.Parse("2006/01/02", dateStr) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidDateFormat, err) + } + meteoData.Timestamp = t + } else { + return nil, ErrMissingDateField + } + + if location, ok := record["Ciudad"].(string); ok { + meteoData.Location = location + } else { + return nil, ErrMissingCityField + } + + if maxTempStr, ok := record["Temperatura Máxima (C)"].(string); ok { + maxTemp, err := parseFloat(maxTempStr) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidMaxTemp, err) + } + meteoData.MaxTemp = maxTemp + } else { + return nil, ErrMissingMaxTempField + } + + if minTempStr, ok := record["Temperatura Mínima (C)"].(string); ok { + minTemp, err := parseFloat(minTempStr) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidMinTemp, err) + } + meteoData.MinTemp = minTemp + } else { + return nil, ErrMissingMinTempField + } + + if rainfallStr, ok := 
record["Precipitación (mm)"].(string); ok { + rainfall, err := parseFloat(rainfallStr) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidRainfall, err) + } + meteoData.Rainfall = rainfall + } else { + return nil, ErrMissingRainfallField + } + + if cloudinessStr, ok := record["Nubosidad (%)"].(string); ok { + cloudiness, err := parseFloat(cloudinessStr) + if err != nil { + return nil, fmt.Errorf("%w: %v", ErrInvalidCloudiness, err) + } + meteoData.Cloudiness = cloudiness + } else { + return nil, ErrMissingCloudinessField + } + + return meteoData, nil +} + +func parseFloat(s string) (float32, error) { + s = strings.Replace(s, ",", ".", 1) + f, err := strconv.ParseFloat(s, 32) + if err != nil { + return 0, err + } + return float32(f), nil +} diff --git a/service_a/internal/domains/meteo/file_test.go b/service_a/internal/domains/meteo/file_test.go new file mode 100644 index 0000000..3a33c76 --- /dev/null +++ b/service_a/internal/domains/meteo/file_test.go @@ -0,0 +1,104 @@ +package meteo_test + +import ( + "os" + "servicea/internal/domains/meteo" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func Test_CSV_ParseFile(t *testing.T) { + tests := []struct { + name string + filePath string + expectedInserted int + expectedRejected int + validateInserted func(t *testing.T, inserted []meteo.MeteoData) + validateRejected func(t *testing.T, rejected []meteo.RejectedMeteoData) + }{ + { + name: "valid record", + filePath: "./../../../assets/test_1.csv", + expectedInserted: 1, + expectedRejected: 0, + validateInserted: func(t *testing.T, inserted []meteo.MeteoData) { + assert.Equal(t, 1, len(inserted)) + record := inserted[0] + assert.Equal(t, time.Date(2025, 10, 12, 0, 0, 0, 0, time.UTC), record.Timestamp) + assert.Equal(t, "Madrid", record.Location) + assert.Equal(t, float32(11.55), record.MaxTemp) + assert.Equal(t, float32(6.25), record.MinTemp) + assert.Equal(t, float32(0), record.Rainfall) + assert.Equal(t, float32(10), record.Cloudiness) + 
}, + validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) { + assert.Empty(t, rejected) + }, + }, + { + name: "record with leading spaces", + filePath: "./../../../assets/test_2.csv", + expectedInserted: 1, + expectedRejected: 0, + validateInserted: func(t *testing.T, inserted []meteo.MeteoData) { + assert.Equal(t, 1, len(inserted)) + // TrimLeadingSpace should handle the spaces before Madrid + assert.Equal(t, "Madrid", inserted[0].Location) + }, + validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) { + assert.Empty(t, rejected) + }, + }, + { + name: "missing city column", + filePath: "./../../../assets/test_3.csv", + expectedInserted: 0, + expectedRejected: 1, + validateInserted: func(t *testing.T, inserted []meteo.MeteoData) { + assert.Empty(t, inserted) + }, + validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) { + assert.Equal(t, 1, len(rejected)) + assert.Contains(t, rejected[0].Reason, "missing city field") + assert.Equal(t, "2025/10/12;11,55;6,25;0;10", rejected[0].RowValue) + }, + }, + { + name: "empty file with only header", + filePath: "./../../../assets/test_4.csv", + expectedInserted: 0, + expectedRejected: 0, + validateInserted: func(t *testing.T, inserted []meteo.MeteoData) { + assert.Empty(t, inserted) + }, + validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) { + assert.Empty(t, rejected) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + file, err := os.Open(tt.filePath) + assert.NoError(t, err) + defer file.Close() + + csvIngest := &meteo.CSV{} + fileStats := &meteo.FileStats{} + inserted, rejected, err := csvIngest.Parse(file, fileStats) + + assert.NoError(t, err) + assert.Equal(t, tt.expectedInserted, fileStats.RowsInserted) + assert.Equal(t, tt.expectedRejected, fileStats.RowsRejected) + + if tt.validateInserted != nil { + tt.validateInserted(t, inserted) + } + if tt.validateRejected != nil { + tt.validateRejected(t, rejected) + 
} + }) + } +} diff --git a/service_a/server/database/migrations/001_data.up.sql b/service_a/server/database/migrations/001_data.up.sql index 1b85500..d06c82a 100644 --- a/service_a/server/database/migrations/001_data.up.sql +++ b/service_a/server/database/migrations/001_data.up.sql @@ -1,7 +1,7 @@ create table public.locations ( id serial primary key, - location_name varchar(255) not null + location_name varchar(255) not null unique );