add csv parsing
This commit is contained in:
parent
213c9480e7
commit
4eef117330
@ -2,10 +2,17 @@ FROM golang:1.25.2-alpine3.22 AS builder
|
||||
|
||||
WORKDIR /app
|
||||
|
||||
COPY go.mod ./
|
||||
COPY go.mod go.sum ./
|
||||
COPY server/ ./server/
|
||||
COPY internal/ ./internal/
|
||||
COPY assets/ ./assets/
|
||||
|
||||
RUN go mod download
|
||||
|
||||
RUN go test ./... -v
|
||||
|
||||
RUN rm -rf ./assets/
|
||||
|
||||
RUN go build -o /app/service_a ./server/main.go
|
||||
|
||||
FROM alpine:latest
|
||||
|
||||
2
service_a/assets/test_1.csv
Normal file
2
service_a/assets/test_1.csv
Normal file
@ -0,0 +1,2 @@
|
||||
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
|
||||
2025/10/12;Madrid;11,55;6,25;0;10
|
||||
|
2
service_a/assets/test_2.csv
Normal file
2
service_a/assets/test_2.csv
Normal file
@ -0,0 +1,2 @@
|
||||
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
|
||||
2025/10/12; Madrid;11,55;6,25;0;10
|
||||
|
2
service_a/assets/test_3.csv
Normal file
2
service_a/assets/test_3.csv
Normal file
@ -0,0 +1,2 @@
|
||||
Fecha;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
|
||||
2025/10/12;11,55;6,25;0;10
|
||||
|
1
service_a/assets/test_4.csv
Normal file
1
service_a/assets/test_4.csv
Normal file
@ -0,0 +1 @@
|
||||
Fecha;Ciudad;Temperatura Máxima (C);Temperatura Mínima (C);Precipitación (mm);Nubosidad (%)
|
||||
|
@ -1,3 +1,11 @@
|
||||
module servicea
|
||||
|
||||
go 1.25.2
|
||||
|
||||
require github.com/stretchr/testify v1.11.1
|
||||
|
||||
require (
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.1 // indirect
|
||||
)
|
||||
|
||||
10
service_a/go.sum
Normal file
10
service_a/go.sum
Normal file
@ -0,0 +1,10 @@
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
52
service_a/internal/domains/meteo/domain.go
Normal file
52
service_a/internal/domains/meteo/domain.go
Normal file
@ -0,0 +1,52 @@
|
||||
package meteo
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"time"
|
||||
)
|
||||
|
||||
// H is a generic string-keyed map. In this package it holds one CSV row,
// keyed by the column name taken from the file header.
type H map[string]any
|
||||
|
||||
// MeteoData is one accepted weather measurement for a location and day.
//
// NOTE(review): the csv struct tags are not read by any code visible in this
// package (normalize looks columns up by their literal header text) — confirm
// whether another consumer relies on them.
type MeteoData struct {
	Timestamp  time.Time `csv:"fecha"`              // day of the measurement ("Fecha" column)
	Location   string    `csv:"ciudad"`             // city name ("Ciudad" column)
	MaxTemp    float32   `csv:"temperatura maxima"` // maximum temperature; column header says (C)
	MinTemp    float32   `csv:"temperatura minima"` // minimum temperature; column header says (C)
	Rainfall   float32   `csv:"precipitacion"`      // precipitation; column header says (mm)
	Cloudiness float32   `csv:"nubosidad"`          // cloud cover; column header says (%)
}
|
||||
|
||||
// RejectedMeteoData describes an input row that could not be turned into a
// MeteoData value.
type RejectedMeteoData struct {
	RowValue string // the row's fields joined back together with ';'
	Reason   string // text of the error that caused the rejection
}
|
||||
|
||||
// FileStats collects counters about the processing of one input file.
type FileStats struct {
	RowsInserted int    `json:"rows_inserted"` // incremented by Parse for every accepted row
	RowsRejected int    `json:"rows_rejected"` // incremented by Parse for every rejected row
	ElapsedMS    int    `json:"elapsed_ms"`    // presumably processing time in milliseconds; not set by code visible here
	FileChecksum string `json:"file_checksum"` // presumably a digest of the file; not set by code visible here
}
|
||||
|
||||
// Sentinel errors of the meteo domain. Compare against them with errors.Is;
// several are wrapped with extra detail before being returned.
var (
	// File- and record-level failures.
	ErrCannotParseFile   = errors.New("cannot parse file")
	ErrValidateRecord    = errors.New("error validating record")
	ErrRecordNotValid    = errors.New("record not valid")
	ErrInvalidDateFormat = errors.New("invalid date format")
	ErrReadingCSVHeader  = errors.New("error reading CSV header")
	ErrReadingCSVRow     = errors.New("error reading CSV row")

	// A required column is absent from the row.
	ErrMissingDateField       = errors.New("missing date field")
	ErrMissingCityField       = errors.New("missing city field")
	ErrMissingMaxTempField    = errors.New("missing max temp field")
	ErrMissingMinTempField    = errors.New("missing min temp field")
	ErrMissingRainfallField   = errors.New("missing rainfall field")
	ErrMissingCloudinessField = errors.New("missing cloudiness field")

	// A numeric column is present but cannot be parsed as a number.
	ErrInvalidMaxTemp    = errors.New("invalid max temp")
	ErrInvalidMinTemp    = errors.New("invalid min temp")
	ErrInvalidRainfall   = errors.New("invalid rainfall")
	ErrInvalidCloudiness = errors.New("invalid cloudiness")

	// A numeric column parsed correctly but lies outside the accepted range.
	ErrMaxTempOutOfRange    = errors.New("max temp out of range (must be <= 60°C)")
	ErrMinTempOutOfRange    = errors.New("min temp out of range (must be >= -20°C)")
	ErrRainfallOutOfRange   = errors.New("rainfall out of range (must be 0-500 mm)")
	ErrCloudinessOutOfRange = errors.New("cloudiness out of range (must be 0-100%)")
)
|
||||
170
service_a/internal/domains/meteo/file.go
Normal file
170
service_a/internal/domains/meteo/file.go
Normal file
@ -0,0 +1,170 @@
|
||||
package meteo
|
||||
|
||||
import (
	"encoding/csv"
	"fmt"
	"io"
	"math"
	"strconv"
	"strings"
	"time"
)
|
||||
|
||||
func (mt *MeteoData) validate() error {
|
||||
if mt.MaxTemp > 60 {
|
||||
return ErrMaxTempOutOfRange
|
||||
}
|
||||
|
||||
if mt.MinTemp < -20 {
|
||||
return ErrMinTempOutOfRange
|
||||
}
|
||||
|
||||
if mt.Rainfall < 0 || mt.Rainfall > 80 {
|
||||
return ErrRainfallOutOfRange
|
||||
}
|
||||
|
||||
if mt.Cloudiness < 0 || mt.Cloudiness > 100 {
|
||||
return ErrCloudinessOutOfRange
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type FileIngest interface {
|
||||
Parse(io io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error)
|
||||
}
|
||||
|
||||
// CSV is the FileIngest implementation for semicolon-separated text files.
type CSV struct{}

// Compile-time check that *CSV satisfies FileIngest.
var _ FileIngest = (*CSV)(nil)
|
||||
|
||||
func (c *CSV) Parse(r io.Reader, fs *FileStats) ([]MeteoData, []RejectedMeteoData, error) {
|
||||
reader := csv.NewReader(r)
|
||||
reader.Comma = ';'
|
||||
reader.TrimLeadingSpace = true
|
||||
|
||||
header, err := reader.Read()
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("%w: %v", ErrReadingCSVHeader, err)
|
||||
}
|
||||
|
||||
var meteoDataList []MeteoData
|
||||
var rejectedDataList []RejectedMeteoData
|
||||
|
||||
for {
|
||||
row, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, nil, fmt.Errorf("%w: %v", ErrReadingCSVRow, err)
|
||||
}
|
||||
|
||||
if len(row) == 0 || (len(row) == 1 && row[0] == "") {
|
||||
continue
|
||||
}
|
||||
|
||||
rowValue := strings.Join(row, ";")
|
||||
|
||||
record := make(H)
|
||||
for i, value := range row {
|
||||
if i < len(header) {
|
||||
record[header[i]] = value
|
||||
}
|
||||
}
|
||||
|
||||
meteoData, err := normalize(record)
|
||||
if err != nil {
|
||||
fs.RowsRejected++
|
||||
rejectedDataList = append(rejectedDataList, RejectedMeteoData{
|
||||
RowValue: rowValue,
|
||||
Reason: err.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
if err := meteoData.validate(); err != nil {
|
||||
fs.RowsRejected++
|
||||
rejectedDataList = append(rejectedDataList, RejectedMeteoData{
|
||||
RowValue: rowValue,
|
||||
Reason: err.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
|
||||
meteoDataList = append(meteoDataList, *meteoData)
|
||||
fs.RowsInserted++
|
||||
}
|
||||
|
||||
return meteoDataList, rejectedDataList, nil
|
||||
}
|
||||
|
||||
func normalize(record H) (*MeteoData, error) {
|
||||
meteoData := &MeteoData{}
|
||||
|
||||
if dateStr, ok := record["Fecha"].(string); ok {
|
||||
t, err := time.Parse("2006/01/02", dateStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrInvalidDateFormat, err)
|
||||
}
|
||||
meteoData.Timestamp = t
|
||||
} else {
|
||||
return nil, ErrMissingDateField
|
||||
}
|
||||
|
||||
if location, ok := record["Ciudad"].(string); ok {
|
||||
meteoData.Location = location
|
||||
} else {
|
||||
return nil, ErrMissingCityField
|
||||
}
|
||||
|
||||
if maxTempStr, ok := record["Temperatura Máxima (C)"].(string); ok {
|
||||
maxTemp, err := parseFloat(maxTempStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrInvalidMaxTemp, err)
|
||||
}
|
||||
meteoData.MaxTemp = maxTemp
|
||||
} else {
|
||||
return nil, ErrMissingMaxTempField
|
||||
}
|
||||
|
||||
if minTempStr, ok := record["Temperatura Mínima (C)"].(string); ok {
|
||||
minTemp, err := parseFloat(minTempStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrInvalidMinTemp, err)
|
||||
}
|
||||
meteoData.MinTemp = minTemp
|
||||
} else {
|
||||
return nil, ErrMissingMinTempField
|
||||
}
|
||||
|
||||
if rainfallStr, ok := record["Precipitación (mm)"].(string); ok {
|
||||
rainfall, err := parseFloat(rainfallStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrInvalidRainfall, err)
|
||||
}
|
||||
meteoData.Rainfall = rainfall
|
||||
} else {
|
||||
return nil, ErrMissingRainfallField
|
||||
}
|
||||
|
||||
if cloudinessStr, ok := record["Nubosidad (%)"].(string); ok {
|
||||
cloudiness, err := parseFloat(cloudinessStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%w: %v", ErrInvalidCloudiness, err)
|
||||
}
|
||||
meteoData.Cloudiness = cloudiness
|
||||
} else {
|
||||
return nil, ErrMissingCloudinessField
|
||||
}
|
||||
|
||||
return meteoData, nil
|
||||
}
|
||||
|
||||
// parseFloat converts the text of a numeric CSV field into a float32.
//
// Surrounding whitespace is ignored and the first comma is treated as the
// decimal separator, so both "11,55" and "11.55" are accepted. An error is
// returned when the text is not a number, does not fit in a float32, or
// spells a non-finite value: strconv.ParseFloat accepts "NaN" and "Inf"
// without complaint, but neither is a meaningful measurement and NaN would
// defeat any later range comparison.
func parseFloat(s string) (float32, error) {
	s = strings.Replace(strings.TrimSpace(s), ",", ".", 1)

	f, err := strconv.ParseFloat(s, 32)
	if err != nil {
		return 0, err
	}
	if math.IsNaN(f) || math.IsInf(f, 0) {
		return 0, fmt.Errorf("non-finite value %q", s)
	}

	return float32(f), nil
}
|
||||
104
service_a/internal/domains/meteo/file_test.go
Normal file
104
service_a/internal/domains/meteo/file_test.go
Normal file
@ -0,0 +1,104 @@
|
||||
package meteo_test
|
||||
|
||||
import (
|
||||
"os"
|
||||
"servicea/internal/domains/meteo"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func Test_CSV_ParseFile(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
filePath string
|
||||
expectedInserted int
|
||||
expectedRejected int
|
||||
validateInserted func(t *testing.T, inserted []meteo.MeteoData)
|
||||
validateRejected func(t *testing.T, rejected []meteo.RejectedMeteoData)
|
||||
}{
|
||||
{
|
||||
name: "valid record",
|
||||
filePath: "./../../../assets/test_1.csv",
|
||||
expectedInserted: 1,
|
||||
expectedRejected: 0,
|
||||
validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
|
||||
assert.Equal(t, 1, len(inserted))
|
||||
record := inserted[0]
|
||||
assert.Equal(t, time.Date(2025, 10, 12, 0, 0, 0, 0, time.UTC), record.Timestamp)
|
||||
assert.Equal(t, "Madrid", record.Location)
|
||||
assert.Equal(t, float32(11.55), record.MaxTemp)
|
||||
assert.Equal(t, float32(6.25), record.MinTemp)
|
||||
assert.Equal(t, float32(0), record.Rainfall)
|
||||
assert.Equal(t, float32(10), record.Cloudiness)
|
||||
},
|
||||
validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
|
||||
assert.Empty(t, rejected)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "record with leading spaces",
|
||||
filePath: "./../../../assets/test_2.csv",
|
||||
expectedInserted: 1,
|
||||
expectedRejected: 0,
|
||||
validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
|
||||
assert.Equal(t, 1, len(inserted))
|
||||
// TrimLeadingSpace should handle the spaces before Madrid
|
||||
assert.Equal(t, "Madrid", inserted[0].Location)
|
||||
},
|
||||
validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
|
||||
assert.Empty(t, rejected)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "missing city column",
|
||||
filePath: "./../../../assets/test_3.csv",
|
||||
expectedInserted: 0,
|
||||
expectedRejected: 1,
|
||||
validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
|
||||
assert.Empty(t, inserted)
|
||||
},
|
||||
validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
|
||||
assert.Equal(t, 1, len(rejected))
|
||||
assert.Contains(t, rejected[0].Reason, "missing city field")
|
||||
assert.Equal(t, "2025/10/12;11,55;6,25;0;10", rejected[0].RowValue)
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "empty file with only header",
|
||||
filePath: "./../../../assets/test_4.csv",
|
||||
expectedInserted: 0,
|
||||
expectedRejected: 0,
|
||||
validateInserted: func(t *testing.T, inserted []meteo.MeteoData) {
|
||||
assert.Empty(t, inserted)
|
||||
},
|
||||
validateRejected: func(t *testing.T, rejected []meteo.RejectedMeteoData) {
|
||||
assert.Empty(t, rejected)
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
file, err := os.Open(tt.filePath)
|
||||
assert.NoError(t, err)
|
||||
defer file.Close()
|
||||
|
||||
csvIngest := &meteo.CSV{}
|
||||
fileStats := &meteo.FileStats{}
|
||||
inserted, rejected, err := csvIngest.Parse(file, fileStats)
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, tt.expectedInserted, fileStats.RowsInserted)
|
||||
assert.Equal(t, tt.expectedRejected, fileStats.RowsRejected)
|
||||
|
||||
if tt.validateInserted != nil {
|
||||
tt.validateInserted(t, inserted)
|
||||
}
|
||||
if tt.validateRejected != nil {
|
||||
tt.validateRejected(t, rejected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -1,7 +1,7 @@
|
||||
create table public.locations
|
||||
(
|
||||
id serial primary key,
|
||||
location_name varchar(255) not null
|
||||
location_name varchar(255) not null unique
|
||||
);
|
||||
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user