我正在尝试使用Golang使用goroutines加载一个大的CSV文件.csv的维数是(254882,100).但是当我解析csv并将其存储到2D列表时使用我的goroutine,我得到的行小于254882,并且每次运行的数量都不同.我觉得这是由于goroutines正在发生,但似乎无法指出原因.谁能帮帮我吗.我也是Golang的新人.这是我的代码如下
func loadCSV(csvFile string) (*[][]float64, error) {
startTime := time.Now()
var dataset [][]float64
f, err := os.Open(csvFile)
if err != nil {
return &dataset, err
}
r := csv.NewReader(bufio.NewReader(f))
counter := 0
var wg sync.WaitGroup
for {
record, err := r.Read()
if err == io.EOF {
break
}
if counter != 0 {
wg.Add(1)
go func(r []string, dataset *[][]float64) {
var temp []float64
for _, each := range record {
f, err := strconv.ParseFloat(each, 64)
if err == nil {
temp = append(temp, f)
}
}
*dataset = append(*dataset, temp)
wg.Done()
}(record, &dataset)
}
counter++
}
wg.Wait()
duration := time.Now().Sub(startTime)
log.Printf("Loaded %d rows in %v seconds", counter, duration)
return &dataset, nil
}
Run Code Online (Sandbox Code Playgroud)
我的主要功能如下所示
func main() {
// runtime.GOMAXPROCS(4)
dataset, err := loadCSV("AvgW2V_train.csv")
if err != nil {
panic(err)
}
fmt.Println(len(*dataset))
}
Run Code Online (Sandbox Code Playgroud)
如果有人也需要下载CSV,请点击下面的链接(485 MB) https://drive.google.com/file/d/1G4Nw6JyeC-i0R1exWp5BtRtGM1Fwyelm/view?usp=sharing
您的结果未定义,因为您有数据竞争.
~/gopath/src$ go run -race racer.go
==================
WARNING: DATA RACE
Write at 0x00c00008a060 by goroutine 6:
runtime.mapassign_faststr()
/home/peter/go/src/runtime/map_faststr.go:202 +0x0
main.main.func2()
/home/peter/gopath/src/racer.go:16 +0x6a
Previous write at 0x00c00008a060 by goroutine 5:
runtime.mapassign_faststr()
/home/peter/go/src/runtime/map_faststr.go:202 +0x0
main.main.func1()
/home/peter/gopath/src/racer.go:11 +0x6a
Goroutine 6 (running) created at:
main.main()
/home/peter/gopath/src/racer.go:14 +0x88
Goroutine 5 (running) created at:
main.main()
/home/peter/gopath/src/racer.go:9 +0x5b
==================
fatal error: concurrent map writes
==================
WARNING: DATA RACE
Write at 0x00c00009a088 by goroutine 6:
main.main.func2()
/home/peter/gopath/src/racer.go:16 +0x7f
Previous write at 0x00c00009a088 by goroutine 5:
main.main.func1()
/home/peter/gopath/src/racer.go:11 +0x7f
Goroutine 6 (running) created at:
main.main()
/home/peter/gopath/src/racer.go:14 +0x88
Goroutine 5 (running) created at:
main.main()
/home/peter/gopath/src/racer.go:9 +0x5b
==================
goroutine 34 [running]:
runtime.throw(0x49e156, 0x15)
/home/peter/go/src/runtime/panic.go:608 +0x72 fp=0xc000094718 sp=0xc0000946e8 pc=0x44b342
runtime.mapassign_faststr(0x48ace0, 0xc00008a060, 0x49c9c3, 0x8, 0xc00009a088)
/home/peter/go/src/runtime/map_faststr.go:211 +0x46c fp=0xc000094790 sp=0xc000094718 pc=0x43598c
main.main.func1(0x49c9c3, 0x8)
/home/peter/gopath/src/racer.go:11 +0x6b fp=0xc0000947d0 sp=0xc000094790 pc=0x47ac6b
runtime.goexit()
/home/peter/go/src/runtime/asm_amd64.s:1340 +0x1 fp=0xc0000947d8 sp=0xc0000947d0 pc=0x473061
created by main.main
/home/peter/gopath/src/racer.go:9 +0x5c
goroutine 1 [sleep]:
time.Sleep(0x5f5e100)
/home/peter/go/src/runtime/time.go:105 +0x14a
main.main()
/home/peter/gopath/src/racer.go:19 +0x96
goroutine 35 [runnable]:
main.main.func2(0x49c9c3, 0x8)
/home/peter/gopath/src/racer.go:16 +0x6b
created by main.main
/home/peter/gopath/src/racer.go:14 +0x89
exit status 2
~/gopath/src$
Run Code Online (Sandbox Code Playgroud)
racer.go:
package main
import (
"bufio"
"encoding/csv"
"fmt"
"io"
"log"
"os"
"strconv"
"sync"
"time"
)
func loadCSV(csvFile string) (*[][]float64, error) {
startTime := time.Now()
var dataset [][]float64
f, err := os.Open(csvFile)
if err != nil {
return &dataset, err
}
r := csv.NewReader(bufio.NewReader(f))
counter := 0
var wg sync.WaitGroup
for {
record, err := r.Read()
if err == io.EOF {
break
}
if counter != 0 {
wg.Add(1)
go func(r []string, dataset *[][]float64) {
var temp []float64
for _, each := range record {
f, err := strconv.ParseFloat(each, 64)
if err == nil {
temp = append(temp, f)
}
}
*dataset = append(*dataset, temp)
wg.Done()
}(record, &dataset)
}
counter++
}
wg.Wait()
duration := time.Now().Sub(startTime)
log.Printf("Loaded %d rows in %v seconds", counter, duration)
return &dataset, nil
}
func main() {
// runtime.GOMAXPROCS(4)
dataset, err := loadCSV("/home/peter/AvgW2V_train.csv")
if err != nil {
panic(err)
}
fmt.Println(len(*dataset))
}
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
133 次 |
| 最近记录: |