请大神帮忙看看代码,我想写一个从数据库导出数据到 excel 的工具,目前虽然实现了但是执行效率很慢 - V2EX
V2EX = way to explore
V2EX 是一个关于分享和探索的地方
现在注册
已注册用户请  登录
The Go Programming Language
http://golang.org/
Go Playground
Go Projects
Revel Web Framework
praynise
V2EX    Go 编程语言

请大神帮忙看看代码,我想写一个从数据库导出数据到 excel 的工具,目前虽然实现了是执行效率很慢

  •  
  •   praynise 2018-03-27 19:01:36 +08:00 1814 次点击
    这是一个创建于 2784 天前的主题,其中的信息可能已经有所发展或是发生改变。
    代码如下:
    package main

    import (
    _ "gopkg.in/goracle.v2"
    _ "github.com/asifjalil/cli"
    "github.com/jmoiron/sqlx"
    "flag"
    "fmt"
    "github.com/axgle/mahonia"
    "strings"
    "os"
    "strconv"
    "io/ioutil"
    "database/sql"
    "bufio"
    "bytes"
    "time"
    "github.com/360EntSecGroup-Skylar/excelize"
    "runtime"
    )

    func DataGetter(db *sqlx.DB,query string,rowChan chan <- []interface{},columnChan chan <- []string){
    defer db.Close()
    row,err := db.Queryx(query)
    if err != nil{
    panic(err)
    }
    defer row.Close()
    columns,err := row.Columns()
    columnChan <- columns
    close(columnChan)
    if err !=nil {
    panic(fmt.Sprint("failed to add sheet:%s",err.Error()))
    }
    for row.Next(){
    r,err := row.SliceScan()
    if err !=nil{
    panic("db row query failed")
    }
    rowChan <- r
    }
    close(rowChan)
    }

    func ExcelWriter(sheetHead string,fileName string,rowChan <- chan[]interface{},columnChan <- chan[]string){
    cnt := 2
    sheetcnt := 1
    var r []interface{}
    columns := <- columnChan
    hasNext := true
    excel := excelize.NewFile()
    excel.NewSheet(sheetHead)
    excel.SetSheetRow(sheetHead,"A1",columns)
    //excel.SetSheetRow(sheetHead,"A"+strconv.Itoa(cnt),columns)
    for hasNext{
    r,hasNext = <- rowChan
    for a := 0;a<len(columns);a++{
    t,ok := r[a].(time.Time)
    if ok{
    if t.IsZero(){
    excel.SetCellValue(sheetHead,excelize.ToAlphaString(a)+strconv.Itoa(cnt),"")
    }else{
    excel.SetCellValue(sheetHead,excelize.ToAlphaString(a)+strconv.Itoa(cnt),t)
    }
    }else{
    excel.SetCellValue(sheetHead,excelize.ToAlphaString(a)+strconv.Itoa(cnt),r[a])
    }
    }
    cnt = cnt + 1
    if cnt >= 100000{
    excel.NewSheet(sheetHead+strconv.Itoa(sheetcnt))
    sheetHead = sheetHead+strconv.Itoa(sheetcnt)
    excel.SetSheetRow(sheetHead,"A1",columns)
    cnt = 2
    sheetcnt = sheetcnt + 1
    }
    }
    excel.SaveAs(fileName+".xlsx")
    }



    func getConn(dbconn string,dbtype string)(db *sqlx.DB){
    if dbtype == "oracle"{
    driver := "goracle"
    return sqlx.MustOpen(driver,strings.Replace(dbconn,":","/",1))
    }else if dbtype == "db2"{
    driver := "cli"
    userPart := strings.Split(dbconn,"@")[0]
    username := strings.Split(userPart,":")[0]
    password := strings.Split(userPart,":")[1]
    dbPart := strings.Split(dbconn,"@")[0]
    dbname := strings.Split(dbPart,"/")[1]
    dbip := strings.Split(strings.Split(dbPart,"/")[0],":")[0]
    dbport := strings.Split(strings.Split(dbPart,"/")[0],":")[1]
    connString := fmt.Sprintf("Driver={IBM DB2 ODBC Driver};Hostname=%s;Port=%s;Protocol=TCPIP;Database=%s;CurrentSchema=%s;UID=%s;PWD=%s;",
    dbip,dbport,dbname,username,password)
    return sqlx.MustOpen(driver,connString)
    }else if dbtype == "postgres"{
    driver := "postgres"
    connString := "postgres://" + dbconn
    return sqlx.MustOpen(driver,connString)
    }else{
    fmt.Println("dbtype not matched!")
    os.Exit(-1)
    return
    }
    }

    func main() {
    //输入参数解析
    dbconn := flag.String("d","",`Database connect string,use "user:password@ip:port/dbname" for db2 or "user:password@tnsname" for oracle`)
    dbtype := flag.String("t","","Database type:oracle db2 mysql mssql")
    filetype := flag.String("f","xlsx","exported file type:xlsx or txt default:xlsx")
    //xlsx require options
    sheetname := flag.String("h","Sheet1","sheet name: default Sheet1")
    //txt require options
    //charset := flag.String("c","utf-8","charset for exported text file:gbk utf-8 and so on")
    //separator := flag.String("s","/","separator: default:/")
    //sql options
    query := flag.String("q","","sql in one line")
    sqlfile := flag.String("l","","sqlfile")
    filename := flag.String("n",time.Now().Format("20060102150405"),"filename")
    flag.Parse()

    if *dbcOnn== "" || *dbtype == "" || *filetype == ""{
    flag.Usage()
    return
    }
    if *query == "" && *sqlfile == ""{
    flag.Usage()
    return
    }
    if *sqlfile != "" {
    sqlbyte,err := ioutil.ReadFile(*sqlfile)
    if err != nil{
    panic("read sqlfile failed!")
    }
    utf8 := mahonia.NewEncoder("utf-8")
    *query = utf8.ConvertString(string(sqlbyte))
    }
    runtime.GOMAXPROCS(2)
    if *filetype == "xlsx"{
    rowChan := make(chan []interface{},50000)
    columnsChan := make(chan []string)
    db := getConn(*dbconn,*dbtype)
    go DataGetter(db,*query,rowChan,columnsChan)
    ExcelWriter(*sheetname,*filename,rowChan,columnsChan)
    //}else if *filetype == "txt"{
    // db := getConn(*dbconn,*dbtype)
    // TextFileExporter(db,*charset,*separator,*filename,*query)
    //}else{
    flag.Usage()
    return
    }

    }
    2 条回复    2018-04-10 18:11:03 +08:00
    praynise
        1
    praynise  
    OP
       2018-03-27 19:01:40 +08:00
    代码说明如下:
    概述:小弟的程序可以通过输入数据库类型、数据库连接字符串、执行 sql 或者含有 sql 的文件等,将数据库中的 sql 查询出来并写入 excel
    性能:目前导出 25W 条数据,每行数据 5 个字段,数据库类型 oracle,驱动 goracle,excel 生成库使用 excelize,用时 1 分 33 秒;同环境使用 python3.6,数据库驱动 cx_Oracle,excel 生成库使用 pyexcelerate,同样的查询语句,用时 51s (是的…没干过 python/(ㄒoㄒ)/~~)
    程序描述:为了提升执行效率,小弟使用了 goruntine,一个线程专门执行 sql 并将结果生成[]interface{}并装入通道,另外一个线程不断的从通道中取出[]interface{}并写入 excel
    个人感觉可能存在的问题点:
    1、golang 的数据库查询方式只能一条一条生成,同时,机制用到了反射,而不像 python 可以通过 fetchmany 一次性获取大量数据,不知道此处是否会有性能差距
    2、当字段类型是 date 类型时,当字段为空时,如果不做 isZero 判断,输出到 excel 的日期零值很异常(值为-5XXXXX,显示为##########)。所以每取出一条[]interface{},都需要挨个判断类型是不是日期,如果是日期的话,是不是零值,此处可能会影响效率。而 python 没有这个问题

    请各位 golang 大大给提点优化意见吧,谢谢大家
    praynise
        2
    praynise  
    OP
       2018-04-10 18:11:03 +08:00
    回来填个坑,留一个自己的优化思路和成果…
    查了一些资料,使用 interface 的性能会受到较大的影响,以前的程序中使用 interface 过多,直接影响了效率
    新的思路如下
    首先查询出各字段类型,针对不同的字段类型直接例如 excel 库的方法插入对应类型的数据。
    具体为:使用一个线程查询数据库,使用[]sql.Rawbyte 作为接收数据库数据的载体,通过一个 interface{}列表进行 scan,不断将[]sql.Rawbyte 存入通道
    另外一个线程不断获取[]sql.Rawbyte 并根据字段类型进行转换。为了进一步加快速度,首先可以声明一个 map[int]func(raw sql.Rawbyte,cell *xlsx.Cell),利用字段位置直接设置好每个位置该调用什么方法
    经过测试,改进后的程序执行效率大幅度提高,导出数据效率比 python 版本快了一倍,耗时缩短一半
    关于     帮助文档     自助推广系统     博客     API     FAQ     Solana     1109 人在线   最高记录 6679       Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 31ms UTC 17:25 PVG 01:25 LAX 09:25 JFK 12:25
    Do have faith in what you're doing.
    ubao msn snddm index pchome yahoo rakuten mypaper meadowduck bidyahoo youbao zxmzxm asda bnvcg cvbfg dfscv mmhjk xxddc yybgb zznbn ccubao uaitu acv GXCV ET GDG YH FG BCVB FJFH CBRE CBC GDG ET54 WRWR RWER WREW WRWER RWER SDG EW SF DSFSF fbbs ubao fhd dfg ewr dg df ewwr ewwr et ruyut utut dfg fgd gdfgt etg dfgt dfgd ert4 gd fgg wr 235 wer3 we vsdf sdf gdf ert xcv sdf rwer hfd dfg cvb rwf afb dfh jgh bmn lgh rty gfds cxv xcv xcs vdas fdf fgd cv sdf tert sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf shasha9178 shasha9178 shasha9178 shasha9178 shasha9178 liflif2 liflif2 liflif2 liflif2 liflif2 liblib3 liblib3 liblib3 liblib3 liblib3 zhazha444 zhazha444 zhazha444 zhazha444 zhazha444 dende5 dende denden denden2 denden21 fenfen9 fenf619 fen619 fenfe9 fe619 sdf sdf sdf sdf sdf zhazh90 zhazh0 zhaa50 zha90 zh590 zho zhoz zhozh zhozho zhozho2 lislis lls95 lili95 lils5 liss9 sdf0ty987 sdft876 sdft9876 sdf09876 sd0t9876 sdf0ty98 sdf0976 sdf0ty986 sdf0ty96 sdf0t76 sdf0876 df0ty98 sf0t876 sd0ty76 sdy76 sdf76 sdf0t76 sdf0ty9 sdf0ty98 sdf0ty987 sdf0ty98 sdf6676 sdf876 sd876 sd876 sdf6 sdf6 sdf9876 sdf0t sdf06 sdf0ty9776 sdf0ty9776 sdf0ty76 sdf8876 sdf0t sd6 sdf06 s688876 sd688 sdf86