go 协程比 Python 多进程快好多! - V2EX
V2EX = way to explore
V2EX 是一个关于分享和探索的地方
Sign Up Now
For Existing Member  Sign In
请不要在回答技术问题时复制粘贴 AI 生成的内容
777777

go 协程比 Python 多进程快好多!

  •  
  •   777777 Dec 13, 2023 1391 views
    This topic was created 868 days ago; the information mentioned may have changed or developed since then.

    需求:有 2 个 list(alist、blist),alist 的每个值都要与 blist 的每个值做字符串相似度计算,两个 list 的数量级均为 20 万。下面为 Python 和 Go 的代码片段。Python:

    # Compute the similarity of two strings.
    def similar(a, b):
        """Return the similarity of two strings as a value scaled by 1/100.

        Returns 0 if either argument is None. Otherwise the score is
        ``fuzz.token_set_ratio(a, b.lower()) / 100``.
        """
        if a is None or b is None:
            return 0
        similarity = fuzz.token_set_ratio(a, b.lower()) / 100
        # NOTE(review): this runs once per compared pair; on large inputs the
        # output volume alone is significant. Consider removing it or moving
        # it behind a debug flag.
        print("similar a:", a, ",", "b:", b, ", similarity:", similarity)
        return similarity


    def compute_similarity(args):
        """Score one ``(record_name, name)`` pair.

        Takes a single tuple so it can be passed directly to ``executor.map``.
        Returns ``(similarity, name)`` so the caller can tell which candidate
        the score belongs to.
        """
        record_name, name = args
        return similar(record_name, name), name


    # Update database records.
    def update_database(cursor, name_mapping, csv_data):
        """Set ``factory`` on every fingerprint row whose name matches a CSV row.

        Args:
            cursor: DB-API style cursor; only ``executemany`` is called on it.
            name_mapping: mapping of UUID -> iterable of names (None/empty
                names are ignored).
            csv_data: iterable of dict-like rows read via ``row.get("vendor")``
                and ``row.get("name")``.

        Returns:
            The number of ``(vendor, uuid)`` updates sent to the database.
        """
        update_sql = "UPDATE tweb_fingerprint_test SET factory = %s WHERE uuid = %s"

        # Batch size for executor.map. The default of 1 ships every pair to a
        # worker individually, so IPC overhead dwarfs the comparison itself.
        similarity_chunksize = 1024

        # Reverse mapping so a UUID can be looked up quickly by name.
        name_to_uuid = defaultdict(list)
        for uuid, names in name_mapping.items():
            for name in names:
                if name:  # skip None or empty names
                    name_to_uuid[name].append(uuid)

        # The candidate list is the same for every row; build it once.
        candidate_names = list(name_to_uuid)

        updates = []
        with ProcessPoolExecutor() as executor:
            for row in csv_data:
                vendor_name = row.get("vendor")
                record_name = row.get("name")
                print("record_name:", record_name)
                if (
                    vendor_name is None
                    or record_name is None
                    or vendor_name in ["未知", "None"]
                ):
                    continue  # skip this row

                # Exact lookup first. Copy so the mapping's own list is never
                # aliased by the per-row result.
                uuids_to_update = list(name_to_uuid.get(record_name, []))

                # No exact match: look for names with similarity above 98%.
                if not uuids_to_update:
                    tasks = [(record_name, name) for name in candidate_names]
                    results = executor.map(
                        compute_similarity, tasks, chunksize=similarity_chunksize
                    )
                    for similarity, name in results:
                        if similarity > 0.98:
                            # Extend with the UUIDs themselves. Extending with
                            # the per-name lists would put whole lists into
                            # the SQL parameters.
                            uuids_to_update.extend(name_to_uuid[name])

                # Queue one update per matched UUID.
                updates.extend(
                    (vendor_name, uuid_to_update) for uuid_to_update in uuids_to_update
                )

        # Batch update.
        if updates:
            cursor.executemany(update_sql, updates)

        # Number of updated records.
        return len(updates)

    go:

    // Similar calculates the similarity between two strings func Similar(a, b string) float64 { return smetrics.JaroWinkler(a, b, 0.7, 4) } // UpdateDatabase updates the database with the new vendor information // UpdateDatabase updates the database with the new vendor information func UpdateDatabase(db *sql.DB, vendors map[string]Vendor, records []CSVRecord) (int, error) { fmt.Println("records", len(records)) fmt.Println("vendors", len(vendors)) stmt, err := db.Prepare("UPDATE tweb_fingerprint SET factory = ? WHERE uuid = ?") if err != nil { return 0, err } defer stmt.Close() var wg sync.WaitGroup updates := make(chan Updatedata, len(records)) for _, record := range records { wg.Add(1) go func(record CSVRecord) { defer wg.Done() // fmt.Println(record.Name) for _, vendor := range vendors { if record.Name == vendor.Name.String || Simila(record.Name, vendor.Name.String) > SimilarityThreshold { updates <- Updatedata{ UUID: vendor.UUID, Factory: record.Vendor, } } } }(record) } go func() { wg.Wait() close(updates) }() count := 0 for update := range updates { fmt.Println("update:", update) if _, err := stmt.Exec(update.Factory, update.UUID); err != nil { return count, err } count++ } return count, nil } 
    Baloneo
        1
    Baloneo  
       Dec 13, 2023
    快多少?
    777777
        2
    777777  
    OP
       Dec 13, 2023
    @Baloneo 至少 10 倍吧,python CPU 都打不满,没跑完我就重构成 go 了,go 十分钟就跑完了
    About     Help     Advertise     Blog     API     FAQ     Solana     3223 Online   Highest 6679       Select Language
    创意工作者们的社区
    World is powered by solitude
    VERSION: 3.9.8.5 31ms UTC 13:28 PVG 21:28 LAX 06:28 JFK 09:28
    Do have faith in what you're doing.
    ubao msn snddm index pchome yahoo rakuten mypaper meadowduck bidyahoo youbao zxmzxm asda bnvcg cvbfg dfscv mmhjk xxddc yybgb zznbn ccubao uaitu acv GXCV ET GDG YH FG BCVB FJFH CBRE CBC GDG ET54 WRWR RWER WREW WRWER RWER SDG EW SF DSFSF fbbs ubao fhd dfg ewr dg df ewwr ewwr et ruyut utut dfg fgd gdfgt etg dfgt dfgd ert4 gd fgg wr 235 wer3 we vsdf sdf gdf ert xcv sdf rwer hfd dfg cvb rwf afb dfh jgh bmn lgh rty gfds cxv xcv xcs vdas fdf fgd cv sdf tert sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf sdf shasha9178 shasha9178 shasha9178 shasha9178 shasha9178 liflif2 liflif2 liflif2 liflif2 liflif2 liblib3 liblib3 liblib3 liblib3 liblib3 zhazha444 zhazha444 zhazha444 zhazha444 zhazha444 dende5 dende denden denden2 denden21 fenfen9 fenf619 fen619 fenfe9 fe619 sdf sdf sdf sdf sdf zhazh90 zhazh0 zhaa50 zha90 zh590 zho zhoz zhozh zhozho zhozho2 lislis lls95 lili95 lils5 liss9 sdf0ty987 sdft876 sdft9876 sdf09876 sd0t9876 sdf0ty98 sdf0976 sdf0ty986 sdf0ty96 sdf0t76 sdf0876 df0ty98 sf0t876 sd0ty76 sdy76 sdf76 sdf0t76 sdf0ty9 sdf0ty98 sdf0ty987 sdf0ty98 sdf6676 sdf876 sd876 sd876 sdf6 sdf6 sdf9876 sdf0t sdf06 sdf0ty9776 sdf0ty9776 sdf0ty76 sdf8876 sdf0t sd6 sdf06 s688876 sd688 sdf86