使用Python对比两个Excel文件,去除(或保留)重复数据

import openpyxl as vb
from openpyxl import load_workbook

def get_row_value(ws,row):
    """Return the values of every cell in one worksheet row as a list.

    ws is the worksheet to read from and row is the row index handed to
    ws.cell(). Columns 1 through ws.max_column are read in order, so the
    returned list has one entry per column of the sheet.
    """
    last_column = ws.max_column
    return [
        ws.cell(row=row, column=col_idx).value
        for col_idx in range(1, last_column + 1)
    ]

# Build the comparison keys from the reference workbook.
# Only column A is used to detect duplicates; change the column letter
# below (in both workbooks) to compare on a different column.
# Values are normalised with str(...).strip(), so an empty cell (None)
# becomes the key "None".
wb1 = vb.load_workbook('data_all.xlsx')
wbsheet_1 = wb1.active
wblist_1 = [str(cell.value).strip() for cell in wbsheet_1['A']]

# Same key extraction for the workbook whose rows are being filtered.
wb2 = vb.load_workbook('test1.xlsx')
wbsheet_2 = wb2.active
wblist_2 = [str(cell.value).strip() for cell in wbsheet_2['A']]

# Fresh workbook that receives the surviving rows.
wb3 = vb.Workbook()
wbsheet_3 = wb3.active

# A set makes each membership test constant-time instead of scanning the
# whole reference list for every row of the second workbook.
known_keys = set(wblist_1)

# The n-th key in wblist_2 is treated as belonging to worksheet row n
# (1-based), which is the row index passed to get_row_value.
for row_count, key in enumerate(wblist_2, start=1):
    if key not in known_keys:   # to keep the duplicates instead, drop the `not`
        wbsheet_3.append(get_row_value(wbsheet_2, row_count))

wb3.save('result.xlsx')

实际上,只要掌握Excel文件的读取和创建方法,就可以实现很多Excel操作。


版权声明:本文为Nuemann_N21原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接和本声明。