使用 Python 扫描并遍历所有 Excel 配置表，检测其中是否含有某个关键词或敏感词

#-*- coding: UTF-8 -*-import xlrdimport osdef walk_all_excel(path):for root, dirs, fs in os.walk(path):for f in fs:if f.endswith('xlsx') or f.endswith('xls'):yield os.path.join(root, f)def check_excel

林新发

5510人浏览 · 2020-09-25 17:45:22

林新发 · 2020-09-25 17:45:22 发布

文章目录

一、问题

策划有个需求，需要扫描所有excel配置表，检测是否含有关键词或敏感词

二、解决办法

使用python的xlrd库，可以很方便的读取excel文件，读取每个sheet中每行每列的单元格的具体数据。

三、具体执行

1、excel配置表

如下，假设我们要扫描这三个配置表，有xls格式的，也有xlsx格式的。
（原文此处为一张截图，展示待扫描的 Excel 配置表文件。）
配置表中的内容，比如像这样子

2、python环境

python和xlrd库的安装这里就不啰嗦啦。需要注意的是：xlrd 从 2.0 版本开始只支持 xls 格式，不再支持 xlsx；如果需要像本文这样同时读取 xlsx 文件，请安装旧版本，例如 pip install xlrd==1.2.0。
python官网：https://www.python.org/

3、python代码

考虑到有的同学可能安装的是python2版本，我特意写了python2和python3两个版本的代码。

3.1、python3版本

import xlrd
import os

def walk_all_excel(path):
    """Yield the path of every Excel workbook found under *path*.

    The directory tree rooted at *path* is walked recursively with
    ``os.walk``.  A file is reported when its name ends with ``.xlsx`` or
    ``.xls`` (compared case-insensitively, and including the dot so that a
    name such as ``notesxls`` is not mistaken for a workbook).

    Files whose name starts with ``~$`` are skipped: Excel creates these
    owner/lock files next to a workbook while it is open, and although they
    carry an Excel extension they are not readable workbooks.

    Each yielded value is ``os.path.join(root, file_name)``.
    """
    excel_suffixes = ('.xlsx', '.xls')
    for root, _dirs, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.startswith('~$'):
                continue
            if file_name.lower().endswith(excel_suffixes):
                yield os.path.join(root, file_name)

def check_excel(f_path, target_strs):
    """Report every string cell in the workbook *f_path* that contains a target.

    The workbook is opened with ``xlrd`` and each sheet is scanned row by
    row.  For every cell whose value is a ``str`` and every entry of
    *target_strs* that occurs in it as a substring, one line is printed
    containing the file path, the matched target string, and the 1-based row
    and column numbers of the cell.  Cells holding non-string values are
    skipped.  Nothing is returned.
    """
    book = xlrd.open_workbook(f_path)
    for name in book.sheet_names():
        current_sheet = book.sheet_by_name(name)
        for row_idx in range(current_sheet.nrows):
            # row_values() gives the values of all cells in this row.
            for col_idx, cell in enumerate(current_sheet.row_values(row_idx)):
                if not isinstance(cell, str):
                    continue
                for target_str in target_strs:
                    if target_str in cell:
                        # Indices are zero-based; report them one-based.
                        print(f_path, ":", target_str, ":", row_idx + 1, col_idx + 1)

if __name__ == '__main__':
    # Keywords / sensitive words to search for in every string cell.
    target_strs = [
        u'棒棒糖',
        u'甜甜圈',
        u'加粗',
        u'ARMv8',
    ]
    # Scan every workbook found below the current working directory.
    for excel_path in walk_all_excel(u'./'):
        check_excel(excel_path, target_strs)

3.2、python2版本

#-*- coding: UTF-8 -*- 
import xlrd
import os

def walk_all_excel(path):
    """Yield the path of every Excel workbook found under *path*.

    The directory tree rooted at *path* is walked recursively with
    ``os.walk``.  A file is reported when its name ends with ``.xlsx`` or
    ``.xls`` (compared case-insensitively, and including the dot so that a
    name such as ``notesxls`` is not mistaken for a workbook).

    Files whose name starts with ``~$`` are skipped: Excel creates these
    owner/lock files next to a workbook while it is open, and although they
    carry an Excel extension they are not readable workbooks.

    Each yielded value is ``os.path.join(root, file_name)``.
    """
    excel_suffixes = ('.xlsx', '.xls')
    for root, _dirs, file_names in os.walk(path):
        for file_name in file_names:
            if file_name.startswith('~$'):
                continue
            if file_name.lower().endswith(excel_suffixes):
                yield os.path.join(root, file_name)

def check_excel(f_path, target_strs):
	#打开excel文件
    workbook = xlrd.open_workbook(f_path)
    #excel的sheet数组
    sheet_names = workbook.sheet_names()
    for sheet_name in sheet_names:
        sheet = workbook.sheet_by_name(sheet_name)
        #行数
        nrows = sheet.nrows
        for i in range(nrows):
        	#一行中的所有单元格
            blocks = sheet.row_values(i)
            col = 0
            for block in blocks:
            	#block就是一行中的一个个单元格
                if isinstance(block, (unicode,str)):
                    for target_str in target_strs:
                        if target_str in block:
                            print f_path.encode('gbk'), ":",target_str,":", i+1, col+1
                col = col + 1

            
        
if __name__ == '__main__':
    # Keywords / sensitive words to search for in every string cell.
    target_strs = [
        u'棒棒糖',
        u'甜甜圈',
        u'加粗',
        u'ARMv8',
    ]
    # Scan every workbook found below the current working directory.
    for excel_path in walk_all_excel(u'./'):
        check_excel(excel_path, target_strs)