Nin*_*ail 9 python regex file-io python-2.7
在这里得到一些帮助之后,我的代码几乎已经完全符合我的要求。现在我需要添加一项功能:在比较文件之前,先从文件内容中删除某些数据。
原因是我要删除的那段"数据"字符串在每次保存文件时都会不同。
我写了一个正则表达式来精确选中要删除的文本,但我无法把它整合到现有代码中。
以下是三个主要功能
# Pattern for a "hostname <name>" line; group 1 captures the name.
HOSTNAME_RE = re.compile(r'hostname +(\S+)')


def get_file_info_from_lines(filename, file_lines):
    """Return ``(hostname, filename, sha1_hexdigest)`` for a file's lines.

    Every line is fed, in order, into one SHA-1 digest.  The hostname is
    group 1 of the last line that HOSTNAME_RE matches from its first
    character, or None when no line matches.

    Lines may be text or byte strings.  Byte strings are hashed verbatim:
    calling ``.encode()`` on them, as the previous code did, makes
    Python 2 decode them as ASCII first and fail on any non-ASCII byte.
    Text is hashed as UTF-8.
    """
    hostname = None
    a_hash = hashlib.sha1()
    for line in file_lines:
        # Bytes for the digest.
        raw = line if isinstance(line, bytes) else line.encode('utf-8')
        # Native string for the regex (HOSTNAME_RE is a str pattern).
        text = line if isinstance(line, str) else raw.decode('utf-8', 'replace')
        a_hash.update(raw)
        match = HOSTNAME_RE.match(text)
        if match:
            hostname = match.group(1)
    return hostname, filename, a_hash.hexdigest()
def get_file_info(filename):
    """Return the info tuple for a config file, or None for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read;
    anything else yields None.  For a matching file the result is
    whatever ``get_file_info_from_lines`` returns for its lines.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is never written, and "r+" would fail on
    # files the process may read but not modify.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())
def hostname_parse(directory):
    """Collect file info for every config file directly inside *directory*.

    Returns a dict keyed by ``info[0]`` (the hostname reported by
    ``get_file_info``) whose values are the full info tuples.  Entries
    for which ``get_file_info`` returns None are skipped.  When two files
    report the same key, the one that sorts last by name wins.
    """
    results = {}
    # Take one snapshot of the directory so the progress total always
    # agrees with what is iterated.  Sort it because os.listdir() order is
    # arbitrary, which would make key collisions nondeterministic.
    entries = sorted(os.listdir(directory))
    total = len(entries)
    for count, name in enumerate(entries, 1):
        path = os.path.join(directory, name)
        # NOTE(review): delay kept from the original; presumably it only
        # makes the progress bar visible -- confirm before removing.
        sleep(0.001)
        progress_bar(count, total, prefix = 'Progress:', suffix = 'Complete', barLength = 50)
        info = get_file_info(path)
        if info is not None:
            results[info[0]] = info
    return results
Run Code Online (Sandbox Code Playgroud)
这是用于查找要删除的字符串的正则表达式.
# Pattern for the text to strip: "Current configuration" at a word boundary,
# then six repetitions of "rest of line plus optional newline" -- i.e. that
# line and up to five lines after it.
REMOVE_RE = r"((?:\bCurrent configuration)(?:.*\n?){6})"
# Replacement text intended for re.sub(): the matched span is simply deleted.
subst = ""
Run Code Online (Sandbox Code Playgroud)
EXAMPLE_FILE_BEFORE_DATA_REMOVED:
Building configuration...
Current configuration : 45617 bytes
!
! Last configuration change at 00:22:36 UTC Sun Jan 22 2017 by user
! NVRAM config last updated at 00:22:43 UTC Sun Jan 22 2017 by user
!
version 15.0
no service pad
!
no logging console
enable secret 5 ***encrypted password***
!
username admin privilege 15 password 7 ***encrypted password***
username sadmin privilege 15 secret 5 ***encrypted password***
aaa new-model
!
ip ftp username ***encrypted password***
ip ftp password 7 ***encrypted password***
ip ssh version 2
!
line con 0
password 7 ***encrypted password***
login authentication maint
line vty 0 4
password 7 ***encrypted password***
length 0
transport input ssh
line vty 5 15
password 7 ***encrypted password***
transport input ssh
!
Run Code Online (Sandbox Code Playgroud)
EXAMPLE_FILE_AFTER_DATA_REMOVED:
Building configuration...
!
no service pad
!
no logging console
enable
!
username admin privilege 15
username gisadmin privilege 15
aaa new-model
!
ip ftp username cfgftp
ip ftp
ip ssh version 2
!
line con 0
login authentication maint
line vty 0 4
length 0
transport input ssh
line vty 5 15
transport input ssh
!
Run Code Online (Sandbox Code Playgroud)
我已经尝试在get_file_info和get_file_info_from_lines中执行类似#filename = re.sub(REMOVE_RE,subst,filename,0,re.MULTILINE)之类的操作,但我显然没有正确实现它.
任何帮助都会受到赞赏,因为我只是在学习.
运行比较:
# Build hostname -> (hostname, filename, sha1) maps for the two inputs.
results1 = hostname_parse('test1.txt')
results2 = hostname_parse('test2.txt')

# Report every hostname present on both sides whose digests differ.
for hostname, filename, filehash in results1.values():
    other = results2.get(hostname)
    if other is None:
        # Hostname only exists in the first set: nothing to compare.
        continue
    _, filename2, filehash2 = other
    if filehash != filehash2:
        print("%s has a change (%s, %s)" % (
            hostname, filehash, filehash2))
        print(filename)
        print(filename2)
        # print("") emits a blank line on Python 2 and 3 alike; a bare
        # print() prints "()" under Python 2's print statement.
        print("")
Run Code Online (Sandbox Code Playgroud)
我不想修改当前文件。如果这一切都能在内存中完成,或者借助临时文件完成,那就太好了。
完整代码:
import hashlib
import os
import re
# Pattern for a "hostname <name>" line; group 1 captures the name.
HOSTNAME_RE = re.compile(r'hostname +(\S+)')
# Matches "Current configuration" plus the rest of that line and up to five
# following lines.  It spans several lines, so it must be applied to the
# whole text rather than to one line at a time.
REMOVE_RE = re.compile(r"((?:\bCurrent configuration)(?:.*\n?){6})")


def get_file_info_from_lines(filename, file_lines):
    """Return ``(hostname, filename, sha1_hexdigest)`` for a file's lines.

    The digest covers the joined lines with every REMOVE_RE match deleted,
    so two files that differ only inside that header hash identically.
    The hostname is group 1 of the last line that HOSTNAME_RE matches from
    its first character, or None when no line matches.

    The previous version tested REMOVE_RE against each line separately,
    which dropped only the "Current configuration" line itself, and it had
    hostname detection commented out, so the hostname was always None.
    """
    # Materialise once: the lines are walked twice below.
    lines = list(file_lines)

    hostname = None
    for line in lines:
        match = HOSTNAME_RE.match(line)
        if match:
            hostname = match.group(1)

    payload = REMOVE_RE.sub("", "".join(lines))
    if not isinstance(payload, bytes):
        # Python 2 byte strings are hashed as-is; text is hashed as UTF-8.
        payload = payload.encode('utf-8')
    return hostname, filename, hashlib.sha1(payload).hexdigest()
def get_file_info(filename):
    """Return the info tuple for a config file, or None for other files.

    Only names ending in ``.cfg``, ``.startup`` or ``.confg`` are read;
    anything else yields None.  For a matching file the result is
    whatever ``get_file_info_from_lines`` returns for its lines.
    """
    if not filename.endswith(('.cfg', '.startup', '.confg')):
        return None
    # Open read-only: the file is never written, and "r+" would fail on
    # files the process may read but not modify.
    with open(filename, "r") as in_file:
        return get_file_info_from_lines(filename, in_file.readlines())
def hostname_parse(directory):
    """Collect file info for every config file directly inside *directory*.

    Returns a dict keyed by ``info[0]`` (the hostname reported by
    ``get_file_info``) whose values are the full info tuples.  Entries
    for which ``get_file_info`` returns None are skipped.  When two files
    report the same key, the one that sorts last by name wins.
    """
    results = {}
    # os.listdir() order is arbitrary; sorting makes both the collision
    # winner and the order of the resulting dict reproducible.
    for name in sorted(os.listdir(directory)):
        info = get_file_info(os.path.join(directory, name))
        if info is not None:
            results[info[0]] = info
    return results
# Build hostname -> (hostname, filename, sha1) maps for the two directories
# of test files.
results1 = hostname_parse('test1')
results2 = hostname_parse('test2')

# Report every hostname present on both sides whose digests differ.
for hostname, filename, filehash in results1.values():
    other = results2.get(hostname)
    if other is None:
        # Hostname only exists in the first set: nothing to compare.
        continue
    _, filename2, filehash2 = other
    if filehash != filehash2:
        print("%s has a change (%s, %s)" % (
            hostname, filehash, filehash2))
        print(filename)
        print(filename2)
        # print("") emits a blank line on Python 2 and 3 alike; a bare
        # print() prints "()" under Python 2's print statement.
        print("")
Run Code Online (Sandbox Code Playgroud)
我找到了一种绕开正则表达式的办法:直接按内容匹配这些行,并在计算哈希之前把它们丢弃。
def get_file_info_from_lines(filename, file_lines):
    """Return ``(hostname, filename, sha1_hexdigest)`` for a file's lines.

    Lines containing ``"! Last "`` or ``"! NVRAM "`` are left out of the
    digest and are not searched for a hostname.  All other lines are
    hashed in order.  The hostname is group 1 of the last line that
    HOSTNAME_RE matches from its first character, or None when no line
    matches.

    Lines may be text or byte strings; byte strings are hashed verbatim
    and text is hashed as UTF-8.
    """
    hostname = None
    a_hash = hashlib.sha1()
    for line in file_lines:
        # Bytes for the digest, native string for the text checks.
        raw = line if isinstance(line, bytes) else line.encode('utf-8')
        text = line if isinstance(line, str) else raw.decode('utf-8', 'replace')
        if "! Last " in text or "! NVRAM " in text:
            # Skip the volatile line entirely.
            continue
        a_hash.update(raw)
        match = HOSTNAME_RE.match(text)
        if match:
            hostname = match.group(1)
    # The snippet as posted ended without a return, so callers received
    # None and discarded every file.
    return hostname, filename, a_hash.hexdigest()
Run Code Online (Sandbox Code Playgroud)