Python小练习

这是一个小工具:发现有一个开源的诗词库( https://github.com/michaelliao/shici/tree/master/web/src/main/resources/text ),想把它导入到数据库中。诗词库以文件夹的方式组织,用 Python 可以很快地实现这个需求,主要功能包括递归枚举文件夹,然后将文件内容读出来写入数据库。


代码记录如下:

 
#导入依赖库
 
import io
import os
import sys
from datetime import datetime

import pymysql
 
 
 
def readpoem(path):
    """Return the text of the poem file at *path* without its header lines.

    Lines containing the substring 'form' or 'tags', and lines that are
    empty after stripping whitespace, are dropped.  Every other line is kept
    unchanged (including its trailing newline) and the kept lines are
    concatenated in file order.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the locale's default encoding.
    """
    kept = []
    # io.open accepts `encoding` on both Python 2 and Python 3, and the
    # `with` block closes the file even if reading raises.
    with io.open(path, 'r', encoding='utf-8') as f:
        for line in f:
            # NOTE(review): this is a substring test, so a verse that happens
            # to contain 'form' or 'tags' is dropped as well -- confirm that
            # this is acceptable for the corpus.
            if 'form' in line or 'tags' in line:
                continue
            if not line.strip():
                continue
            kept.append(line)
    return ''.join(kept)
 
def readmeta(path):
    """Return the metadata text of the file at *path* as a single string.

    Lines containing the substring 'birth' or 'death', and lines that are
    empty after stripping whitespace, are dropped.  Each remaining line has
    its newline characters stripped and the pieces are concatenated with no
    separator.

    The file is decoded as UTF-8 explicitly, so the result does not depend on
    the locale's default encoding.
    """
    kept = []
    # io.open accepts `encoding` on both Python 2 and Python 3, and the
    # `with` block closes the file even if reading raises.
    with io.open(path, 'r', encoding='utf-8') as f:
        for line in f:
            if 'birth' in line or 'death' in line:
                continue
            if not line.strip():
                continue
            kept.append(line.strip('\n'))
    return ''.join(kept)
 
def _build_title(file_path):
    """Build a poem title from the last three components of *file_path*.

    The file extension is dropped first.  The first of the (up to) three
    trailing path components is reduced to the text after its first '.',
    has single quotes removed and is wrapped in square brackets; the second
    component is appended directly and the third is appended after a '-'.
    """
    stem = os.path.splitext(file_path)[0]
    # Normalise the platform separator so the split also works where
    # os.path.join() produces something other than '/'.
    parts = stem.replace(os.sep, '/').split('/')[-3:]

    head_fields = parts[0].split('.')
    # Fall back to the whole component when it has no '.'-separated prefix
    # instead of raising IndexError.
    head = head_fields[1] if len(head_fields) > 1 else head_fields[0]
    title = '[{0}]'.format(head.replace('\'', ''))

    for index, part in enumerate(parts[1:], 1):
        if index == 1:
            title += part.strip()
        else:
            title += '-{0}'.format(part.strip())
    return title


def listdir(path, list_result):
    """Recursively collect poem data below *path* into *list_result*.

    For every directory that directly contains at least one usable '.txt'
    file, one dict is appended to *list_result*:

    * 'meta' -- text returned by readmeta() for a '.txt' file whose name
      contains 'meta' (only set when that text is not blank);
    * 'poem' -- a list with one dict per other '.txt' file, each holding
      'title' and, when the file yielded non-blank text, 'content' with
      single quotes removed.

    Sub-directories are processed before the entry for *path* itself is
    appended.  Files with any other extension are ignored.  Returns None;
    the results are delivered through *list_result*.
    """
    result = {}
    contain_file = False
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # traversal and therefore the output order reproducible.
    for name in sorted(os.listdir(path)):
        file_path = os.path.join(path, name)
        if os.path.isdir(file_path):
            listdir(file_path, list_result)
            continue
        if os.path.splitext(file_path)[1] != '.txt':
            continue

        # Test the file name only: matching against the full path would
        # classify every file below a directory whose name contains 'meta'
        # as metadata.
        if 'meta' in name:
            meta = readmeta(file_path)
            if meta.strip():
                result['meta'] = meta
            continue

        contain_file = True
        content = readpoem(file_path)
        poem_detail = {}
        if content.strip():
            poem_detail['content'] = content.replace('\'', '')
        poem_detail['title'] = _build_title(file_path)

        # Record the poem even when it has no 'content' key, so the count
        # printed below still covers every poem file in the directory.
        result.setdefault('poem', []).append(poem_detail)

    if contain_file:
        print('poem.numb:{0}'.format(len(result['poem'])))
    if result:
        list_result.append(result)
 
 
 
# Make UTF-8 the interpreter-wide default string encoding when it is not
# already.  reload() is a Python 2 builtin; on an interpreter whose default
# encoding is already 'utf-8' this branch is skipped entirely.
defaultencoding = 'utf-8'
if defaultencoding != sys.getdefaultencoding():
    reload(sys)
    sys.setdefaultencoding(defaultencoding)

# Scan the tree rooted at sys.path[0]; listdir() fills the list in place.
result_poems = []
listdir(sys.path[0], result_poems)

# Number of collected entries.
print("size:%d" % (len(result_poems),))
#for item in result_file:
#    print(item.get('meta', ''))
#    for poem in item['poem']:
#        print('name:{0}, content:{1}'.format(poem['title'], poem['content']))
 
 
 
# Write the collected poems to the database.

# Connect to the database.
# NOTE(review): the credentials are hard-coded here; consider reading them
# from configuration or the environment.
conn = pymysql.connect(host='localhost', port=3306, user='root', passwd='python', db='myschool', charset='utf8')

cur = conn.cursor()

# Hour-resolution timestamp in the form YYYYMMDDHH.
dt = datetime.now()
now = dt.strftime('%Y%m%d%H')

# Parameterised statement: the driver quotes and escapes every value, so
# poem text containing quotes or backslashes can neither break nor inject
# SQL.
sql = "insert into shici(`name`,`content`,`time`) values (%s, %s, %s)"

try:
    for item in result_poems:
        print(item.get('meta', ''))
        # An entry may carry only 'meta' and no 'poem' list.
        for poem in item.get('poem', []):
            content = poem.get('content')
            if content is None:
                # listdir() leaves 'content' out for files without usable
                # text; there is nothing to store for those.
                continue
            print('name:{0}, content:{1}'.format(poem['title'], content))
            # The insert runs once per poem.  The timestamp is passed as an
            # int so it is still sent as an unquoted number, as before.
            cur.execute(sql, (poem['title'], content, int(now)))
        # Commit once per collected entry so earlier entries survive a later
        # failure.
        conn.commit()
except Exception:
    # Discard the partially written entry and surface the error instead of
    # silently swallowing it.
    conn.rollback()
    raise
finally:
    conn.close()


白俊遥博客

请先登录后发表评论
  • 最新评论
  • 总共0条评论