データセット処理:Pythonで.lstファイルを生成する

2249 ワード

# -*- coding: utf-8 -*-
import fnmatch
import os
import pandas as pd
import numpy as np

def mergeFile():
    """Pair the lines of ``2.lst`` with the lines of ``1.lst`` into ``trian.lst``.

    Line *i* of ``2.lst`` is joined to line *i* of ``1.lst`` with a single
    space. In both entries the trailing newline is removed and backslashes are
    turned into forward slashes; in the entry taken from ``1.lst`` the
    misspelling ``goundTruth`` is additionally rewritten to ``groundTruth``.
    All three files are resolved relative to the current working directory.

    Raises:
        IndexError: If ``1.lst`` has fewer lines than ``2.lst``.
    """
    # Context managers guarantee both handles are closed even if reading fails.
    with open("2.lst", "r", encoding='UTF-8') as file1, \
            open("1.lst", "r", encoding='UTF-8') as file2:
        file_list1 = file1.readlines()
        file_list2 = file2.readlines()

    file_list = []
    for i, first in enumerate(file_list1):
        a = first.replace('\n', '').replace('\\', '/')
        # Indexing (rather than zip) keeps a too-short 1.lst a loud error
        # instead of silently dropping unpaired entries.
        b = file_list2[i].replace('\n', '').replace('\\', '/')
        b = b.replace('goundTruth', 'groundTruth')
        file_list.append(a + ' ' + b)

    df = pd.DataFrame(file_list, columns=['one'])
    # NOTE(review): 'trian.lst' looks like a typo for 'train.lst'; the name is
    # kept because downstream tooling may already read this exact file.
    df.to_csv('trian.lst', columns=['one'], index=False, header=False)


def ReadSaveAddr(Stra, Strb, prefix='D:/qq_file/2275316862/FileRecv/'):
    """Walk ``Stra`` and write one truncated file address per line to ``2.lst``.

    Every file found below ``Stra`` contributes the line
    ``<directory>/<file name without its last four characters>``, where
    ``<directory>`` is the path reported by ``os.walk`` with every occurrence
    of ``prefix`` removed. The output file is created in the current working
    directory.

    Args:
        Stra: Root directory that is walked recursively.
        Strb: Glob pattern such as ``"*.png"``. It is accepted for interface
            compatibility but is not applied: every file is listed
            regardless of its name.
        prefix: Text stripped from each directory path before it is written.
            Defaults to the location the script was originally written for.
    """
    addresses = []
    for dirpath, _dirnames, filenames in os.walk(Stra):
        base = dirpath.replace(prefix, '')
        # Dropping the last four characters removes a dot plus a three-letter
        # extension (".png", ".jpg"); other suffix lengths are cut as-is.
        addresses.extend(base + '/' + name[:-4] for name in filenames)

    # One DataFrame built at the end replaces the per-directory concat.
    df = pd.DataFrame(addresses, columns=['Addr'])
    print(df.shape)
    df.to_csv('2.lst', columns=['Addr'], index=False, header=False)
    # NOTE(review): the message says "Get.lst" although the file written
    # above is '2.lst'; the text is left unchanged.
    print("Write To Get.lst !")


if __name__ == '__main__':
    # Alternative input root used for the training set:
    # InputStra = "D:/qq_file/2275316862/FileRecv/data/data/train/trainingset"
    InputStra = "D:/qq_file/2275316862/FileRecv/test"
    InputStrb = "*.png"
    ReadSaveAddr(InputStra, InputStrb)
    # Presumably enabled by hand once both 1.lst and 2.lst exist:
    # mergeFile()