其他分享
首页 > 其他分享 > 获取所有图片的MD5值,并根据MD5值去重整合

获取所有图片的MD5值,并根据MD5值去重整合

作者:互联网

# filedeal.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-

import os
import shutil
from PIL import Image
import io
import requests
import datetime
import hashlib
import time

# 获取所有文件
def getAllFiles(fire_dir):
    """Recursively collect the paths of all files below a directory.

    Each path is printed as it is found, and the complete list is printed
    once the walk has finished.

    Args:
        fire_dir: Root directory to walk.

    Returns:
        list: One path per file, built from the directory reported by
        ``os.walk`` and the file name. Empty when no files are found.
    """
    filepath_list = []
    for root, _folder_names, file_names in os.walk(fire_dir):
        for file_name in file_names:
            # os.path.join avoids the doubled separator that plain string
            # concatenation produces when root already ends with one
            # (for example a drive root or a path passed with a trailing slash).
            file_path = os.path.join(root, file_name)
            filepath_list.append(file_path)
            print(file_path)
    print(filepath_list)
    return filepath_list



# 获取图片的像素
def getPicsize(pic_file):
    """Return the pixel dimensions and the on-disk size of an image file.

    The size in bytes, the width and the height are also printed, in that
    order.

    Args:
        pic_file: Path of the image to inspect.

    Returns:
        tuple: ``(width, height, size_in_bytes)``.
    """
    # Close the image as soon as the dimensions are read: callers move the
    # file right after this call, and a still-open handle can make that
    # move fail (notably on Windows).
    with Image.open(pic_file) as img:
        w = img.width
        h = img.height

    image_size = os.path.getsize(pic_file)
    print(image_size)

    print(w)
    print(h)
    return w, h, image_size

def getBaseName(file_name):
    """Return the final component of *file_name* (text after the last separator)."""
    _directory, base_name = os.path.split(file_name)
    return base_name

def getNewName(old_file_name):
    """Build a path whose file name is prefixed with the current timestamp.

    The directory part of the path is kept; only the last component gets a
    ``YYYYmmddHHMMSSffffff_`` prefix. The resulting path is printed.

    Args:
        old_file_name: Path of the file to derive the new name from.

    Returns:
        str: Path in the same directory with the timestamp-prefixed name.

    Raises:
        ValueError: If the path has no file name component (for example
            when it ends with a path separator).
    """
    # Split on the last separator instead of str.split(base_name): the base
    # name may also occur earlier in the path (e.g. "img/img"), in which
    # case splitting on the text would drop part of the directory.
    directory, file_base_name = os.path.split(old_file_name)
    if not file_base_name:
        raise ValueError("path has no file name component: %r" % (old_file_name,))
    timestrhaomiao = datetime.datetime.now().strftime('%Y%m%d%H%M%S%f_')
    new_file_name = os.path.join(directory, timestrhaomiao + file_base_name)
    print(new_file_name)
    return new_file_name

#获取文件md5值
def getmd5(file):
    """Return the MD5 hex digest of a file's content.

    Args:
        file: Path of the file to hash.

    Returns:
        str or None: The 32-character hexadecimal digest, or ``None`` when
        ``file`` is not an existing regular file.
    """
    if not os.path.isfile(file):
        return None
    md5 = hashlib.md5()
    # Read in fixed-size chunks so large files (videos, big images) are not
    # loaded into memory at once; ``with`` closes the handle even if a read
    # fails part-way through.
    with open(file, 'rb') as fd:
        for chunk in iter(lambda: fd.read(1024 * 1024), b''):
            md5.update(chunk)
    return md5.hexdigest()


# Compute the MD5 of every file under src and find the files that share an MD5
# src is the directory to scan
def paixuMd5(src):
    """Find files with identical content under *src* and move the extras aside.

    Every file below ``src`` is hashed with ``getmd5``. The first file seen
    for a given MD5 stays where it is; each later file with the same MD5 is
    recorded as ``[md5, kept_file, duplicate_file]``, written as one line of
    ``youchong.txt`` (created in the current working directory), and then
    moved into the ``相同`` sub-directory of ``src``.

    Files that already live inside ``相同`` are skipped so the function can
    be run again on the same directory.

    Args:
        src: Directory to scan.

    Returns:
        list: ``[md5, path]`` pairs for every file that was examined, in
        scan order (paths are those from before any move).
    """
    duplicate_dir = os.path.join(src, "相同")
    duplicate_root = os.path.abspath(duplicate_dir) + os.sep

    all_file_with_md5_list = []
    all_file_with_same_md5_list = []
    # md5 -> first file seen with that digest; dict lookup replaces the
    # repeated list scans of the earlier implementation.
    first_file_by_md5 = {}

    for f in getAllFiles(src):
        # Duplicates moved by a previous run must not be compared again,
        # otherwise they (or their originals) would be moved onto themselves.
        if os.path.abspath(f).startswith(duplicate_root):
            continue
        f_md5 = getmd5(f)
        one_f_md5_list = [f_md5, f]
        all_file_with_md5_list.append(one_f_md5_list)
        if f_md5 is None:
            # getmd5 could not hash this entry (not a regular file); never
            # treat such entries as duplicates of each other.
            pass
        elif f_md5 not in first_file_by_md5:
            first_file_by_md5[f_md5] = f
        else:
            all_file_with_same_md5_list.append(one_f_md5_list)
        print("-----------------")

    all_with_many_file_list = []
    for one_same_f_md5, one_same_f in all_file_with_same_md5_list:
        all_with_many_file_list.append(
            [one_same_f_md5, first_file_by_md5[one_same_f_md5], one_same_f]
        )
        print("........")

    print("all_with_many_file_list:")
    print(all_with_many_file_list)
    print(len(all_with_many_file_list))

    with open("youchong.txt", 'w', encoding="utf-8") as report:
        for one_list in all_with_many_file_list:
            print(one_list)
            report.write(str(one_list) + "\n")

    # Directory that receives the duplicate files.
    if not os.path.isdir(duplicate_dir):
        os.mkdir(duplicate_dir)

    # Move every duplicate; pick a free name because duplicates coming from
    # different sub-directories may share the same base name.
    for _md5, one_f in all_file_with_same_md5_list:
        target = _unique_destination(duplicate_dir, os.path.basename(one_f))
        shutil.move(one_f, target)

    return all_file_with_md5_list


def _unique_destination(directory, file_name):
    """Return a path for *file_name* inside *directory* that does not exist yet."""
    candidate = os.path.join(directory, file_name)
    stem, ext = os.path.splitext(file_name)
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, "%s_%d%s" % (stem, counter, ext))
        counter += 1
    return candidate












#对文件重命名并且分类
def deal_file(src,dst1,dst2,dst3):
    """Rename every file under *src* and sort the files into target folders.

    Steps:
      1. Every file below ``src`` is renamed with ``getNewName`` (timestamp
         prefix).
      2. The files are classified by extension (case-insensitive).
      3. ``.mp4`` files are moved to ``dst1``; ``.jpg``/``.png``/``.jpeg``
         files are moved into ``dst2/<横图|竖图>/<小于1M|大于等于1M>``
         depending on orientation and file size; all other files are moved
         to ``dst3``.

    A failure on a single file is printed and does not stop the run.

    Args:
        src: Directory whose files are processed.
        dst1: Target directory for ``.mp4`` files.
        dst2: Root target directory for images.
        dst3: Target directory for everything else.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    # Give every file a timestamp-prefixed name first.
    for old_file_name in getAllFiles(src):
        os.rename(old_file_name, getNewName(old_file_name))

    # Scan again to pick up the new names, then classify by extension.
    mp4 = []
    images_by_extension = {extension: [] for extension in image_extensions}
    qita = []
    for f in getAllFiles(src):
        print(f)
        # Compare lower-cased extensions so ".JPG" or ".Mp4" are recognised.
        extension = os.path.splitext(f)[1].lower()
        if extension == '.mp4':
            mp4.append(f)
        elif extension in images_by_extension:
            images_by_extension[extension].append(f)
        else:
            qita.append(f)

    # Create the target folders (including missing parents).
    target_dirs = [dst1, dst2]
    for orientation in ("横图", "竖图"):
        target_dirs.append(os.path.join(dst2, orientation))
        for size_bucket in ("大于等于1M", "小于1M"):
            target_dirs.append(os.path.join(dst2, orientation, size_bucket))
    target_dirs.append(dst3)
    for directory in target_dirs:
        os.makedirs(directory, exist_ok=True)

    # The paths returned by getAllFiles already contain src, so they are
    # used as-is; joining them with src again breaks when src is relative.
    for video in mp4:
        _move_best_effort(video, dst1)

    for extension in image_extensions:
        for picture in images_by_extension[extension]:
            try:
                w, h, image_size = getPicsize(pic_file=picture)
                shutil.move(picture, _image_destination(dst2, w, h, image_size))
            except Exception as e:
                # Deliberately best-effort: report the file and carry on.
                print("%s: %s" % (picture, e))

    for other in qita:
        _move_best_effort(other, dst3)


def _image_destination(dst2, w, h, image_size):
    """Return the folder under *dst2* for an image of the given shape and size."""
    # Strictly wider than tall counts as landscape; squares go to portrait.
    orientation = "横图" if w > h else "竖图"
    # NOTE(review): 1024000 is neither 10**6 nor 2**20; the value is kept
    # unchanged so files are sorted exactly as before.
    size_bucket = "小于1M" if image_size < 1024000 else "大于等于1M"
    return os.path.join(dst2, orientation, size_bucket)


def _move_best_effort(path, directory):
    """Move *path* into *directory*, printing (not raising) any failure."""
    try:
        shutil.move(path, directory)
    except Exception as e:
        print("%s: %s" % (path, e))

if __name__ == "__main__":
    import sys

    # Directory to de-duplicate: the first command-line argument when given,
    # otherwise the original hard-coded default.
    file = sys.argv[1] if len(sys.argv) > 1 else r"F:\存储盘\其他\people\image"
    paixuMd5(src=file)

 

标签:list,值去,same,dst2,获取,file,MD5,os,md5
来源: https://www.cnblogs.com/jingzaixin/p/16536399.html