微信搜索superit|邀请体验:大数据, 数据管理、OLAP分析与可视化平台 | 赞助作者:赞助作者

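vtt2lrcv2: two scripts that convert WebVTT (.vtt) subtitle files. The first writes a JSON cue list to a .txt file; the second writes an LRC lyrics file.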

 

import os
import json

def time_to_milliseconds(time_str):
    """Convert a WebVTT-style timestamp to integer milliseconds.

    Accepts ``MM:SS.fff`` and ``HH:MM:SS.fff``. The part after the dot is
    read as a decimal fraction of a second, so ``.5`` is 500 ms and ``.34``
    is 340 ms; digits beyond the third are dropped.

    Args:
        time_str: The timestamp text; surrounding whitespace is ignored.

    Returns:
        The offset in milliseconds as an ``int``.

    Raises:
        ValueError: If the fractional part is missing or not numeric, or if
            the clock part does not have two or three ``:``-separated fields.
    """
    clock, dot, fraction = time_str.strip().rpartition('.')
    if not dot or not fraction.isdecimal():
        raise ValueError(
            "timestamp must end in a fractional part: {!r}".format(time_str))

    fields = clock.split(':')
    if len(fields) == 2:
        hours = 0
        minutes, seconds = (int(field) for field in fields)
    elif len(fields) == 3:
        hours, minutes, seconds = (int(field) for field in fields)
    else:
        raise ValueError(
            "expected MM:SS.fff or HH:MM:SS.fff, got {!r}".format(time_str))

    # Pad/truncate to exactly three digits so the value is always in ms.
    milliseconds = int(fraction[:3].ljust(3, '0'))

    return (hours * 60 + minutes) * 60000 + seconds * 1000 + milliseconds

# Parse a cue timing line into millisecond offsets.
def deal_time(timeStamp=''):
    """Turn a VTT timing line into ``(start_ms, end_ms)``.

    The line is expected to look like ``START --> END`` optionally followed
    by cue settings (e.g. ``align:start``). Each timestamp may be
    ``MM:SS.mmm`` or ``HH:MM:SS.mmm`` and is kept at full precision.

    Args:
        timeStamp: The stripped timing line.

    Returns:
        A ``(start_ms, end_ms)`` tuple of ints, or ``None`` when
        ``timeStamp`` is empty.

    Raises:
        ValueError: If the ``-->`` separator or either timestamp is missing,
            or a timestamp is malformed.
    """
    if not timeStamp:
        return None

    start_side, arrow, end_side = timeStamp.partition("-->")
    if not arrow:
        raise ValueError("not a cue timing line: {!r}".format(timeStamp))

    offsets = []
    for side in (start_side, end_side):
        # Only the first token is the timestamp; anything after it on the
        # end side is cue settings.
        tokens = side.split()
        if not tokens:
            raise ValueError(
                "missing timestamp in timing line: {!r}".format(timeStamp))
        fields = tokens[0].split(":")
        if len(fields) == 3:
            # Fold hours into the minutes field so the value is always passed
            # to time_to_milliseconds in MM:SS.mmm form.
            fields = [str(int(fields[0]) * 60 + int(fields[1])), fields[2]]
        offsets.append(time_to_milliseconds(":".join(fields)))

    return offsets[0], offsets[1]

# Convert one VTT file into a JSON cue list.
def convert_vtt_to_lrc(vtt_file_path):
    """Convert a VTT subtitle file into a JSON array of cue-text entries.

    Despite the name, this variant writes JSON (not LRC) to a ``.txt`` file
    next to the input, with the input's extension replaced. Each remaining
    text line becomes one object with the keys ``startTime``, ``endTime``,
    ``detail`` (the raw line, line ending included) and ``role`` (always
    ``"A"``). Only the first text line after a timing line carries the
    millisecond offsets; later lines of the same cue get empty strings.

    Skipped lines: blank lines, purely numeric lines (cue identifiers), a
    leading ``WEBVTT`` header, and timing lines themselves.

    Args:
        vtt_file_path: Path of the UTF-8 encoded ``.vtt`` file to read.
    """
    # Read the whole input up front.
    with open(vtt_file_path, 'r', encoding='utf-8') as vtt_file:
        lines = vtt_file.readlines()

    # Output path: same location and stem, ".txt" extension.
    lrc_file_path = os.path.splitext(vtt_file_path)[0] + '.txt'
    print(lrc_file_path)

    timeStampStart = ''
    timeStampEnd = ''
    arrJson = []
    for index, line in enumerate(lines):
        stripped = line.strip()
        if not stripped or stripped.isnumeric():
            continue
        # The file signature (possibly preceded by a BOM) is not cue text.
        if index == 0 and stripped.lstrip('\ufeff').startswith('WEBVTT'):
            continue

        # A timing line is identified by its "-->" separator, which also
        # covers timestamps at or beyond ten minutes.
        if '-->' in stripped:
            timeStampStart, timeStampEnd = deal_time(stripped)
            continue

        arrJson.append({
            "startTime": timeStampStart,
            "endTime": timeStampEnd,
            "detail": line,
            "role": "A",
        })

        # Only the first text line of a cue keeps the timestamps.
        timeStampStart = ""
        timeStampEnd = ""

    # Write only after parsing succeeded, so a failure leaves no empty file.
    with open(lrc_file_path, 'w', encoding='utf-8') as lrc_file:
        lrc_file.write(json.dumps(arrJson, ensure_ascii=False) + '\n')

def convert_vtt_files_to_lrc(directory):
    """Recursively convert every ``.vtt`` file found under *directory*.

    Walks the tree with ``os.walk`` and, for each file whose name ends in
    ``.vtt``, prints a progress message and calls ``convert_vtt_to_lrc`` on
    its full path. Other files are left untouched.
    """
    for folder, _subdirs, names in os.walk(directory):
        for name in names:
            # Only VTT subtitle files are converted.
            if not name.endswith('.vtt'):
                continue
            print(f"Converting {name} to LRC...")
            convert_vtt_to_lrc(os.path.join(folder, name))

# Script entry point: ask for a directory, then convert the VTT files in it.
if __name__ == "__main__":
    convert_vtt_files_to_lrc(input("请输入要处理的目录路径:"))
    print("Conversion completed!")

 

 

 

import os

# Build the LRC time tag for a cue timing line.
def deal_time(timeStamp=''):
    """Return the LRC tag ``[MM:SS.xx]`` for the start of a VTT timing line.

    Only the text before ``-->`` is used. The seconds field is cut to five
    characters (``SS.xx``), i.e. hundredths of a second. For timestamps
    written as ``HH:MM:SS.mmm`` the hours are folded into the minutes,
    because the tag has no hours field.

    Args:
        timeStamp: The stripped timing line, e.g.
            ``"01:02.345 --> 01:05.000"``.

    Returns:
        The tag string, or ``None`` when ``timeStamp`` is empty.

    Raises:
        ValueError: If the start time has no ``:`` separator, or its hour or
            minute field is not an integer in the ``HH:MM:SS`` form.
    """
    if not timeStamp:
        return None

    startTime = timeStamp.split("-->")[0].strip()
    fields = startTime.split(":")
    if len(fields) < 2:
        raise ValueError("not a timestamp: {!r}".format(startTime))

    seconds = fields[-1][0:5]
    if len(fields) >= 3:
        # HH:MM:SS.mmm -> total minutes, zero-padded like the MM:SS form.
        minutes = "{:02d}".format(int(fields[-3]) * 60 + int(fields[-2]))
    else:
        minutes = fields[0]

    return "[{}:{}]".format(minutes, seconds)

# Convert one VTT file into an LRC file.
def convert_vtt_to_lrc(vtt_file_path):
    """Convert a VTT subtitle file into an LRC file next to it.

    The output path is the input path with its extension replaced by
    ``.lrc``; a trailing ``.mp3`` left in the stem (``song.mp3.vtt``) is
    removed as well, giving ``song.lrc``. Purely numeric lines (cue
    identifiers) are dropped. Each timing line is turned into an LRC tag by
    ``deal_time`` and prefixed to the next text line only; every other line
    is copied through unchanged.

    Args:
        vtt_file_path: Path of the UTF-8 encoded ``.vtt`` file to read.
    """
    # Read the whole input up front.
    with open(vtt_file_path, 'r', encoding='utf-8') as vtt_file:
        lines = vtt_file.readlines()

    # Remove the ".mp3" suffix explicitly: str.strip('.mp3') would strip any
    # of the characters '.', 'm', 'p', '3' from BOTH ends of the path.
    stem = os.path.splitext(vtt_file_path)[0]
    if stem.endswith('.mp3'):
        stem = stem[:-len('.mp3')]
    lrc_file_path = stem + '.lrc'
    print(lrc_file_path)

    timeStamp = ''
    with open(lrc_file_path, 'w', encoding='utf-8') as lrc_file:
        for line in lines:
            stripped = line.strip()
            if stripped.isnumeric():
                continue

            # A timing line is identified by its "-->" separator, which also
            # covers timestamps at or beyond ten minutes. It is not copied;
            # it only supplies the tag for the following text line.
            if '-->' in stripped:
                timeStamp = deal_time(stripped)
                continue

            lrc_file.write(timeStamp + line)
            # Later lines of the same cue are written without a tag.
            timeStamp = ""

def convert_vtt_files_to_lrc(directory):
    """Convert all ``.vtt`` files in *directory* and its subdirectories.

    For each directory visited by ``os.walk``, the file names ending in
    ``.vtt`` are selected in listing order; a progress message is printed
    for each and its full path is handed to ``convert_vtt_to_lrc``.
    """
    for current_dir, _, filenames in os.walk(directory):
        vtt_names = [entry for entry in filenames if entry.endswith('.vtt')]
        for vtt_name in vtt_names:
            print(f"Converting {vtt_name} to LRC...")
            convert_vtt_to_lrc(os.path.join(current_dir, vtt_name))

# Script entry point: prompt for the directory and run the batch conversion.
if __name__ == "__main__":
    target_dir = input("请输入要处理的目录路径:")
    convert_vtt_files_to_lrc(directory=target_dir)
    print("Conversion completed!")

 

转载请注明:SuperIT » vtt2lrcv2

喜欢 (0)or分享 (0)

您必须 登录 才能发表评论!