0


[Anonymous GitHub]代码下载方法,一次性全下载

  • 工具分享:

https://github.com/kynehc/clone_anonymous_github

  • 注意:下载链接格式一定要确保结尾有个/

注意:下载链接要去掉 README.md 部分,只保留它之前的仓库链接。例如,对于下面的链接:

https://anonymous.4open.science/r/GraphCL-7105/README.md/

你应该写为:

https://anonymous.4open.science/r/GraphCL-7105/

使用:

注意原来的代码有问题,用我这个

python download.py --url https://anonymous.4open.science/r/GraphCL-7105/ --dir data/
import argparse
import time

import requests
import os
from time import sleep
import concurrent.futures

def parse_args():
    """Parse the command-line options of the downloader.

    Recognised options:
        --dir        directory the files are saved into (default ``master``)
        --url        link of the anonymous repository to clone (default ``None``)
        --max-conns  maximum number of connections, as an int (default ``128``)

    Returns:
        The ``argparse.Namespace`` produced from ``sys.argv``.
    """
    cli = argparse.ArgumentParser(
        description='Clone from the https://anonymous.4open.science')

    # One (flag, keyword-arguments) entry per option, registered in order.
    option_table = (
        ('--dir', {'type': str, 'default': 'master', 'help': 'save dir'}),
        ('--url', {'type': str,
                   'help': 'target anonymous github link eg., https://anonymous.4open.science/r/840c8c57-3c32-451e-bf12-0e20be300389/'}),
        ('--max-conns', {'type': int, 'default': 128,
                         'help': 'max connections number'}),
    )
    for flag, spec in option_table:
        cli.add_argument(flag, **spec)

    return cli.parse_args()

def dict_parse(dic, pre=None):
    """Flatten a nested dict into root-to-leaf paths.

    Each yielded item is a list holding the keys walked from the top of
    ``dic`` down to a non-dict value, followed by that value itself.
    Empty nested dicts contribute nothing.

    Args:
        dic: the (possibly nested) dict to walk. A non-dict input yields a
            single path consisting of ``pre`` plus that value.
        pre: keys already walked; it is copied, never mutated.

    Yields:
        list: ``pre + [key, ..., key, value]`` for every leaf.
    """
    prefix = [] if not pre else pre[:]

    # Not a dict: the value itself is the only leaf.
    if not isinstance(dic, dict):
        yield prefix + [dic]
        return

    for name, child in dic.items():
        if isinstance(child, dict):
            yield from dict_parse(child, prefix + [name])
        else:
            yield prefix + [name, child]

def req_url(dl_file, max_retry=5, timeout=30):
    """Download one file and write it to disk, retrying on failure.

    Args:
        dl_file: ``(url, save_path)`` pair; ``url`` is fetched with a GET
            request and the response body is written to ``save_path``.
        max_retry: maximum number of download attempts.
        timeout: seconds ``requests.get`` may wait on the server before the
            attempt counts as failed (without it a stalled connection would
            block the worker forever).

    Returns:
        None. Request and file-system errors are printed, not raised.
    """
    url, save_path = dl_file[0], dl_file[1]

    # os.path.dirname understands the platform separator, unlike split('/'),
    # so this also works for paths produced by os.path.join on Windows.
    save_dir = os.path.dirname(save_path)
    if save_dir:
        try:
            # exist_ok avoids the check-then-create race between workers.
            os.makedirs(save_dir, exist_ok=True)
        except OSError as e:
            print('cannot create directory {}: {}'.format(save_dir, e))
            return

    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15"
    }
    for i in range(max_retry):
        try:
            r = requests.get(url, headers=headers, timeout=timeout)
            # Treat HTTP errors (404, 429, 5xx, ...) as failures so an error
            # page is retried instead of being saved as the file's content.
            r.raise_for_status()
            with open(save_path, "wb") as f:
                f.write(r.content)
            return
        except (requests.RequestException, OSError) as e:
            print('file request exception (retry {}): {} - {}'.format(i, e, save_path))
            sleep(0.4)

    # Every attempt failed: say so instead of returning silently.
    print('giving up after {} attempts: {}'.format(max_retry, save_path))

def main():
    """Download every file of an Anonymous GitHub repository.

    Reads ``--url``, ``--dir`` and ``--max-conns`` via ``parse_args()``,
    fetches the repository's file listing, then downloads all listed files
    concurrently with ``req_url`` into the ``--dir`` directory.

    Raises:
        SystemExit: if ``--url`` was not given.
        requests.HTTPError: if the file-listing request returns an HTTP error.
    """
    args = parse_args()
    if not args.url:
        # Explicit check rather than ``assert``: asserts are stripped by
        # ``python -O``. The hint names the flag the parser really defines.
        raise SystemExit(
            'Please specify your target anonymous github link, e.g.:\n'
            '    python download.py --url https://anonymous.4open.science/r/840c8c57-3c32-451e-bf12-0e20be300389/'
        )

    # The repository id is the last path component; stripping '/' first makes
    # the link work with or without a trailing slash.
    name = args.url.rstrip('/').split('/')[-1]
    max_conns = args.max_conns

    print("[*] cloning project:" + name)

    list_url = "https://anonymous.4open.science/api/repo/" + name + "/files/"
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Safari/605.1.15"
    }
    resp = requests.get(url=list_url, headers=headers, timeout=30)
    # Fail here with a clear HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    file_list = resp.json()

    print("[*] downloading files:")

    dl_url = "https://anonymous.4open.science/api/repo/" + name + "/file/"
    files = []
    for entry in dict_parse(file_list):
        # The last two elements of each flattened entry are dropped and the
        # rest is used as the file's path components.
        # NOTE(review): assumes every leaf of the listing is a
        # {<attribute>: <value>} dict under the file name - confirm with the API.
        parts = entry[:-2]
        if not parts:
            # Nothing left to build a path from; os.path.join() would raise.
            continue
        save_path = os.path.join(args.dir, *parts)
        # URLs always use '/', whatever the local path separator is.
        file_url = dl_url + '/'.join(parts)
        files.append((file_url, save_path))

    done = 0
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_conns) as executor:
        futures = [executor.submit(req_url, dl_file) for dl_file in files]
        # No sleep in this loop: every task is already submitted, so pausing
        # here would not throttle the downloads, only delay the script.
        for future in concurrent.futures.as_completed(futures):
            try:
                future.result()
            except Exception as exc:
                # Report unexpected worker errors but keep collecting the rest.
                print('download task failed: {}'.format(exc))
            done += 1
            print("The current file is :", str(done), end="\r")

    print("[*] files saved to:" + args.dir)


if __name__ == '__main__':
    main()

本文转载自: https://blog.csdn.net/weixin_43135178/article/details/129203176
版权归原作者 HealthScience 所有, 如有侵权,请联系我们删除。

“[Anonymous GitHub]代码下载方法,一次性全下载”的评论:

还没有评论