# Source code for concordancer.demo

import os
import pathlib
import tempfile
import urllib.request
import zipfile

# URL of the zipped demo corpus. ``download_demo_corpus`` uses the last path
# segment of this URL (``demo_corpus.jsonl.zip``) as the archive file name.
DEMO_DATA = 'https://raw.githubusercontent.com/liao961120/concordancer/main/test-data/demo_corpus.jsonl.zip'


def download_demo_corpus(to: str = '.', url: 'str | None' = None) -> str:
    """Download the demo corpus archive and extract it.

    Parameters
    ----------
    to : str, optional
        Path to the directory to save the corpus, by default '.'
    url : str or None, optional
        Location of the zipped corpus. When omitted (the default), the
        module-level ``DEMO_DATA`` link is used.

    Returns
    -------
    str
        Absolute path formed from ``to`` and the archive's file name with
        its ``.zip`` suffix removed (``demo_corpus.jsonl`` for the default
        archive).

    Notes
    -----
    Demo data `download link <https://raw.githubusercontent.com/liao961120/concordancer/main/test-data/demo_corpus.jsonl.zip>`_
    """
    # Resolve the default at call time so the signature does not depend on
    # module import order.
    source_url = DEMO_DATA if url is None else url
    archive_name = source_url.split('/')[-1]
    tgt_dir = pathlib.Path(to).expanduser()

    # Stage the archive in a throwaway directory instead of the current
    # working directory: nothing in the CWD is overwritten, and the archive
    # is removed even when the download or the extraction raises.
    with tempfile.TemporaryDirectory() as staging_dir:
        archive_fp = os.path.join(staging_dir, archive_name)

        # Download data
        urllib.request.urlretrieve(source_url, archive_fp)

        # Extract zip file
        with zipfile.ZipFile(archive_fp, 'r') as zip_ref:
            zip_ref.extractall(tgt_dir)

    # Remove the literal ".zip" suffix. ``str.strip('.zip')`` would instead
    # strip any of the characters '.', 'z', 'i', 'p' from BOTH ends of the
    # name, which corrupts names such as "zip_corpus.jsonl.zip".
    suffix = '.zip'
    if archive_name.endswith(suffix):
        corpus_name = archive_name[:-len(suffix)]
    else:
        corpus_name = archive_name

    out_fp = (tgt_dir / corpus_name).absolute()
    print(f"Corpus downloaded to {out_fp}")

    return str(out_fp)