Source code for geograpy.utils

import gzip
import os
import shutil
import time
import urllib.request

import jellyfish


class Download:
    """
    Utility functions for downloading data
    """

    @staticmethod
    def getURLContent(url: str):
        """
        read the content behind the given url as text

        Args:
            url(str): the url to open

        Return:
            str: the response body decoded with the default codec of
            bytes.decode()
        """
        with urllib.request.urlopen(url) as urlResponse:
            content = urlResponse.read().decode()
        return content

    @staticmethod
    def getFileContent(path: str):
        """
        read the complete content of the given text file

        Args:
            path(str): the path of the file to read

        Return:
            str: the content of the file
        """
        with open(path, "r") as file:
            content = file.read()
        return content

    @staticmethod
    def needsDownload(filePath: str, force: bool = False) -> bool:
        """
        check if a download of the given filePath is necessary that is the file
        does not exist, has a size of zero or the download should be forced

        Args:
            filePath(str): the path of the file to be checked
            force(bool): True if the result should be forced to True

        Return:
            bool: True if a download for this file is needed
        """
        if not os.path.isfile(filePath):
            return True
        return force or os.stat(filePath).st_size == 0

    @staticmethod
    def downloadBackupFile(
        url: str, fileName: str, targetDirectory: str, force: bool = False
    ):
        """
        Downloads the gzip file from the given url and extracts it to the
        given fileName in the target directory.

        The downloaded archive is stored as "<targetDirectory>/<fileName>.gz"
        and is kept after the extraction.

        Args:
            url: url linking to a downloadable gzip file
            fileName: Name of the file that should be extracted from gzip file
            targetDirectory(str): download the file to this directory
            force (bool): True if the download should be forced

        Returns:
            Name of the extracted file with path to the backup directory

        Raises:
            FileNotFoundError: if the extracted file does not exist after
                the extraction
        """
        extractTo = f"{targetDirectory}/{fileName}"
        # we might want to check whether a new version is available
        if Download.needsDownload(extractTo, force=force):
            # exist_ok avoids the race between checking and creating
            os.makedirs(targetDirectory, exist_ok=True)
            zipped = f"{extractTo}.gz"
            print(f"Downloading {zipped} from {url} ... this might take a few seconds")
            urllib.request.urlretrieve(url, zipped)
            print(f"Unzipping {extractTo} from {zipped}")
            try:
                with gzip.open(zipped, "rb") as gzipped, open(
                    extractTo, "wb"
                ) as unzipped:
                    shutil.copyfileobj(gzipped, unzipped)
            except BaseException:
                # a partially written file may have a non-zero size and
                # would then be mistaken for a complete download by
                # needsDownload on the next call - so remove it
                if os.path.isfile(extractTo):
                    os.remove(extractTo)
                raise
            print("Extracting completed")
            if not os.path.isfile(extractTo):
                # raising a plain string is a TypeError - raise a real exception
                raise FileNotFoundError(
                    f"could not extract {fileName} from {zipped}"
                )
        return extractTo
class Profiler:
    """
    simple profiler that measures the wall clock time since its construction
    """

    def __init__(self, msg, profile=True):
        """
        remember the message and start the clock

        Args:
            msg(str): the message to show if profiling is active
            profile(bool): True if messages should be printed
        """
        self.msg, self.profile = msg, profile
        self.starttime = time.time()
        if self.profile:
            print(f"Starting {msg} ...")

    def time(self, extraMsg=""):
        """
        determine the seconds elapsed since construction and print them
        if profiling is active

        Args:
            extraMsg(str): text appended directly to the message when printing

        Return:
            float: the elapsed time in seconds
        """
        now = time.time()
        elapsed = now - self.starttime
        if not self.profile:
            return elapsed
        print(f"{self.msg}{extraMsg} took {elapsed:5.1f} s")
        return elapsed
def remove_non_ascii(s):
    """
    Strip every character outside the ASCII range from the given string

    Args:
        s: string: The string to filter

    Returns:
        string: The characters of s with a code point below 128,
        in their original order

    Hat tip: http://stackoverflow.com/a/1342373/2367526
    """
    kept = []
    for char in s:
        # ASCII covers the code points 0..127
        if ord(char) <= 127:
            kept.append(char)
    return "".join(kept)
def fuzzy_match(s1, s2, max_dist=0.8):
    """
    Fuzzy match the given two strings using the Jaro-Winkler similarity
    as computed by jellyfish.jaro_winkler_similarity

    see https://en.wikipedia.org/wiki/Jaro-Winkler_distance

    Note: despite its name max_dist is used as the minimum similarity
    that is required for a match - not as a maximum distance.

    Args:
        s1: string: First string
        s2: string: Second string
        max_dist: float: The similarity threshold - default: 0.8

    Returns:
        True if the similarity of s1 and s2 is greater than or equal to
        max_dist. Otherwise False
    """
    similarity = jellyfish.jaro_winkler_similarity(s1, s2)
    return similarity >= max_dist