# Source code for geograpy.places

from collections import Counter

from geograpy.locator import City, Locator, Region

from .utils import fuzzy_match, remove_non_ascii

"""
Takes a list of place names and works out the place designation (country, region, etc.)
and the relationships between places (a city is inside a region, which is inside a country, etc.)
"""


class PlaceContext(Locator):
    """
    Adds context information to a place name

    After the set_* methods have run the instance carries:
        places: the normalized place names handed to the constructor
        countries / country_mentions: country names found and their counts
        regions / country_regions / region_mentions: place names matching a
            region of a found country, grouped per country, and their counts
        cities / country_cities / city_mentions / address_strings: city names
            found, grouped per country, their counts and "city, region, country"
            strings
        other: the places that are neither country, region nor city
    """

    def __init__(
        self, place_names: list, setAll: bool = True, correctMisspelling: bool = False
    ):
        """
        Constructor

        Args:
            place_names(list): The place names to check
            setAll(bool): True if all context information should immediately be set
            correctMisspelling(bool): stored on the instance; if True
                get_region_names corrects the country name before querying
        """
        super().__init__()
        self.correctMisspelling = correctMisspelling
        self.places = self.normalizePlaces(place_names)
        if setAll:
            self.setAll()

    def __str__(self) -> str:
        """
        Return a string representation of me

        Context attributes that have not been set yet (e.g. when constructed
        with setAll=False) are shown as None instead of raising AttributeError.
        """
        text = "countries=%s\nregions=%s\ncities=%s\nother=%s" % (
            getattr(self, "countries", None),
            getattr(self, "regions", None),
            getattr(self, "cities", None),
            getattr(self, "other", None),
        )
        return text

    def getRegions(self, countryName: str) -> list:
        """
        get a list of regions for the given countryName

        Args:
            countryName(str): the countryName to check

        Returns:
            list: one Region (built with Region.fromRecord) per record found
        """
        queryString = """SELECT r.* FROM COUNTRIES c JOIN regions r ON r.countryId=c.wikidataid WHERE c.name=(?)"""
        params = (countryName,)
        regionRecords = self.sqlDB.query(queryString, params)
        return [Region.fromRecord(regionRecord) for regionRecord in regionRecords]

    def get_region_names(self, countryName: str) -> list:
        """
        get region names for the given country

        Args:
            countryName(str): the name of the country

        Returns:
            list: the "name" value of every region record found
        """
        if self.correctMisspelling:
            countryName = self.correct_country_misspelling(countryName)
        # the country is matched by its name or by one of its labels
        regionOfCountryQuery = """SELECT name FROM regions WHERE countryId IN ( SELECT wikidataid FROM countries WHERE name LIKE (?) OR wikidataid IN ( SELECT wikidataid FROM country_labels WHERE label LIKE (?) ) )"""
        regionRecords = self.sqlDB.query(
            regionOfCountryQuery,
            params=(
                countryName,
                countryName,
            ),
        )
        return [r.get("name") for r in regionRecords]

    def setAll(self) -> None:
        """
        Set all context information
        """
        self.set_countries()
        self.set_regions()
        self.set_cities()
        self.set_other()

    def set_countries(self) -> None:
        """
        get the country information from my places

        Sets self.countries (unique country names) and self.country_mentions
        (list of (name, count) tuples, most common first).
        """
        countries = []
        for place in self.places:
            country = self.getCountry(place)
            if country is not None:
                countries.append(country.name)

        self.country_mentions = Counter(countries).most_common()
        self.countries = list(set(countries))

    def set_regions(self) -> None:
        """
        get the region information from my places
        (limited to the already identified countries)

        Sets self.regions, self.country_regions (country name -> matching
        place names) and self.region_mentions.
        """
        regions = []
        self.country_regions = {}

        # getattr: the attribute does not exist yet if set_countries never ran
        if not getattr(self, "countries", None):
            self.set_countries()

        def is_region(place_name: str, ascii_region_names: list) -> bool:
            """
            Checks whether the given place name matches any of the region names

            Args:
                place_name(str): place name to check against the regions
                ascii_region_names(list): valid region names, already passed
                    through remove_non_ascii

            Returns:
                True if fuzzy_match accepts the place name (after removing
                non ascii characters) for at least one region name.
                Otherwise False
            """
            ascii_place = remove_non_ascii(place_name)
            return any(fuzzy_match(ascii_place, rn) for rn in ascii_region_names)

        # loop invariant: every country is checked against the same unique places
        unique_places = set(self.places)
        for country in self.countries:
            ascii_region_names = [
                remove_non_ascii(rn) for rn in self.get_region_names(country)
            ]
            matched_regions = [
                p for p in unique_places if is_region(p, ascii_region_names)
            ]
            regions += matched_regions
            self.country_regions[country] = list(set(matched_regions))

        # NOTE(review): places are de-duplicated above, so a count here is the
        # number of countries a place matched for, not its number of
        # occurrences in self.places - confirm this is intended
        self.region_mentions = Counter(regions).most_common()
        self.regions = list(set(regions))

    def set_cities(self) -> None:
        """
        set the cities information

        Sets self.cities, self.country_cities (country name -> city names),
        self.address_strings and self.city_mentions. Countries of found cities
        that are not known yet are added to self.countries and
        self.country_mentions.
        """
        self.cities = []
        self.country_cities = {}
        self.address_strings = []

        # getattr: the attributes do not exist yet if the setters never ran
        if not getattr(self, "countries", None):
            self.set_countries()

        if not getattr(self, "regions", None):
            self.set_regions()

        if not self.db_has_data():
            self.populate_db()

        # ToDo: Duplicate with Locator.city_for_name e.g. extend method to support multiple names
        placesWithoutDuplicates = set(self.places)
        # only the "?" placeholders are concatenated - the names are bound as parameters
        params = ",".join("?" * len(placesWithoutDuplicates))
        query = "SELECT * FROM CityLookup WHERE name IN (" + params + ")"
        cityLookupRecords = self.sqlDB.query(query, list(placesWithoutDuplicates))

        def population(cityRecord) -> float:
            """
            sort key: the "pop" value of the record as float, 0.0 if it is None
            """
            pop = cityRecord.get("pop")
            return float(pop) if pop is not None else 0.0

        # records with the highest "pop" value are handled first
        cityLookupRecords.sort(key=population, reverse=True)

        for cityLookupRecord in cityLookupRecords:
            city = City.fromCityLookup(cityLookupRecord)

            if city.name not in self.cities:
                self.cities.append(city.name)

            countryName = city.country.name
            if countryName not in self.countries:
                self.countries.append(countryName)
                self.country_mentions.append((countryName, 1))

            citiesOfCountry = self.country_cities.setdefault(countryName, [])

            # the address is only recorded the first time a city name is seen
            # for a country
            if city.name not in citiesOfCountry:
                citiesOfCountry.append(city.name)
                regionName = city.region.name
                if (
                    countryName in self.country_regions
                    and regionName in self.country_regions[countryName]
                ):
                    address = f"{city.name}, {regionName}, {countryName}"
                    self.address_strings.append(address)

        # set lookup instead of scanning the cities list for every place
        known_cities = set(self.cities)
        all_cities = [p for p in self.places if p in known_cities]
        self.city_mentions = Counter(all_cities).most_common()

    def set_other(self) -> None:
        """
        set the places that are neither a country, a city nor a region

        Sets self.other to the places whose corrected name (see
        correct_country_misspelling) is in none of self.countries,
        self.cities and self.regions.
        """
        # getattr: the attribute does not exist yet if set_cities never ran
        if not getattr(self, "cities", None):
            self.set_cities()

        known_names = (self.countries, self.cities, self.regions)

        def unused(place_name) -> bool:
            """
            True if the corrected place name is in none of the known name lists
            """
            # corrected once per place instead of once per list
            # (assumes correct_country_misspelling is deterministic)
            corrected = self.correct_country_misspelling(place_name)
            return all(corrected not in names for names in known_names)

        self.other = [p for p in self.places if unused(p)]