From 2cf406bdaaa43651b1010fab4447331896ec94ec Mon Sep 17 00:00:00 2001 From: colttaine Date: Wed, 8 Mar 2023 12:13:20 +1100 Subject: [PATCH] Minor change to category system --- masterscraper/__init__.py | 12 +++++++++--- scrape_single.py | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/masterscraper/__init__.py b/masterscraper/__init__.py index 9980196..9a2f453 100644 --- a/masterscraper/__init__.py +++ b/masterscraper/__init__.py @@ -136,8 +136,8 @@ class scrape: tmp_key = re.sub('\[.*\]', '', tmp_key) tmp_key = re.sub('\(.*\)', '', tmp_key) tmp_key = re.sub('km2', '', tmp_key) + tmp_key = re.sub('km', '', tmp_key) tmp_key = re.sub('mi2', '', tmp_key) - tmp_key = re.sub('ha', '', tmp_key) tmp_key = re.sub('hectares', '', tmp_key) tmp_key = re.sub('\ in\ ', '', tmp_key) tmp_key = re.sub('US\ \$', '', tmp_key) @@ -236,6 +236,10 @@ class scrape: key.find('rate') >=0 ): key_unit.append('deaths/100k population') + elif( key.find('life') >=0 and + key.find('expectancy') >=0 ): + key_unit.append('years') + elif( key.find('birth') >=0 and key.find('rate') >=0 ): key_unit.append('births/1k population') @@ -360,7 +364,7 @@ class scrape: self.meta['category'] = 'geographic' self.meta['subcategory'] = 'area' - if( (search.find('arable') >=0 or + elif( (search.find('arable') >=0 or search.find('farm') >=0 or search.find('forrested') >=0) and search.find('land') >=0 ): @@ -419,8 +423,10 @@ class scrape: search.find('import') <0 and search.find('export') <0 and search.find('invest') <0 and + search.find('spending') <0 and search.find('manufactur') <0 and search.find('military') <0 and + search.find('education') <0 and search.find('health') <0 ): self.meta['category'] = 'economic' self.meta['subcategory'] = 'gdp' @@ -552,7 +558,7 @@ class scrape: if isinstance(self.data[x][y], str): self.data[x][y] = self.data[x][y].strip() if any(i.isdigit() for i in self.data[x][y]): - self.data[x][y] = ''.join([i for i in self.data[x][y] if i.isdigit() or i=='.']) + self.data[x][y] = ''.join([i for i in self.data[x][y] if i.isdigit() or i=='.' or i=='-']) # Convert To Float if isfloat(self.data[x][y]): diff --git a/scrape_single.py b/scrape_single.py index b13636b..77fab79 100644 --- a/scrape_single.py +++ b/scrape_single.py @@ -5,7 +5,7 @@ import masterscraper as ms -scrape = ms.scrape('https://www.macrotrends.net/countries/ISL/iceland/economic-growth-rate') +scrape = ms.scrape('https://www.macrotrends.net/countries/CHN/china/net-migration') scrape.get_meta() scrape.clean() scrape.save()