Minor change to category system

This commit is contained in:
colttaine 2023-03-08 12:13:20 +11:00
parent 29ed01d50b
commit 2cf406bdaa
2 changed files with 10 additions and 4 deletions

View file

@@ -136,8 +136,8 @@ class scrape:
tmp_key = re.sub('\[.*\]', '', tmp_key)
tmp_key = re.sub('\(.*\)', '', tmp_key)
tmp_key = re.sub('km2', '', tmp_key)
tmp_key = re.sub('km', '', tmp_key)
tmp_key = re.sub('mi2', '', tmp_key)
tmp_key = re.sub('ha', '', tmp_key)
tmp_key = re.sub('hectares', '', tmp_key)
tmp_key = re.sub('\ in\ ', '', tmp_key)
tmp_key = re.sub('US\ \$', '', tmp_key)
@@ -236,6 +236,10 @@ class scrape:
key.find('rate') >=0 ):
key_unit.append('deaths/100k population')
elif( key.find('life') >=0 and
key.find('expectancy') >=0 ):
key_unit.append('years')
elif( key.find('birth') >=0 and
key.find('rate') >=0 ):
key_unit.append('births/1k population')
@@ -360,7 +364,7 @@ class scrape:
self.meta['category'] = 'geographic'
self.meta['subcategory'] = 'area'
if( (search.find('arable') >=0 or
elif( (search.find('arable') >=0 or
search.find('farm') >=0 or
search.find('forrested') >=0) and
search.find('land') >=0 ):
@@ -419,8 +423,10 @@ class scrape:
search.find('import') <0 and
search.find('export') <0 and
search.find('invest') <0 and
search.find('spending') <0 and
search.find('manufactur') <0 and
search.find('military') <0 and
search.find('education') <0 and
search.find('health') <0 ):
self.meta['category'] = 'economic'
self.meta['subcategory'] = 'gdp'
@@ -552,7 +558,7 @@ class scrape:
if isinstance(self.data[x][y], str):
self.data[x][y] = self.data[x][y].strip()
if any(i.isdigit() for i in self.data[x][y]):
self.data[x][y] = ''.join([i for i in self.data[x][y] if i.isdigit() or i=='.'])
self.data[x][y] = ''.join([i for i in self.data[x][y] if i.isdigit() or i=='.' or i=='-'])
# Convert To Float
if isfloat(self.data[x][y]):

View file

@@ -5,7 +5,7 @@ import masterscraper as ms
scrape = ms.scrape('https://www.macrotrends.net/countries/ISL/iceland/economic-growth-rate')
scrape = ms.scrape('https://www.macrotrends.net/countries/CHN/china/net-migration')
scrape.get_meta()
scrape.clean()
scrape.save()