Data sources

text

In [1]:
import requests
# Download a plain-text file of quotes; in the recorded output below the
# individual quotes are separated by lines containing only '%'.
fortunes_url = 'https://raw.githubusercontent.com/JKirchartz/fortune/master/jung'
fortunes = requests.get(fortunes_url).text
# Preview the first 500 characters rather than printing the whole file.
print(fortunes[:500])
The word 'happiness' would lose its meaning if it were not balanced by sadness.
 -- Carl Jung
%
One looks back with appreciation to the brilliant teachers, but with gratitude to those who touched our human feelings. The curriculum is so much necessary raw material, but warmth is the vital element for the growing plant and for the soul of the child.
 -- Carl Jung
%
Knowing your own darkness is the best method for dealing with the darknesses of other people.
 -- Carl Jung
%
Everything that irritat

Reddit

In [2]:
import json
# Fetch the r/funny listing from Reddit's JSON API, then follow the listing's
# "after" cursor to fetch the second page.
# Both requests go over HTTPS and send the same descriptive User-Agent.
REDDIT_LISTING_URL = 'https://api.reddit.com/r/funny'
REDDIT_HEADERS = {'User-Agent': 'Mozilla/5.0 Data Science Workshop at inf.ku.dk'}

reddit_response = requests.get(REDDIT_LISTING_URL, headers=REDDIT_HEADERS)
# Fail here with a clear HTTP error instead of a confusing KeyError further down.
reddit_response.raise_for_status()
reddit_data = reddit_response.json()

# reddit_data["data"]["after"] is the cursor pointing past the last item of page 1.
reddit_page2_response = requests.get(
    REDDIT_LISTING_URL,
    params={"after": reddit_data["data"]["after"]},
    headers=REDDIT_HEADERS)
reddit_page2_response.raise_for_status()
reddit_page2 = reddit_page2_response.json()

# Show the first post of the second page as an example of the record layout.
reddit_page2["data"]["children"][0]
Out[2]:
{'data': {'approved_at_utc': None,
  'approved_by': None,
  'archived': False,
  'author': 'fizfilson',
  'author_flair_css_class': None,
  'author_flair_text': None,
  'banned_at_utc': None,
  'banned_by': None,
  'brand_safe': True,
  'can_gild': False,
  'can_mod_post': False,
  'clicked': False,
  'contest_mode': False,
  'created': 1519331929.0,
  'created_utc': 1519303129.0,
  'distinguished': None,
  'domain': 'i.redd.it',
  'downs': 0,
  'edited': False,
  'gilded': 0,
  'hidden': False,
  'hide_score': False,
  'id': '7zel4y',
  'is_crosspostable': False,
  'is_reddit_media_domain': True,
  'is_self': False,
  'is_video': False,
  'likes': None,
  'link_flair_css_class': None,
  'link_flair_text': None,
  'locked': False,
  'media': None,
  'media_embed': {},
  'mod_note': None,
  'mod_reason_by': None,
  'mod_reason_title': None,
  'mod_reports': [],
  'name': 't3_7zel4y',
  'num_comments': 7,
  'num_crossposts': 0,
  'num_reports': None,
  'over_18': False,
  'parent_whitelist_status': 'all_ads',
  'permalink': '/r/funny/comments/7zel4y/no_one/',
  'pinned': False,
  'post_hint': 'image',
  'preview': {'enabled': True,
   'images': [{'id': 'bwpWeUiwdTar4duXPbe_pWy4oreN6mQl9HebST2eIE0',
     'resolutions': [{'height': 80,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=108&fm=jpg&s=7a821d531b55d6501961a3ed38b87ba4',
       'width': 108},
      {'height': 161,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=216&fm=jpg&s=aa2c4067812dccd22576dea2eebc8707',
       'width': 216},
      {'height': 239,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=320&fm=jpg&s=14a61c48ed135113557a4347ccd8850f',
       'width': 320},
      {'height': 478,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=640&fm=jpg&s=0bb66920cca7aa32f85e390fd37b118f',
       'width': 640},
      {'height': 717,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=960&fm=jpg&s=ca8c31d4362460edc91861965c7f3636',
       'width': 960},
      {'height': 806,
       'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fit=crop&crop=faces%2Centropy&arh=2&w=1080&fm=jpg&s=f9faa4f57e8cc691bdac54a8a8d975da',
       'width': 1080}],
     'source': {'height': 928,
      'url': 'https://i.redditmedia.com/7AwbyjEh-PV8YJH3yIwnbckAqUjwZlNdsiP5mHzIs_s.jpg?fm=jpg&s=6e5fc136bcb70b793e5d4effd5bf3a43',
      'width': 1242},
     'variants': {}}]},
  'quarantine': False,
  'removal_reason': None,
  'report_reasons': None,
  'saved': False,
  'score': 270,
  'secure_media': None,
  'secure_media_embed': {},
  'selftext': '',
  'selftext_html': None,
  'spoiler': False,
  'stickied': False,
  'subreddit': 'funny',
  'subreddit_id': 't5_2qh33',
  'subreddit_name_prefixed': 'r/funny',
  'subreddit_type': 'public',
  'suggested_sort': None,
  'thumbnail': 'https://b.thumbs.redditmedia.com/VqrQYo4vDJRbAMPNriSEpKq6FQI8I78BdV_cgb5xpow.jpg',
  'thumbnail_height': 104,
  'thumbnail_width': 140,
  'title': 'No one.',
  'ups': 270,
  'url': 'https://i.redd.it/9caglpxperh01.jpg',
  'user_reports': [],
  'view_count': None,
  'visited': False,
  'whitelist_status': 'all_ads'},
 'kind': 't3'}

World Bank

In [3]:
# Query the World Bank API for indicator SP.POP.TOTL of country DNK.
# In the recorded output this is "Population, total" for Denmark, and with
# per_page=100 all 58 yearly observations arrive in a single page.
worldbank_url = 'http://api.worldbank.org/v2/countries/DNK/indicators/SP.POP.TOTL'
worldbank_params = {"per_page": 100, "format": "json"}
worldbank_response = requests.get(worldbank_url, params=worldbank_params)
# The reply is a two-element list: paging metadata followed by the observations.
result = json.loads(worldbank_response.text)
print(result)
[{'page': 1, 'pages': 1, 'per_page': 100, 'lastupdated': '2018-02-22', 'total': 58}, [{'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2017', 'value': None, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2016', 'value': 5731118, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2015', 'value': 5683483, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2014', 'value': 5643475, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2013', 'value': 5614932, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2012', 'value': 5591572, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2011', 'value': 5570572, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2010', 'value': 5547683, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2009', 'value': 5523095, 'unit': 
'', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2008', 'value': 5493621, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2007', 'value': 5461438, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2006', 'value': 5437272, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2005', 'value': 5419432, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2004', 'value': 5404523, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2003', 'value': 5390574, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2002', 'value': 5375931, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2001', 'value': 5358783, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '2000', 'value': 5339616, 'unit': '', 'obs_status': '', 'decimal': 0}, 
{'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1999', 'value': 5321799, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1998', 'value': 5304219, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1997', 'value': 5284991, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1996', 'value': 5263074, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1995', 'value': 5233373, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1994', 'value': 5206180, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1993', 'value': 5188628, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1992', 'value': 5171370, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1991', 'value': 5154298, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 
'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1990', 'value': 5140939, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1989', 'value': 5132594, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1988', 'value': 5129516, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1987', 'value': 5127024, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1986', 'value': 5120534, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1985', 'value': 5113691, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1984', 'value': 5111619, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1983', 'value': 5114297, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1982', 'value': 5117810, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 
'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1981', 'value': 5121572, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1980', 'value': 5123027, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1979', 'value': 5116801, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1978', 'value': 5104248, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1977', 'value': 5088419, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1976', 'value': 5072596, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1975', 'value': 5059862, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1974', 'value': 5045297, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1973', 'value': 5021861, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 
'date': '1972', 'value': 4991596, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1971', 'value': 4963126, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1970', 'value': 4928757, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1969', 'value': 4891860, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1968', 'value': 4864883, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1967', 'value': 4835354, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1966', 'value': 4797381, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1965', 'value': 4759012, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1964', 'value': 4722072, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1963', 'value': 4684483, 'unit': '', 
'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1962', 'value': 4647727, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1961', 'value': 4611687, 'unit': '', 'obs_status': '', 'decimal': 0}, {'indicator': {'id': 'SP.POP.TOTL', 'value': 'Population, total'}, 'country': {'id': 'DK', 'value': 'Denmark'}, 'countryiso3code': 'DNK', 'date': '1960', 'value': 4579603, 'unit': '', 'obs_status': '', 'decimal': 0}]]

Wikipedia

In [4]:
# Ask the MediaWiki API for links going out from one article.
title = "Philosophy"
wiki_response = requests.get(
    'https://en.wikipedia.org/w/api.php',
    params={
        "action": "query",
        "titles": title,
        "prop": "links",
        "format": "json",
        "plnamespace": [0],  # only links into namespace 0
        "pllimit": 5         # at most five links per reply
    })
# Surface HTTP failures directly instead of as a KeyError when the reply is unpacked.
wiki_response.raise_for_status()
result = wiki_response.json()
# Display the raw reply; the following cells unpack it step by step.
result
Out[4]:
{'continue': {'continue': '||', 'plcontinue': '13692155|0|Absolute_idealism'},
 'query': {'pages': {'13692155': {'links': [{'ns': 0,
      'title': '19th-century philosophy'},
     {'ns': 0, 'title': 'A.C. Grayling'},
     {'ns': 0, 'title': 'A priori knowledge'},
     {'ns': 0, 'title': 'Abhidharma'},
     {'ns': 0, 'title': 'Absolute (philosophy)'}],
    'ns': 0,
    'pageid': 13692155,
    'title': 'Philosophy'}}}}
In [5]:
# The reply keys its pages by page id (see the output below), not by title.
query_reply = result["query"]
query_reply["pages"]
Out[5]:
{'13692155': {'links': [{'ns': 0, 'title': '19th-century philosophy'},
   {'ns': 0, 'title': 'A.C. Grayling'},
   {'ns': 0, 'title': 'A priori knowledge'},
   {'ns': 0, 'title': 'Abhidharma'},
   {'ns': 0, 'title': 'Absolute (philosophy)'}],
  'ns': 0,
  'pageid': 13692155,
  'title': 'Philosophy'}}
In [6]:
# Only one title was requested, so take the first (and only) page entry
# without needing to know its page id.
[*result["query"]["pages"].values()][0]
Out[6]:
{'links': [{'ns': 0, 'title': '19th-century philosophy'},
  {'ns': 0, 'title': 'A.C. Grayling'},
  {'ns': 0, 'title': 'A priori knowledge'},
  {'ns': 0, 'title': 'Abhidharma'},
  {'ns': 0, 'title': 'Absolute (philosophy)'}],
 'ns': 0,
 'pageid': 13692155,
 'title': 'Philosophy'}
In [7]:
# Pick the first page entry of the reply, then its list of outgoing links.
page = [*result["query"]["pages"].values()][0]
links = page["links"]
links
Out[7]:
[{'ns': 0, 'title': '19th-century philosophy'},
 {'ns': 0, 'title': 'A.C. Grayling'},
 {'ns': 0, 'title': 'A priori knowledge'},
 {'ns': 0, 'title': 'Abhidharma'},
 {'ns': 0, 'title': 'Absolute (philosophy)'}]
In [8]:
# Reduce each link record to just its article title.
[entry["title"] for entry in links]
Out[8]:
['19th-century philosophy',
 'A.C. Grayling',
 'A priori knowledge',
 'Abhidharma',
 'Absolute (philosophy)']
In [9]:
# Ask the MediaWiki API which pages link *to* the given article ("linkshere").
title = "Blues dance"
linkshere_params = {
    "action": "query",
    "titles": title,
    "prop": "linkshere",
    "format": "json",
    "lhlimit": 50,
    "lhnamespace": 0,
    "lhshow": "!redirect"
}
linkshere_response = requests.get(
    'https://en.wikipedia.org/w/api.php',
    params=linkshere_params)
result = json.loads(linkshere_response.text)
# One title was requested, so the first page entry is the one we want.
page = list(result["query"]["pages"].values())[0]
links = page["linkshere"]
print([link["title"] for link in links])
['Blues', 'List of dance style categories', 'Dance improvisation', 'List of lindy hop moves', 'Lindy exchange', 'Blues (disambiguation)', 'Close embrace', 'List of North American folk music traditions', 'Lindy hop today', 'Talang 2007', 'The Spirit Moves', 'List of street and vernacular dances', 'Butler May', 'Disappearance of Tiffany Daniels']

content

In [10]:
# Fetch the article text ("extracts" property) for one page; the recorded
# output shows that the extract comes back as HTML.
title = "Contact improvisation"
extract_response = requests.get(
    'https://en.wikipedia.org/w/api.php',
    params=dict(action="query", titles=title, prop="extracts", format="json"))
result = json.loads(extract_response.text)
page = list(result["query"]["pages"].values())[0]
# Show only the first 600 characters of the extract.
page["extract"][:600]
Out[10]:
"<p><b>Contact improvisation</b> is a form of improvised dancing that has been developing internationally since 1972. Contact improvisation involves the exploration from one's body to the next by using the fundamentals of sharing weight, touch, being kinetically aware and finding a point of contact between you and your partner(s). American dancer and choreographer Steve Paxton had first conceived and presented this form of movement and has impacted how dancers and people view movement by a large factor. Paxton utilized his past training such as Aikido, a martial art form, to explore and push bo"

Open Graph Protocol (YouTube, dr.dk, ...)

In [11]:
!pip install python-opengraph
from opengraph import OpenGraph
Requirement already satisfied: python-opengraph in /home/rasmuserik/anaconda3/lib/python3.6/site-packages
Requirement already satisfied: requests>=2.7 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from python-opengraph)
Requirement already satisfied: beautifulsoup4>=4.3 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from python-opengraph)
Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from requests>=2.7->python-opengraph)
Requirement already satisfied: idna<2.7,>=2.5 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from requests>=2.7->python-opengraph)
Requirement already satisfied: urllib3<1.23,>=1.21.1 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from requests>=2.7->python-opengraph)
Requirement already satisfied: certifi>=2017.4.17 in /home/rasmuserik/anaconda3/lib/python3.6/site-packages (from requests>=2.7->python-opengraph)
In [12]:
# Read the Open Graph metadata (type, title, description, image, ...) of a DR news article.
dr_article_url = "https://www.dr.dk/nyheder/viden/teknologi/aktivist-kaemper-dine-digitale-rettigheder-mod-tech-giganter"
dr = OpenGraph(dr_article_url)
print(dr)
{'type': 'article', 'title': 'Aktivist kæmper for dine digitale rettigheder mod tech-giganter', 'site_name': 'DR', 'description': 'En østrigsk aktivist har startet en ny organisation, der vil sagsøge tech-virksomheder, som krænker dine digitale rettigheder.', 'url': 'https://www.dr.dk/nyheder/viden/teknologi/aktivist-kaemper-dine-digitale-rettigheder-mod-tech-giganter', 'image': 'http://asset.dr.dk/ImageScaler/?file=%2Fimages%2Fother%2F2018%2F02%2F08%2Fscanpix-20171003-150322-l.jpg&server=www.dr.dk'}
/home/rasmuserik/anaconda3/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 193 of the file /home/rasmuserik/anaconda3/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this:

 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))
In [13]:
# Read the Open Graph metadata of a YouTube video page.
youtube = OpenGraph("https://www.youtube.com/watch?v=hVimVzgtD6w")
# Individual fields are available as attributes; print two of them,
# then the complete metadata mapping.
for field in ("title", "description"):
    print(getattr(youtube, field))
print(youtube)
The best stats you've ever seen | Hans Rosling
http://www.ted.com With the drama and urgency of a sportscaster, statistics guru Hans Rosling uses an amazing new presentation tool, Gapminder, to present da...
{'type': 'video.other', 'title': "The best stats you've ever seen | Hans Rosling", 'site_name': 'YouTube', 'description': 'http://www.ted.com With the drama and urgency of a sportscaster, statistics guru Hans Rosling uses an amazing new presentation tool, Gapminder, to present da...', 'url': 'https://www.youtube.com/watch?v=hVimVzgtD6w', 'image': 'https://i.ytimg.com/vi/hVimVzgtD6w/hqdefault.jpg', 'video:url': 'http://www.youtube.com/v/hVimVzgtD6w?version=3&autohide=1', 'video:secure_url': 'https://www.youtube.com/v/hVimVzgtD6w?version=3&autohide=1', 'video:type': 'application/x-shockwave-flash', 'video:width': '480', 'video:height': '360', 'video:tag': 'child health'}
/home/rasmuserik/anaconda3/lib/python3.6/site-packages/bs4/__init__.py:181: UserWarning: No parser was explicitly specified, so I'm using the best available HTML parser for this system ("lxml"). This usually isn't a problem, but if you run this code on another system, or in a different virtual environment, it may use a different parser and behave differently.

The code that caused this warning is on line 193 of the file /home/rasmuserik/anaconda3/lib/python3.6/runpy.py. To get rid of this warning, change code that looks like this:

 BeautifulSoup(YOUR_MARKUP})

to this:

 BeautifulSoup(YOUR_MARKUP, "lxml")

  markup_type=markup_type))

RSS

In [14]:
!pip install feedparser
import feedparser
Requirement already satisfied: feedparser in /home/rasmuserik/anaconda3/lib/python3.6/site-packages
In [15]:
# Download DR's "all news" RSS feed and parse it.
response = requests.get('https://www.dr.dk/nyheder/service/feeds/allenyheder')
feed = feedparser.parse(response.text)
# Show the first entry as an example of the fields feedparser extracts.
feed['entries'][0]
Out[15]:
{'dr_channeldisplayname': 'Udland',
 'dr_channelname': 'Udland',
 'dr_imageuri620x349': 'https://www.dr.dk/images/other/2018/02/22/scanpix-20180221-195053-7_0.jpg',
 'dr_postingguid': '24b32be6-eb99-4d89-8ea8-98f287ca46b4',
 'dr_xmlimagearticle': '',
 'guidislink': False,
 'id': '24b32be6-eb99-4d89-8ea8-98f287ca46b4',
 'link': 'http://www.dr.dk/nyheder/udland/melissa-gemte-19-elever-i-skab-under-skyderi-bevaebnede-laerere-er-latterligt',
 'links': [{'href': 'http://www.dr.dk/nyheder/udland/melissa-gemte-19-elever-i-skab-under-skyderi-bevaebnede-laerere-er-latterligt',
   'rel': 'alternate',
   'type': 'text/html'}],
 'published': 'Thu, 22 Feb 2018 17:57:00 +0100',
 'published_parsed': time.struct_time(tm_year=2018, tm_mon=2, tm_mday=22, tm_hour=16, tm_min=57, tm_sec=0, tm_wday=3, tm_yday=53, tm_isdst=0),
 'summary': "USA's præsident har foreslået at træne og bevæbne 20 procent af de amerikanske lærere.",
 'summary_detail': {'base': '',
  'language': None,
  'type': 'text/html',
  'value': "USA's præsident har foreslået at træne og bevæbne 20 procent af de amerikanske lærere."},
 'title': 'Melissa gemte 19 elever i skab under skyderi: "Bevæbnede lærere er latterligt"',
 'title_detail': {'base': '',
  'language': None,
  'type': 'text/plain',
  'value': 'Melissa gemte 19 elever i skab under skyderi: "Bevæbnede lærere er latterligt"'}}

Creative Commons search (incl. Flickr)

In [16]:
# Run a Creative Commons image search. The site answers with an HTML page
# (see the output below), which the next cell parses.
query = 'cat'
ccsearch_params = {
    "search": query,
    "page": 1,
    "search_fields": ["title", "creator", "tags"],
    "per_page": 20,
    "licenses": ['ALL-$', 'ALL-MOD'],
    "work_types": ["photos", "cultural"],
    "providers": ["500px", "flickr", "europeana", "met", "nypl", "rijksmuseum"]
}
ccsearch_headers = {
    'User-Agent': 'Mozilla/5.0 Data Science Workshop at inf.ku.dk'
}
ccsearch_response = requests.get(
    "https://ccsearch.creativecommons.org/",
    params=ccsearch_params,
    headers=ccsearch_headers)
html = ccsearch_response.text
# Peek at the start of the document only.
html[:200]
Out[16]:
'\n<!doctype html>\n<meta name="viewport" content="width=device-width, initial-scale=1">\n<meta charset="UTF-8"><script type="text/javascript">window.NREUM||(NREUM={}),__nr_require=function(e,t,n){functio'
In [17]:
from bs4 import BeautifulSoup
# Parse the result page and collect the attribute dicts of all <img> tags
# that carry a non-empty data-title attribute.
soup = BeautifulSoup(html, 'html.parser')
images = []
for img in soup.find_all('img'):
    if img.attrs.get('data-title', False):
        images.append(img.attrs)
# Show the first hit as an example of the available data-* attributes.
images[0]
Out[17]:
{'data-created_on': '2016-11-16T16:05:57.498240+00:00',
 'data-creator': 'Cat Wendt',
 'data-creator_url': 'https://www.flickr.com/people/catwendt/',
 'data-foreign_landing_url': 'https://www.flickr.com/photos/catwendt/15589191923',
 'data-identifier': 'DkKgzQ0SUf8DA6waC0vn2w==',
 'data-license': 'by',
 'data-license_version': '2.0',
 'data-provider': 'flickr',
 'data-provider_name': 'Flickr',
 'data-provider_url': 'https://flickr.com',
 'data-source': 'openimages',
 'data-tags': "['vertebrate', 'carnivoran', 'cat', 'british semi longhair', 'norwegian forest cat', 'mammal', 'domestic long haired cat', 'cat like mammal', 'animal', 'nose', 'european shorthair', 'close up', 'domestic short haired cat', 'small to medium sized cats', 'whiskers', 'pet', 'nebelung', 'maine coon', 'siberian', 'carnivoran']",
 'data-title': 'Cat Selfie (Buffy)',
 'data-url': 'https://farm8.staticflickr.com/7464/15589191923_e8ba97bf6c_o.jpg',
 'src': 'https://c8.staticflickr.com/8/7464/15589191923_99889a9f83_z.jpg'}

openplatform.dbc.dk bibliographical data

In [18]:
import os

# Read the API token from the environment so a personal token can be supplied
# without editing the notebook. The fallback is the token that was previously
# hard-coded here, kept so the notebook still runs unchanged.
# NOTE(review): a token committed to the notebook is readable by everyone with
# access to the file - consider rotating it and dropping the fallback.
access_token = os.environ.get(
    "OPENPLATFORM_ACCESS_TOKEN", "fa2dfc00a4697058b43b8f8c52a03af60e889339")

# Search the DBC open platform and request only the listed fields per hit.
search_response = requests.get(
    'https://openplatform.dbc.dk/v2/search',
    params={
        "access_token": access_token,
        "q": 'fo="Haruki Murakami"',
        "fields": ["title", "creator", "pid", "coverUrlFull"],
        "limit": 10
    })
# Surface HTTP failures directly instead of as a JSON decoding error.
search_response.raise_for_status()
search_results = search_response.json()
search_results
Out[18]:
{'data': [{'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=24504174&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=11f628e324c65a92297e'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:24504174'],
   'title': ['Sønden for grænsen og vesten for solen']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=25254996&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=4ce81715a0ab600c0af5'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:25254996'],
   'title': ['Sputnik min elskede']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=29662177&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=2fbde785c4df21df6073'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:29662177'],
   'title': ['En vild fårejagt']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=26742943&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=f5f1643dbff43dba9a24'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:26742943'],
   'title': ['Kafka på stranden']},
  {'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:22497464'],
   'title': ['Dans, dans, dans']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=26001897&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=876523536b115eb9583d'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:26001897'],
   'title': ['Norwegian wood']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=27434665&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=8294ba999768764cba01'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:27434665'],
   'title': ['Efter midnat']},
  {'creator': ['Haruki Murakami'],
   'pid': ['874310-katalog:DBB0416292'],
   'title': ['Fødselsdagshistorier']},
  {'coverUrlFull': ['https://moreinfo.addi.dk/2.10/more_info_get.php?lokalid=23645564&attachment_type=forside_stor&bibliotek=870970&source_id=870970&key=a52f1f7c9a9e3017f36e'],
   'creator': ['Haruki Murakami'],
   'pid': ['870970-basis:23645564'],
   'title': ['Trækopfuglens krønike']},
  {'creator': ['Murakami Haruki'],
   'pid': ['875880-katalog:001320829'],
   'title': ['Norwegian wood']}],
 'statusCode': 200}
In [19]:
import os

# Read the API token from the environment so a personal token can be supplied
# without editing the notebook. The fallback is the token that was previously
# hard-coded here, kept so the notebook still runs unchanged.
# NOTE(review): a token committed to the notebook is readable by everyone with
# access to the file - consider rotating it and dropping the fallback.
access_token = os.environ.get(
    "OPENPLATFORM_ACCESS_TOKEN", "fa2dfc00a4697058b43b8f8c52a03af60e889339")

# Fetch the full bibliographic record for one work, identified by its pid.
work_response = requests.get(
    'https://openplatform.dbc.dk/v2/work',
    params={
        "access_token": access_token,
        # The endpoint takes the pids as a JSON-encoded list; json.dumps builds
        # the same string that was previously written out by hand.
        "pids": json.dumps(["870970-basis:28934297"])
    })
# Surface HTTP failures directly instead of as a JSON decoding error.
work_response.raise_for_status()
work_response.json()
Out[19]:
{'data': [{'abstract': ['Aomame er en 30-årig smart pige, uddannet kampsportsinstruktør, men arbejder p.t. som lejemorder. Tengo er matematiklærer med forfatterdrømme. Han skal omskrive en sær 17-årig piges sære historie. Begge hovedfigurer oplever, at deres virkelighed forvrides let, hvad påvirker deres virkelighed?'],
   'acIdentifier': ['28934297|870970'],
   'acSource': ['Bibliotekskatalog'],
   'accessType': ['physical'],
   'alternative': ['1984'],
   'audience': ['voksenmaterialer'],
   'contributorTrl': ['Mette Holm'],
   'creator': ['Haruki Murakami'],
   'creatorAut': ['Haruki Murakami'],
   'creatorSort': ['Murakami, Haruki'],
   'date': ['2011'],
   'dcLanguage': ['Dansk'],
   'dcTitle': ['1Q84'],
   'dcTitleFull': ['1Q84. Bog 1'],
   'extent': ['403 sider', '3 bind'],
   'fedoraPid': ['870970-basis:28934297'],
   'hasCreatorDescription': ['870971-forfweb:89734541'],
   'hasReview': ['870971-anmeld:34803048',
    '870971-anmeld:34803501',
    '870971-anmeld:34803013',
    '870971-anmeld:35135340',
    '870971-anmeld:34801029',
    '870976-anmeld:30958357',
    '870971-anmeld:34803498',
    '870971-anmeld:35585338',
    '870971-anmeld:34804206'],
   'identifierISBN': ['9788779559202'],
   'language': ['Dansk'],
   'languageISO6392': ['dan'],
   'pid': ['870970-basis:28934297'],
   'publisher': ['Klim'],
   'source': ['1Q84'],
   'spatialDBCS': ['Japan'],
   'subjectDBCS': ['kultur',
    'kærlighed',
    'magisk realisme',
    'magt',
    'parallelle verdener',
    'skrivekunst'],
   'subjectDK5': ['sk'],
   'subjectDK5Text': ['Skønlitteratur'],
   'temporalDBCP': ['1980-1989'],
   'title': ['1Q84'],
   'titleFull': ['1Q84'],
   'type': ['Bog (bind 1)'],
   'typeBibDKType': ['Bog'],
   'undefined': ['_BASEURL_url_ctx_fmt=info:ofi/fmt:kev:mtx:ctx&ctx_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:book&rft.btitle=1Q84&rft.aulast=Murakami&rft.aufirst=Haruki&rft.auinit=H&rft.date=2011&rft.isbn=9788779559202&rft.pub=Klim&rft.edition=1.+udgave,+1.+oplag+%282011%29&rft.genre=book',
    '_BASEURL_url_ctx_fmt=info:ofi/fmt:kev:mtx:ctx&ctx_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:book&rft.btitle=1Q84&rft.aulast=Murakami&rft.aufirst=Haruki&rft.auinit=H&rft.date=2011&rft.isbn=9788779559202&rft.pub=Klim&rft.edition=1.+udgave,+1.+oplag&rft.genre=book',
    '_BASEURL_url_ctx_fmt=info:ofi/fmt:kev:mtx:ctx&ctx_ver=Z39.88-2004&rft_val_fmt=info:ofi/fmt:kev:mtx:book&rft.btitle=1Q84&rft.aulast=Murakami&rft.aufirst=Haruki&rft.auinit=H&rft.date=2013&rft.isbn=9788779559202&rft.pub=Klim&rft.edition=1.+udgave&rft.genre=book&rfr_id=info:sid/dbc.dk:820010-katalog'],
   'version': ['1. udgave, 1. oplag (2011)'],
   'workType': ['book']}],
 'statusCode': 200}