# robots.txt for https://warwick.ac.uk/
#
# Format reminder: one record per line; "#" starts a comment that runs to the
# end of the line. Do not put blank lines inside a group - some legacy parsers
# treat a blank line as the end of the group.

# ---------------------------------------------------------------------------
# Group 1: rules for every crawler that has no more specific group below.
# ---------------------------------------------------------------------------
User-agent: *
# Don't index the training pages, to try to stop people who want to study
# architecture from applying here: Warwick doesn't offer an Architecture course.
# Explanation: https://twitter.com/matmannion/status/1146342325980975104
Disallow: /training/
# Disallow indexing of the CMS application itself, as no useful content exists
# there for external visitors. The Allow lines below carve out exceptions.
Disallow: /sitebuilder2/
# Let crawlers fetch the iCal feed, gadgets, RSS feeds and sitemaps.
Allow: /sitebuilder2/api/sitebuilder.ics
Allow: /sitebuilder2/api/gadgets/
Allow: /sitebuilder2/api/rss/
Allow: /sitebuilder2/api/sitemap/
Allow: /sitebuilder2/api/videoSitemap.xml
# Allow thumbnail images.
Allow: /sitebuilder2/file/*

# Sitemap is a file-wide record: it is not tied to any User-agent group.
Sitemap: https://warwick.ac.uk/sitebuilder2/api/sitemap/index.xml

# ---------------------------------------------------------------------------
# Group 2: rules for Rogerbot only.
# NOTE(review): a crawler that matches a specific group ignores the "*" group,
# so Rogerbot is NOT bound by the /training/ and /sitebuilder2/ rules above,
# and the query-string rules below apply to no other crawler. Confirm that
# this is intended.
# ---------------------------------------------------------------------------
# Disallow query string variations of sports calendars/news
User-agent: Rogerbot
Disallow: /services/sport/events/calendar/*?*
Disallow: /services/sport/news/*?*
Disallow: /services/sport/active/tennis/classes/*?*
Disallow: /services/sport/content-hub/feed/*?*
# Disallow query string variations of conferences news
Disallow: /services/conferences/content-corner/*?*
Disallow: /services/conferences/news/*?*