# For tips see http://www.askapache.com/seo/seo-with-robotstxt.html
#
# Rule values are matched as PREFIXES of the URL path (starting at "/"),
# not as arbitrary substrings. To match a name anywhere in the path we use
# the "*" wildcard ("/*name"), which is supported by the major crawlers and
# standardised in RFC 9309.
#
# We exclude lots of general purpose forms which are available in various
# mount points of the site, and the internal image bank, which is hidden in
# the navigation tree in any case.
#
# Note: no blank lines inside a group - some older parsers treat a blank
# line as the end of the group.

User-agent: *
# Forms and helper views reachable under any mount point
Disallow: /*set_language
Disallow: /*force_web
Disallow: /*login_form
Disallow: /*sendto_form
Disallow: /images
Disallow: /etusivu
Disallow: /sisalto/etusivu
Disallow: /*footer-text
Disallow: /ohjeet
Disallow: /sisalto/ohjeet
Disallow: /*carousel
Disallow: /*kuvapankki
# Do not show the site front page twice in search results
# (rule values must be paths, not absolute URLs)
Disallow: /sisalto/siida-nayttelykeskus-ja-kohtaamispaikka
# Site-root forms, listings and technical resources
Disallow: /search_form
Disallow: /sendto_form
Disallow: /accessibility-info
Disallow: /contact-info
Disallow: /login_form
Disallow: /mail_password_form?userid=
Disallow: /news_item
Disallow: /enabling_cookies
Disallow: /front-page
Disallow: /test-folder
Disallow: /portal_javascripts
Disallow: /portal_kss
Disallow: /author
Disallow: /*talkback
Disallow: /*RSS
Disallow: /events_listing
Disallow: /vcs_view
Disallow: /ics_view
Disallow: /events/events-by-date/

# Googlebot supports the "*" (any characters) and "$" (end of URL) wildcards;
# these are not full regular expressions.
# Block all URLs including query strings (? pattern) - contentish objects
# expose query strings only for actions or status reports, which might
# confuse search results.
# This will also block ?set_language
#
# NOTE(review): a crawler obeys only the most specific group that matches
# its user agent, so Googlebot follows ONLY the rules in this group and
# ignores the "User-agent: *" group above. If those rules should apply to
# Googlebot as well, they must be repeated here - confirm the intent.
User-agent: Googlebot
Disallow: /*?*
Disallow: /*folder_factories$
Allow: /@@mobile_sitemap?

# Allow Adsense bot on entire site
User-agent: Mediapartners-Google*
Disallow:
Allow: /*