1: <?php
2:
3: namespace Alpha\Util\Http;
4:
5: /**
6: * A utility class for carrying out various tasks on HTTP user agent strings.
7: *
8: * @since 1.0
9: *
10: * @author John Collins <dev@alphaframework.org>
11: * @license http://www.opensource.org/licenses/bsd-license.php The BSD License
12: * @copyright Copyright (c) 2015, John Collins (founder of Alpha Framework).
13: * All rights reserved.
14: *
15: * <pre>
16: * Redistribution and use in source and binary forms, with or
17: * without modification, are permitted provided that the
18: * following conditions are met:
19: *
20: * * Redistributions of source code must retain the above
21: * copyright notice, this list of conditions and the
22: * following disclaimer.
23: * * Redistributions in binary form must reproduce the above
24: * copyright notice, this list of conditions and the
25: * following disclaimer in the documentation and/or other
26: * materials provided with the distribution.
27: * * Neither the name of the Alpha Framework nor the names
28: * of its contributors may be used to endorse or promote
29: * products derived from this software without specific
30: * prior written permission.
31: *
32: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
33: * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
34: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
35: * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
36: * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
37: * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
38: * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
39: * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
40: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
41: * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
42: * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
43: * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
44: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45: * </pre>
46: */
class AgentUtils
{
    /**
     * An array of partial user agent strings belonging to well known web spider bots.
     *
     * Each entry is compared case-insensitively against the user agent passed to isBot().
     * An entry containing an asterisk (*) is treated as a wildcard pattern in which the
     * asterisk stands for any run of characters; every other entry is a plain substring.
     *
     * @var array
     *
     * @since 1.0
     */
    private static $bots = array(
        'ia_archiver',
        'Scooter/',
        'Ask Jeeves',
        'Baiduspider+(',
        'bingbot/',
        'Disqus/',
        'Exabot/',
        'FAST Enterprise Crawler',
        'FAST-WebCrawler/',
        'http://www.neomo.de/',
        'Gigabot/',
        'Mediapartners-Google',
        'Google Desktop',
        'Feedfetcher-Google',
        'Googlebot',
        'heise-IT-Markt-Crawler',
        'heritrix/1.',
        'ibm.com/cs/crawler',
        'ICCrawler - ICjobs',
        'ichiro/2',
        'MJ12bot/',
        'MetagerBot/',
        'msnbot-NewsBlogs/',
        'msnbot/',
        'msnbot-media/',
        'NG-Search/',
        'http://lucene.apache.org/nutch/',
        'NutchCVS/',
        'OmniExplorer_Bot/',
        'online link validator',
        'psbot/0',
        'Seekbot/',
        'Sensis Web Crawler',
        'SEO search Crawler/',
        'Seoma [SEO Crawler]',
        'SEOsearch/',
        'Snappy/1.1 ( http://www.urltrends.com/ )',
        'http://www.tkl.iis.u-tokyo.ac.jp/~crawler/',
        'SynooBot/',
        'crawleradmin.t-info@telekom.de',
        'TurnitinBot/',
        'voyager/1.0',
        'W3 SiteSearch Crawler',
        'W3C-checklink/',
        'W3C_*Validator',
        'http://www.WISEnutbot.com',
        'yacybot',
        'Yahoo-MMCrawler/',
        'Yahoo! DE Slurp',
        'Yahoo! Slurp',
        'YahooSeeker/',
    );

    /**
     * Static method to check if the provided user agent string matches any of the known user
     * agent strings in the $bots array on this class.
     *
     * Matching is case-insensitive. A null or empty user agent is never considered a bot.
     *
     * @param string|null $userAgent The user agent string that we want to check.
     *
     * @return bool True when the user agent matches at least one entry in $bots, false otherwise.
     *
     * @since 1.0
     */
    public static function isBot($userAgent)
    {
        // Normalise first so that a missing header (null) never reaches the string functions below.
        $userAgent = (string) $userAgent;

        if ($userAgent === '') {
            return false;
        }

        foreach (self::$bots as $botName) {
            if (strpos($botName, '*') === false) {
                // Plain entry: case-insensitive substring search, compared strictly against false
                // so that a match at offset 0 is still reported as a match.
                if (stripos($userAgent, $botName) !== false) {
                    return true;
                }
            } elseif (preg_match(self::wildcardToRegex($botName), $userAgent) === 1) {
                // Wildcard entry: a literal search for the asterisk would never match.
                return true;
            }
        }

        return false;
    }

    /**
     * Converts a $bots entry containing asterisk wildcards into a case-insensitive regular expression.
     *
     * @param string $pattern The wildcard pattern, where each * stands for any run of characters.
     *
     * @return string A delimited PCRE pattern in which everything except the asterisks is matched literally.
     */
    private static function wildcardToRegex($pattern)
    {
        // preg_quote() escapes the asterisk as "\*"; turn exactly those back into ".*".
        return '/'.str_replace('\*', '.*', preg_quote($pattern, '/')).'/i';
    }
}
134: