Retrieve tweets using multiple twitter accounts for the search API

Before you proceed, you may want to read my previous article, which explains how to use the Twitter4J API to fetch more than 100 tweets. The idea is simple. The Twitter search API allows the use of multiple accounts. You can create multiple accounts (apps) for the search API and switch sessions when one account reaches its threshold. Repeat the search process for as long as you can. Once the first app reaches its limit, the program automatically switches keys and opens a new session with the next app. This way you can download an unlimited number of tweets. The only limitations are the rate limits, which control how fast you can make calls, and how much data Twitter can find for you in its search index.

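To run the following program, create a text file named “AllkeyFile.txt” and add the keys of your apps to it, four lines per app in the order shown below. (The “App1”/“App2” headings only label the example: the program reads every line of the file as a key value, so the file itself must contain just the key lines.) An example is given below.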

App1

consumer key

consumer secret

access token

access secret

App2

consumer key

consumer secret

access token

access secret

You may add the details of as many further apps as you like in the same way.

package tweets;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import twitter4j.GeoLocation;
import twitter4j.Paging;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.conf.ConfigurationBuilder;
import twitter4j.json.DataObjectFactory;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Collects tweets for one search query through the Twitter4J search API and
 * appends them to a CSV file, reconnecting with the next set of application
 * keys whenever the current ones are rejected or rate limited.
 *
 * <p>Every crawler instance owns a {@link KeyHandler}. The output writer and
 * the search term are shared by all instances through static fields.
 *
 * @author coding-guru.com
 */

public class TwitterCrawler implements Runnable {
	/** Seconds to wait before retrying after a recoverable API error. */
	private static final int RETRY_DELAY_SECONDS = 120;

	/** Number of times {@link #run()} repeats the search for the configured tag. */
	private static final int SEARCH_ROUNDS = 6;

	private String msg;
	private KeyHandler key;
	private static FileRW rw = new FileRW("test.csv");
	private static String tag = null;

	/** Creates a crawler with its own key handler. */
	public TwitterCrawler() {
		this.key = new KeyHandler();
	}

	/**
	 * Creates a crawler that additionally remembers a message.
	 *
	 * @param str message stored on the instance
	 */
	public TwitterCrawler(String str) {
		// Delegate so the key handler is always initialised; previously this
		// constructor left it null and getRequest() failed with a
		// NullPointerException.
		this();
		this.msg = str;
	}

	/**
	 * Connects to Twitter with the next application's credentials.
	 *
	 * <p>{@link KeyHandler#getKey()} loads the credentials at the head of the
	 * key queue and moves them to the tail, so every call to this method
	 * connects with the application that follows the one used by the previous
	 * call.
	 *
	 * @return a new Twitter client configured with those credentials
	 */
	public Twitter getRequest() {
		key.getKey();

		ConfigurationBuilder cb = new ConfigurationBuilder();
		cb.setDebugEnabled(true)
				.setOAuthConsumerKey(key.consumerkey_global)
				.setOAuthConsumerSecret(key.consumersecret_global)
				.setOAuthAccessToken(key.accesstoken_global)
				.setOAuthAccessTokenSecret(key.accesssecret_global);

		// The JSON store has to be enabled for DataObjectFactory.getRawJSON()
		// to return the raw payload of a tweet later on.
		cb.setJSONStoreEnabled(true);
		Twitter twitterRequest = new TwitterFactory(cb.build()).getInstance();

		System.out.println(twitterRequest.getAuthorization().toString());

		return twitterRequest;
	}

	/**
	 * Reconnects with the next application's keys once the current ones have
	 * no search requests left.
	 *
	 * @param result         the latest search result, carrying the rate limit status
	 * @param twitterRequest the client currently in use
	 * @return a new client if the limit is exhausted, otherwise {@code twitterRequest}
	 */
	private Twitter checkRateLimit(QueryResult result, Twitter twitterRequest) {
		// The rate limit status is only present when the response carried it.
		if (result.getRateLimitStatus() != null && result.getRateLimitStatus().getRemaining() <= 0) {
			System.out.println("Left over [" + result.getRateLimitStatus().getRemaining() + "] requests");
			System.err.println("Check Rate: Trying to connect using new keys.");
			twitterRequest = getRequest();
		}
		return twitterRequest;
	}

	/**
	 * Connects to Twitter and runs the search for the configured tag
	 * {@value #SEARCH_ROUNDS} times. Any failure is logged and ends the run.
	 */
	public void run() {
		try {
			Twitter twitterRequest = getRequest();

			// Default thread names look like "Thread-0"; everything after the
			// first seven characters is parsed as the numeric folder id.
			int hashTagID = Integer.parseInt(Thread.currentThread().getName().substring(7));

			for (int round = 0; round < SEARCH_ROUNDS; round++) {
				Query hashTag = new Query(tag);
				System.out.println("We Are about to Collecting Particular Hashtag [" + hashTag.getQuery() + "] with "
						+ Thread.currentThread().getName() + "] and storing in folder [" + hashTagID + "]");
				connectedHashtagUnique(hashTagID, hashTag, twitterRequest);
			}
		} catch (Exception ex) {
			if (ex instanceof InterruptedException) {
				// Keep the interrupt visible to whoever owns this thread.
				Thread.currentThread().interrupt();
			}
			Logger.getLogger(TwitterCrawler.class.getName()).log(Level.SEVERE, null, ex);
		}
	}

	/**
	 * Pages through the search results of {@code hashtag} and writes every
	 * tweet once (by tweet id) to the shared CSV writer.
	 *
	 * <p>On status 400, 401 and 429 the method waits, reconnects with the next
	 * application's keys and retries the same page. Other recognised status
	 * codes abort the search. Unrecognised ones (for example a failure without
	 * an HTTP status) are retried with the same client after the same wait.
	 *
	 * @param folderID       id used in the progress messages
	 * @param hashtag        the query to run; its page size is set to 100
	 * @param twitterRequest the client to start with
	 * @return {@code true} if all result pages were processed, {@code false}
	 *         if the search was aborted
	 * @throws InterruptedException if the thread is interrupted while waiting
	 */
	private boolean connectedHashtagUnique(int folderID, Query hashtag, Twitter twitterRequest)
			throws FileNotFoundException, UnsupportedEncodingException, IOException, InterruptedException {
		int counter = 0;

		// Ids of the tweets already written during this search.
		Hashtable<Long, Boolean> counterTweetsUnique = new Hashtable<Long, Boolean>();
		Query query = hashtag;
		query.setCount(100);

		System.out.println(Thread.currentThread().getName() + " is going to store in folder# [" + folderID + "]");
		System.out.println("Started Collecting the hashtag [" + hashtag.getQuery() + "] with "
				+ Thread.currentThread().getName() + "] and storing in folder [" + folderID + "]");

		QueryResult queryResult = null;
		do {
			try {
				queryResult = twitterRequest.search(query);

				List<Status> tweets = queryResult.getTweets();
				for (Status tweet : tweets) {
					String rawJSON = DataObjectFactory.getRawJSON(tweet);
					if (rawJSON == null || rawJSON.equals("")) {
						continue;
					}
					counter++;

					Long tweetId = Long.valueOf(tweet.getId());
					if (counterTweetsUnique.containsKey(tweetId)) {
						continue;
					}
					rw.writeLine(toCsvLine(tweet));
					// Recorded only after a successful write.
					counterTweetsUnique.put(tweetId, Boolean.TRUE);
				}
				query = queryResult.nextQuery();
				twitterRequest = checkRateLimit(queryResult, twitterRequest);

			} catch (TwitterException tee) {
				int statusCode = tee.getStatusCode();
				System.out.println("Failed to get timeline: " + tee.getMessage());
				System.out.println("Code: " + statusCode);

				boolean recognised = printStatusExplanation(statusCode);
				boolean keysRejected = statusCode == 400 || statusCode == 401 || statusCode == 429;
				if (recognised && !keysRejected) {
					return false;
				}

				// Waiting also keeps an unrecognised, repeating failure from
				// turning into a busy loop.
				TimeUnit.SECONDS.sleep(RETRY_DELAY_SECONDS);
				if (keysRejected) {
					// getRequest() already advances to the next application;
					// rotating the queue here as well would skip one (and, with
					// two applications, come back to the rejected one).
					System.err.println("Trying to connect using new keys.");
					twitterRequest = getRequest();
					System.out.println("Connection established....");
				}
			} catch (IOException e) {
				// The output file cannot be written; fetching the same page
				// again would not help.
				System.err.println(e.getMessage());
				return false;
			}

			System.out.println("Collected [" + counter + "] Tweets from hashtag with [" + counterTweetsUnique.size()
			+ "] Unique Tweets [" + hashtag.getQuery() + "] with [" + Thread.currentThread().getName() + "] ");
		} while (queryResult == null || queryResult.hasNext());

		return true;
	}

	/**
	 * Formats one tweet as {@code id,latitude,longitude,createdAt,text}.
	 * Latitude and longitude are written as {@code null} when the tweet has no
	 * location; line breaks in the text are replaced by spaces.
	 */
	// NOTE(review): the text is written unquoted, so a comma inside a tweet
	// produces extra columns.
	private static String toCsvLine(Status tweet) {
		Double lat1 = null;
		Double lon1 = null;
		GeoLocation loc = tweet.getGeoLocation();
		if (loc != null) {
			lat1 = loc.getLatitude();
			lon1 = loc.getLongitude();
		}

		// "\r\n" has to be handled first, otherwise it is never matched.
		String text = tweet.getText().replace("\r\n", " ").replace("\r", " ").replace("\n", " ");

		return tweet.getId() + "," + lat1 + "," + lon1 + "," + tweet.getCreatedAt() + "," + text;
	}

	/**
	 * Prints the name and explanation of a Twitter HTTP status code.
	 *
	 * @return {@code false} if the code is not one of the listed ones
	 */
	private static boolean printStatusExplanation(int statusCode) {
		switch (statusCode) {
		case 304:
			System.out.println("Not Modified");
			System.out.println("There was no new data to return.");
			return true;
		case 400:
			System.out.println("Bad Request");
			System.out.println(
					"The request was invalid or cannot be otherwise served. An accompanying error message will explain further. In API v1.1, requests without authentication are considered invalid and will yield this response.");
			return true;
		case 401:
			System.out.println("Unauthorized");
			System.out.println("Authentication credentials were missing or incorrect.\n"
					+ "Also returned in other circumstances, for example all calls to API v1 endpoints now return 401 (use API v1.1 instead).");
			return true;
		case 403:
			System.out.println("Forbidden");
			System.out.println(
					"The request is understood, but it has been refused or access is not allowed. An accompanying error message will explain why. This code is used when requests are being denied due to update limits. Other reasons for this status being returned are listed alongside the response codes in the table below.");
			return true;
		case 404:
			System.out.println("Not Found");
			System.out.println(
					"The URI requested is invalid or the resource requested, such as a user, does not exists. Also returned when the requested format is not supported by the requested method.");
			return true;
		case 406:
			System.out.println("Not Acceptable");
			System.out
			.println("Returned by the Search API when an invalid format is specified in the request.");
			return true;
		case 410:
			System.out.println("Gone");
			System.out.println(
					"This resource is gone. Used to indicate that an API endpoint has been turned off. For example: (The Twitter REST API v1 will soon stop functioning. Please migrate to API v1.1.)");
			return true;
		case 420:
			System.out.println("Enhance Your Calm");
			System.out.println(
					"Returned by the version 1 Search and Trends APIs when you are being rate limited.m");
			return true;
		case 422:
			System.out.println("Unprocessable Entity");
			System.out.println(
					"Returned when an image uploaded to POST account / update_profile_banner is unable to be processed");
			return true;
		case 429:
			System.out.println("Too Many Requests");
			System.out.println(
					"Returned in API v1.1 when a request cannot be served due to the application’s rate limit having been exhausted for the resource. See Rate Limiting in API v1.1.");
			return true;
		case 500:
			System.out.println("Internal Server Error");
			System.out.println(
					"Something is broken. Please post to the developer forums so the Twitter team can investigate.");
			return true;
		case 502:
			System.out.println("Bad Gateway");
			System.out.println("Twitter is down or being upgraded.");
			return true;
		case 503:
			System.out.println("Service Unavailable");
			System.out.println("The Twitter servers are up, but overloaded with requests. Try again later.");
			return true;
		case 504:
			System.out.println("Gateway timeout");
			System.out.println(
					"The Twitter servers are up, but the request couldn’t be serviced due to some failure within our stack. Try again later");
			return true;
		default:
			return false;
		}
	}

	/**
	 * Starts two crawler threads, ten seconds apart, for a fixed search term.
	 */
	public static void main(String[] args) throws Exception {
		TwitterCrawler.tag = "cardiovascular diseases";
		for (int i = 0; i < 2; i++) {
			new Thread(new TwitterCrawler()).start();
			TimeUnit.SECONDS.sleep(10);
		}
	}
}

The KeyHandler class reads the keys from the file and stores them in a list, in order. Once the session for the first key is over, the application connects using the second key.

package tweets;

import java.io.BufferedReader;
import java.io.FileReader;
import java.util.LinkedList;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Holds the Twitter API credentials of several applications in one queue and
 * hands them out one application (four consecutive entries) at a time.
 *
 * <p>The queue is a static field shared by all instances. Rotation and
 * replacement of the queue are guarded by the class lock so that handlers
 * used from different threads do not interleave their steps.
 */
public class KeyHandler {
	/** Number of consecutive queue entries that belong to one application. */
	private static final int KEYS_PER_APP = 4;

	// Variables to link all Twitter Keys.
	public static LinkedList<String> twitterAPIAllKeys = new LinkedList<String>();

	String consumerkey_global = "";
	String consumersecret_global = "";
	String accesstoken_global = "";
	String accesssecret_global = "";

	/**
	 * Loads the keys from {@code AllkeyFile.txt} into the shared queue. If the
	 * file cannot be read the failure is logged and the queue is left as it was.
	 */
	// NOTE(review): every new instance replaces the shared queue, which also
	// resets the rotation done by other instances.
	KeyHandler() {
		try {
			LinkedList<String> loaded = loadKey("AllkeyFile.txt");
			synchronized (KeyHandler.class) {
				twitterAPIAllKeys = loaded;
			}
		} catch (Exception ex) {
			Logger.getLogger(KeyHandler.class.getName()).log(Level.SEVERE, null, ex);
		}
	}

	/**
	 * Reads all keys from a text file, one value per line and in file order.
	 * Surrounding whitespace is removed and blank lines are skipped.
	 *
	 * @param filename path of the key file
	 * @return the values in the order they appear in the file
	 * @throws Exception if the file cannot be opened or read
	 */
	public LinkedList<String> loadKey(String filename) throws Exception {
		LinkedList<String> consumerQueue = new LinkedList<String>();

		BufferedReader br = new BufferedReader(new FileReader(filename));
		try {
			String readStr;
			while ((readStr = br.readLine()) != null) {
				String value = readStr.trim();
				if (value.length() > 0) {
					consumerQueue.add(value);
				}
			}
		} finally {
			// Closing the outer reader also closes the file, even when reading fails.
			br.close();
		}

		// Only the count is printed: the entries are secrets.
		System.out.println("Consumer Queue Print out... [ " + consumerQueue.size() + " entries ].");

		return consumerQueue;
	}

	/**
	 * Moves the application at the head of the queue to its tail.
	 *
	 * @throws java.util.NoSuchElementException if the queue holds fewer than
	 *         four entries; the queue is left untouched in that case
	 */
	protected void switchKeys() {
		synchronized (KeyHandler.class) {
			requireOneApplication();
			for (int i = 0; i < KEYS_PER_APP; i++) {
				twitterAPIAllKeys.add(twitterAPIAllKeys.pop());
			}
			System.out.println("Keys are Switched ... [ " + twitterAPIAllKeys.size() + " entries ].");
		}
	}

	/**
	 * Copies the credentials of the application at the head of the queue into
	 * this handler's fields and moves that application to the tail, so the
	 * next call returns the following application.
	 *
	 * @throws java.util.NoSuchElementException if the queue holds fewer than
	 *         four entries; the queue is left untouched in that case
	 */
	protected void getKey() {
		synchronized (KeyHandler.class) {
			requireOneApplication();

			// identifying the first key to connect.
			consumerkey_global = twitterAPIAllKeys.pop();
			consumersecret_global = twitterAPIAllKeys.pop();
			accesstoken_global = twitterAPIAllKeys.pop();
			accesssecret_global = twitterAPIAllKeys.pop();

			twitterAPIAllKeys.add(consumerkey_global);
			twitterAPIAllKeys.add(consumersecret_global);
			twitterAPIAllKeys.add(accesstoken_global);
			twitterAPIAllKeys.add(accesssecret_global);
		}
	}

	/** Fails before anything is removed when the queue cannot supply four entries. */
	private static void requireOneApplication() {
		if (twitterAPIAllKeys == null || twitterAPIAllKeys.size() < KEYS_PER_APP) {
			int available = twitterAPIAllKeys == null ? 0 : twitterAPIAllKeys.size();
			throw new java.util.NoSuchElementException("Need " + KEYS_PER_APP
					+ " key entries for one application but only " + available + " are loaded");
		}
	}
}

Finally, a class to write tweets to a CSV file.

package tweets;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
 
/**
 * Line-oriented writer that stores text in a UTF-16 encoded file.
 *
 * <p>The file is opened (and truncated) by the constructor. If it cannot be
 * opened, the problem is reported on the console and every later
 * {@link #writeLine(String)} call fails with an {@link IOException}.
 * {@code writeLine} and {@code close} are synchronized so that one instance
 * can be shared by several threads without lines being interleaved.
 *
 * @author coding-guru.com
 *
 */
public class FileRW {
	FileOutputStream outputStream;
	OutputStreamWriter outputStreamWriter;
	BufferedWriter bufferedWriter;
	
	/**
	 * Opens {@code fileName} for writing.
	 *
	 * @param fileName path of the file to create or overwrite
	 */
	public FileRW(String fileName){
		try {
			outputStream = new FileOutputStream(fileName);
			outputStreamWriter = new OutputStreamWriter(outputStream, "UTF-16");
			bufferedWriter = new BufferedWriter(outputStreamWriter);
		} catch (IOException e) {
			System.out.println(e.getMessage());
			e.printStackTrace();
			// Do not keep a half-opened file around.
			if (outputStream != null) {
				try {
					outputStream.close();
				} catch (IOException ignored) {
					// Nothing more can be done; the original failure was reported above.
				}
			}
		}
	}
 
	/**
	 * Writes {@code text} followed by a line separator and flushes, so the
	 * complete line is in the file when this method returns.
	 *
	 * @param text the line content, without line terminator
	 * @throws IOException if the file could not be opened or the write fails
	 */
	public synchronized void writeLine(String text) throws IOException {
		if (bufferedWriter == null) {
			throw new IOException("Output file is not open");
		}
		bufferedWriter.write(text);
		// The separator has to be written before flushing; otherwise the
		// terminator of the last line stays in the buffer.
		bufferedWriter.newLine();
		bufferedWriter.flush();
	}
 
	/**
	 * Closes the file. Does nothing if the file was never opened; a failure
	 * while closing is printed and not propagated.
	 */
	public synchronized void close(){
		if (bufferedWriter == null) {
			return;
		}
		try {
			bufferedWriter.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

Leave a Reply

Your email address will not be published.

This site uses Akismet to reduce spam. Learn how your comment data is processed.