How to retrieve more than 100 tweets with the Twitter API and Twitter4J

The Twitter4J library wraps the Twitter API and gives you access to the public_timeline so that you can retrieve the latest tweets. The Twitter API returns only the last 20 tweets. To retrieve older tweets you may have to request the timeline a number of times. Twitter does not provide any other option (I guess you can use the streaming API to get tweets pushed to you, but you can’t get more than 20 older messages). To find older tweets, you can use the Twitter4J library, which lets you search for tweets using the Query class and the Twitter.search(twitter4j.Query) method. Below is example code that searches for tweets and writes the retrieved tweets to a .csv file. The file-writing part still needs a few improvements.

package twitter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import twitter4j.Query;
import twitter4j.QueryResult;
import twitter4j.Status;
import twitter4j.Twitter;
import twitter4j.TwitterException;
import twitter4j.TwitterFactory;
import twitter4j.conf.ConfigurationBuilder;

/**
 * Collects tweets matching a search query through Twitter4J and exports the
 * collected tweets to {@code Tweets.csv}.
 *
 * @author coding-guru.com
 */
public class Tweets {

	/** Upper bound applied to the page size requested per search call. */
	private static final int MAX_PAGE_SIZE = 100;

	ConfigurationBuilder cb = new ConfigurationBuilder();
	Twitter twitter;
	ArrayList<Status> tweets;

	/**
	 * Builds the Twitter4J client from the OAuth credentials below and starts
	 * with an empty tweet list. Replace the placeholder strings with real
	 * credentials before running.
	 */
	Tweets() {
		cb.setDebugEnabled(true).setOAuthConsumerKey("Your Consumer Key")
				.setOAuthConsumerSecret("Your Consumer Secret")
				.setOAuthAccessToken("Your Access Token")
				.setOAuthAccessTokenSecret("Your Access Token Secret");
		twitter = new TwitterFactory(cb.build()).getInstance();
		tweets = new ArrayList<Status>();
	}

	/**
	 * Searches for {@code tag} page by page, walking backwards in time, until
	 * {@code numberOfTweets} tweets are held in {@link #tweets} or the search
	 * stops producing results.
	 *
	 * @param tag            the search query, e.g. a hashtag
	 * @param numberOfTweets total number of tweets to hold before stopping
	 * @param queryCount     requested page size per search call; clamped to
	 *                       the range 1..{@value #MAX_PAGE_SIZE} and never
	 *                       larger than the number of tweets still missing
	 */
	public void getTweets(String tag, int numberOfTweets, int queryCount) {
		Query query = new Query(tag);
		long lastID = Long.MAX_VALUE;

		while (tweets.size() < numberOfTweets) {
			int remaining = numberOfTweets - tweets.size();
			int pageSize = Math.min(Math.min(queryCount, remaining), MAX_PAGE_SIZE);
			query.setCount(Math.max(1, pageSize));

			QueryResult result;
			try {
				result = twitter.search(query);
			} catch (TwitterException te) {
				// Retrying the identical request in a tight loop would never
				// terminate (and would hammer the API), so give up here.
				System.out.println("Couldn't connect: " + te);
				break;
			}

			// An empty page means there is nothing older left to fetch.
			if (result.getTweets().isEmpty()) {
				break;
			}

			tweets.addAll(result.getTweets());
			System.out.println("Gathered " + tweets.size() + " tweets" + "\n");

			// Only the new page can lower the minimum id seen so far.
			for (Status t : result.getTweets()) {
				if (t.getId() < lastID) {
					lastID = t.getId();
				}
			}
			// Ask the next page for tweets strictly older than the oldest one seen.
			query.setMaxId(lastID - 1);
		}
	}

	/**
	 * Writes every collected tweet as one row of {@code Tweets.csv}: row index,
	 * user location, creation date (month - day - year), user and message.
	 * Writing stops at the first I/O error; the file is closed in every case.
	 */
	public void writeTweets() {
		FileWR writer = new FileWR("Tweets");

		try {
			writer.writeFile("S#,Location,Date, User, Message \n");

			Calendar cal = Calendar.getInstance();
			for (int i = 0; i < tweets.size(); i++) {
				Status t = tweets.get(i);

				String user = t.getUser().getScreenName();
				String msg = t.getText();

				Date d = t.getCreatedAt();
				cal.setTime(d);
				int year = cal.get(Calendar.YEAR);
				// Calendar.MONTH is zero-based (January == 0).
				int month = cal.get(Calendar.MONTH) + 1;
				int day = cal.get(Calendar.DAY_OF_MONTH);

				writer.writeFile(i + "," + csvField(t.getUser().getLocation()) + ","
						+ month + " - " + day + " - " + year + ","
						+ csvField(" USER: " + user + " ") + ","
						+ csvField(" wrote: " + msg) + "\n");
			}
		} catch (IOException e) {
			System.err.println("Failed to write tweets: " + e);
		} finally {
			writer.close();
		}
	}

	/**
	 * Returns {@code value} as a single CSV field. Values containing a comma,
	 * double quote or line break are wrapped in double quotes with embedded
	 * quotes doubled; other values are returned unchanged. {@code null}
	 * becomes an empty field.
	 */
	private static String csvField(String value) {
		if (value == null) {
			return "";
		}
		boolean needsQuoting = value.indexOf(',') >= 0 || value.indexOf('"') >= 0
				|| value.indexOf('\n') >= 0 || value.indexOf('\r') >= 0;
		if (!needsQuoting) {
			return value;
		}
		return "\"" + value.replace("\"", "\"\"") + "\"";
	}

	/** Fetches 50 tweets tagged {@code #Cancer} and writes them to the CSV file. */
	public static void main(String[] args) throws Exception {
		Tweets t = new Tweets();
		t.getTweets("#Cancer", 50, 50);
		t.writeTweets();
	}
}

package twitter;

import java.io.FileWriter;
import java.io.IOException;
 
/**
 * Minimal helper that writes text to a {@code <fileName>.csv} file.
 *
 * <p>The underlying writer is a single static field, so only one file can be
 * open at a time: constructing a new {@code FileWR} closes whichever writer
 * was opened before and replaces it.
 *
 * @author coding-guru.com
 */
public class FileWR {
	static FileWriter writer;

	/**
	 * Opens (creating or truncating) {@code fileName + ".csv"} for writing.
	 * If the file cannot be opened the error is reported and no file is left
	 * open; a later {@link #writeFile(String)} then fails with an
	 * {@link IOException}.
	 *
	 * @param fileName path of the target file without the {@code .csv} suffix
	 */
	public FileWR(String fileName){
		// Release any writer left open by an earlier instance before replacing it.
		close();
		try {
			// NOTE(review): FileWriter(String) uses the platform default charset.
			writer = new FileWriter(fileName+".csv");
		} catch (IOException e) {
			System.out.println(e.getMessage());
			e.printStackTrace();
		}
	}
 
	/**
	 * Writes {@code text} to the currently open file.
	 *
	 * @param text the text to write
	 * @throws IOException if no file is open or the write fails
	 */
	public static void writeFile(String text) throws IOException {
		if (writer == null) {
			throw new IOException("No file is open for writing");
		}
		writer.write(text);
	}
 
	/**
	 * Closes the currently open file, if any. Calling this when nothing is
	 * open does nothing.
	 */
	public static void close(){
		if (writer == null) {
			return;
		}
		try {
			writer.close();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Forget the writer so later calls see "nothing open".
			writer = null;
		}
	}
}

I am also sharing a small example that calls the Twitter REST search endpoint directly over HTTP with an OAuth-signed request. (The streaming API, by contrast, gets new tweets pushed to you, but you can't get more than 20 older messages from it.)

package crawlTweets;

import oauth.signpost.OAuthConsumer;
import oauth.signpost.commonshttp.CommonsHttpOAuthConsumer;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.json.*;

public class CrawlTweets {

	static String AccessToken = "Your Access Token";
	static String AccessSecret = "Your Access Secret";
	static String ConsumerKey = "Your Consumer Key";
	static String ConsumerSecret = "Your Consumer Secret";

	public static void main(String[] args) throws Exception {
		OAuthConsumer consumer = new CommonsHttpOAuthConsumer(ConsumerKey, ConsumerSecret);

		consumer.setTokenWithSecret(AccessToken, AccessSecret);

		HttpGet request = new HttpGet(
				"https://api.twitter.com/1.1/search/tweets.json?q=%23freebandnames&since_id=24012619984051000&max_id=250126199840518145&result_type=mixed&count=4");
		consumer.sign(request);

		HttpClient client = new DefaultHttpClient();
		HttpResponse response = client.execute(request);

		JSONObject obj = new JSONObject(response);
		System.out.println(obj.getJSONObject("statuses"));
		// JSONArray arr =
		// obj.getJSONObject("statuses").getJSONObject("entities").getJSONArray("hashtags");
		// JSONArray arr = obj.getJSONArray("statuses");
		// String id = obj.get("statuses").toString();

		// System.out.println(arr.toString());

	}
}

No Responses

Leave a Reply

Your email address will not be published. Required fields are marked *

This site uses Akismet to reduce spam. Learn how your comment data is processed.