Site Overlay

Spark Streaming to fetch data from Twitter #100daysofcode

Hello World,

To get data from Twitter, generate the four keys (consumer key, consumer secret, access token and access token secret) from the Twitter API, then use the Scala program below.

Features: added a config file from which the program reads its configuration.

Save the config file (application.conf, with a twitter-conf section) in the resources folder.

Github link :

import org.apache.spark._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._ // not necessary since Spark 1.3
import twitter4j.conf.ConfigurationBuilder
import twitter4j.auth.OAuthAuthorization
import twitter4j.Status
import org.apache.spark.streaming.twitter.TwitterUtils
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.twitter._
import org.apache.spark.streaming.StreamingContext._
import org.apache.log4j.Level
import com.typesafe.config.{ Config, ConfigFactory }

/** Streams tweets matching a fixed keyword filter into Spark Streaming and prints their text.
  *
  * The four Twitter OAuth keys are read from the `twitter-conf` section of
  * `application.conf`, which is loaded from the classpath.
  */
object sparkstreaming {

  /** Entry point: builds the Twitter credentials, starts a local streaming context and
    * blocks until the stream is terminated.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {

    // Read config file for the auth keys
    val config = ConfigFactory.load("application.conf").getConfig("twitter-conf")
    val consumerKey = config.getString("consumerKey")
    val consumerSecret = config.getString("consumerSecret")
    val accessToken = config.getString("accessToken")
    val accessTokenSecret = config.getString("accessTokenSecret")

    // Apply the four keys to the twitter4j configuration; without this the
    // authorization object would be built with no credentials at all.
    val tcb = new ConfigurationBuilder
    tcb
      .setOAuthConsumerKey(consumerKey)
      .setOAuthConsumerSecret(consumerSecret)
      .setOAuthAccessToken(accessToken)
      .setOAuthAccessTokenSecret(accessTokenSecret)

    // Comma-separated list of keywords to track
    val filters = "bigdata".split(",").toSeq

    // Create a local StreamingContext with a batch interval of 5 seconds
    val ssc = new StreamingContext("local[*]", "TwitterStreaming", Seconds(5))
    val auth = new OAuthAuthorization(tcb.build())

    // Create a DStream from Twitter using our streaming context
    val tweets = TwitterUtils.createStream(ssc, Some(auth), filters)
    val statuses = tweets.map(status => status.getText())

    // A DStream needs at least one output operation, otherwise start() fails
    // because there is nothing to execute.
    statuses.print()

    ssc.start() // Start the computation
    ssc.awaitTermination() // Wait for the computation to terminate
  }
}

#100daysofcode #day3

Signing off

Leave a Reply

Your email address will not be published. Required fields are marked *