if (!require(BatchGetSymbols)) install.packages('BatchGetSymbols')
## Loading required package: BatchGetSymbols
## Loading required package: rvest
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
library(BatchGetSymbols)
library(ggplot2)
library(reshape2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Download real data from Yahoo Finance
# Tickers to fetch
tickers <- c("F", "TSLA")
# First and last dates passed to the downloader (YYYY-MM-DD strings)
first.date <- "2011-12-31"
last.date <- "2015-12-31"
# The cache folder lives under tempdir()
l.out <- BatchGetSymbols(
  tickers = tickers,
  first.date = first.date,
  last.date = last.date,
  cache.folder = file.path(tempdir(), "BGS_Cache")
)
##
## Running BatchGetSymbols for:
##
## tickers =F, TSLA
## Downloading data for benchmark ticker
## ^GSPC | yahoo (1|1) | Not Cached | Saving cache
## F | yahoo (1|2) | Not Cached | Saving cache - Got 100% of valid prices | Nice!
## TSLA | yahoo (2|2) | Not Cached | Saving cache - Got 100% of valid prices | Well done!
# Per-ticker download report
print(l.out[["df.control"]])
## # A tibble: 2 × 6
## ticker src download.status total.obs perc.benchmark.dates threshold.decision
## <chr> <chr> <chr> <int> <dbl> <chr>
## 1 F yahoo OK 1005 1 KEEP
## 2 TSLA yahoo OK 1005 1 KEEP
# Reshape to wide format, keep every 10th observation, then compute returns
df <- reshape.wide(l.out$df.tickers)
idx <- seq(from = 1, to = nrow(df$price.adjusted), by = 10)
price.adjusted <- df$price.adjusted[idx, ]
# [-1] drops the first element of each return series
# (presumably NA, since the first price has no predecessor)
ret.adjusted.prices <- data.frame(
  F = calc.ret(price.adjusted[["F"]])[-1],
  TSLA = calc.ret(price.adjusted[["TSLA"]])[-1]
)
# Covariance and correlation of the sampled returns
cov(ret.adjusted.prices)
## F TSLA
## F 0.0026060088 0.0008561211
## TSLA 0.0008561211 0.0114644436
cor(ret.adjusted.prices)
## F TSLA
## F 1.0000000 0.1566285
## TSLA 0.1566285 1.0000000
# Construct a new portfolio: 0.86 share of F + 0.14 share of TSLA.
# The weights multiply adjusted price levels, not returns.
portfolio <- 0.86 * price.adjusted$F + 0.14 * price.adjusted$TSLA
# Spell out `tickers` in full (the original `ticker =` only worked through
# partial argument matching) and supply one label per price observation.
portfolio_return <- calc.ret(
  portfolio,
  tickers = rep("F_TSLA", length(portfolio))
)
# Drop the first element so the series lines up with the F and TSLA returns,
# which were built with the same [-1].
ret.adjusted.prices$F_TSLA <- portfolio_return[-1]
price.adjusted$F_TSLA <- portfolio
# Each return is dated by the later of the two prices it is computed from
ret.adjusted.prices$ref.date <- price.adjusted$ref.date[-1]
# Plot every price series in its own panel, each with a free y axis
df_plot_price <- melt(
  price.adjusted,
  id.vars = "ref.date",
  value.name = "price"
)
p <- ggplot(df_plot_price, aes(x = ref.date, y = price, color = variable)) +
  geom_line() +
  facet_wrap(~variable, scales = "free_y")
print(p)
# Bi-weekly returns in long format (used by the histogram further down),
# plus the mean and standard deviation of each series.
df_plot_return <- melt(
  ret.adjusted.prices,
  id.vars = "ref.date",
  value.name = "return"
)
# Qualify summarise with plyr:: because dplyr exports a function of the same
# name, so the bare name depends on the order the packages were attached.
mu <- ddply(
  df_plot_return, "variable", plyr::summarise,
  grp.mean = mean(return)
)
sdv <- ddply(
  df_plot_return, "variable", plyr::summarise,
  grp.sd = sd(return)
)
head(mu)
## variable grp.mean
## 1 F 0.004986493
## 2 TSLA 0.026289045
## 3 F_TSLA 0.009047794
head(sdv)
## variable grp.sd
## 1 F 0.05104908
## 2 TSLA 0.10707214
## 3 F_TSLA 0.05060105
# Histogram of returns; bars for the series are dodged within each bin
p <- ggplot(
  df_plot_return,
  aes(x = return, color = variable, fill = variable)
) +
  geom_histogram(alpha = 0.5, position = "dodge")
print(p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.