if (!require(BatchGetSymbols)) install.packages('BatchGetSymbols')
## Loading required package: BatchGetSymbols
## Loading required package: rvest
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
library(BatchGetSymbols)
library(ggplot2)
library(reshape2)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Fetch price history for the selected tickers from Yahoo Finance.
# Start and end of the sample window
first.date <- "2021-12-31"
last.date <- "2024-12-31"
# Symbols to download
tickers <- c("F", "TSLA")
# The cache folder lives under tempdir(), so it does not outlive the R session
l.out <- BatchGetSymbols(
  tickers = tickers,
  first.date = first.date,
  last.date = last.date,
  cache.folder = file.path(tempdir(), "BGS_Cache")
)
## Warning: `BatchGetSymbols()` was deprecated in BatchGetSymbols 2.6.4.
## ℹ Please use `yfR::yf_get()` instead.
## ℹ 2022-05-01: Package BatchGetSymbols will soon be replaced by yfR. More
## details about the change is available at github
## <<www.github.com/msperlin/yfR> You can install yfR by executing:
##
## remotes::install_github('msperlin/yfR')
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##
## Running BatchGetSymbols for:
##
## tickers =F, TSLA
## Downloading data for benchmark ticker
## ^GSPC | yahoo (1|1) | Not Cached | Saving cache
## F | yahoo (1|2) | Not Cached | Saving cache - Got 100% of valid prices | Youre doing good!
## TSLA | yahoo (2|2) | Not Cached | Saving cache - Got 100% of valid prices | Looking good!
# Show the per-ticker download summary returned alongside the price data
print(l.out[["df.control"]])
## # A tibble: 2 × 6
## ticker src download.status total.obs perc.benchmark.dates threshold.decision
## <chr> <chr> <chr> <int> <dbl> <chr>
## 1 F yahoo OK 753 1 KEEP
## 2 TSLA yahoo OK 753 1 KEEP
# Reshape the downloaded ticker table to wide form, thin it, and build returns
df <- reshape.wide(l.out[["df.tickers"]])
# Keep every 10th row of the adjusted-price table, starting with the first row
idx <- seq(from = 1, to = length(df$price.adjusted$ref.date), by = 10)
price.adjusted <- df$price.adjusted[idx, ]
# One return series per ticker; [-1] discards the first element of each series
ret.adjusted.prices <- as.data.frame(
  lapply(price.adjusted[c("F", "TSLA")], function(px) calc.ret(px)[-1])
)
# Covariance matrix of the sampled return series
stats::cov(ret.adjusted.prices)
## F TSLA
## F 0.008508731 0.005999859
## TSLA 0.005999859 0.018110185
# Correlation matrix of the same series
stats::cor(ret.adjusted.prices)
## F TSLA
## F 1.0000000 0.4833338
## TSLA 0.4833338 1.0000000
# Calculate optimal (minimum-variance) weights for the portfolio
#   w * F + (1 - w) * TSLA
# With var_F and var_TSLA the return variances and cov_F_TSLA their covariance,
# the variance-minimising weight on F is
#   w = (var_TSLA - cov_F_TSLA) / (var_F + var_TSLA - 2 * cov_F_TSLA)
#
# The covariance matrix is computed once and addressed by column name, so the
# result does not depend on column order or on extra columns that later steps
# may append to ret.adjusted.prices.
sigma <- cov(ret.adjusted.prices[c("F", "TSLA")])
var_F <- sigma["F", "F"]
var_TSLA <- sigma["TSLA", "TSLA"]
cov_F_TSLA <- sigma["F", "TSLA"]
# The denominator equals Var(F - TSLA); if it is zero or not finite the weight
# is undefined, so fail loudly instead of carrying NaN/NA forward.
denom <- var_F + var_TSLA - 2 * cov_F_TSLA
if (!is.finite(denom) || denom <= 0) {
  stop(
    "Cannot compute the optimal weight: Var(F - TSLA) is zero or not finite.",
    call. = FALSE
  )
}
w <- (var_TSLA - cov_F_TSLA) / denom
# Clamp to [0, 1] so that neither asset gets a negative weight
w <- min(max(w, 0), 1)
w
## [1] 0.8283851
print("Variance of Optimal Portfolio")
## [1] "Variance of Optimal Portfolio"
# Variance of w * F + (1 - w) * TSLA for the (clamped) weight
print(w^2 * var_F + (1 - w)^2 * var_TSLA + 2 * w * (1 - w) * cov_F_TSLA)
## [1] 0.008078171
# Construct a new portfolio w F + (1-w) TSLA
# Rebase both price series so that each equals 1 on the first sampled date
price.adjusted$F <- price.adjusted$F / price.adjusted$F[1]
price.adjusted$TSLA <- price.adjusted$TSLA / price.adjusted$TSLA[1]
# Portfolio value path: weight w on rebased F and (1 - w) on rebased TSLA.
# NOTE(review): the weights are applied to price levels, not to per-period
# returns, so the returns of this series are not w * ret_F + (1 - w) * ret_TSLA
# and its variance need not equal the "optimal" variance printed earlier.
# Confirm this buy-and-hold construction is intended.
portfolio <- w * price.adjusted$F + (1 - w) * price.adjusted$TSLA
# Spell out `tickers` (the original relied on partial matching of `ticker`)
# and supply one label per price, the same shape as the argument's default.
portfolio_return <- calc.ret(
  portfolio,
  tickers = rep("F_TSLA", length(portfolio))
)
# [-1] drops the first return so the series lines up with the F/TSLA returns
ret.adjusted.prices$F_TSLA <- portfolio_return[-1]
price.adjusted$F_TSLA <- portfolio
# Attach the matching dates (first date dropped) for plotting
ret.adjusted.prices$ref.date <- price.adjusted$ref.date[-1]
# Plot the rebased price series: melt to long format, one line per series
df_plot_price <- melt(
  price.adjusted,
  id.vars = "ref.date",
  value.name = "price"
)
p <- ggplot(df_plot_price, aes(x = ref.date, y = price, color = variable)) +
  geom_line()
print(p)
# Summary statistics and plotting data for the bi-weekly returns
# Long format: one row per date and series, with the value in `return`
df_plot_return <- melt(
  ret.adjusted.prices,
  id.vars = "ref.date",
  value.name = "return"
)
# Mean and standard deviation of the returns within each series
mu <- plyr::ddply(
  df_plot_return, "variable", plyr::summarise,
  grp.mean = mean(return)
)
sdv <- plyr::ddply(
  df_plot_return, "variable", plyr::summarise,
  grp.sd = sd(return)
)
head(mu)
## variable grp.mean
## 1 F -0.0026418464
## 2 TSLA 0.0118885593
## 3 F_TSLA -0.0004503417
head(sdv)
## variable grp.sd
## 1 F 0.09224278
## 2 TSLA 0.13457409
## 3 F_TSLA 0.08963838
# Histogram of returns with the bars of each series placed side by side.
# No bins/binwidth is supplied, so stat_bin() uses its default bin count
# (see the message recorded below).
p <- ggplot(
  df_plot_return,
  aes(x = return, color = variable, fill = variable)
) +
  geom_histogram(alpha = 0.5, position = "dodge")
print(p)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.